324 lines
11 KiB
Python
324 lines
11 KiB
Python
import os
|
|
|
|
import numpy as np
|
|
import proglog
|
|
from tqdm import tqdm
|
|
|
|
from moviepy.audio.io.ffmpeg_audiowriter import ffmpeg_audiowrite
|
|
from moviepy.Clip import Clip
|
|
from moviepy.decorators import requires_duration
|
|
from moviepy.tools import deprecated_version_of, extensions_dict
|
|
|
|
|
|
class AudioClip(Clip):
    """ Base class for audio clips.

    See ``AudioFileClip`` and ``CompositeAudioClip`` for usable classes.

    An AudioClip is a Clip with a ``make_frame`` attribute of
    the form `` t -> [ f_t ]`` for mono sound and
    ``t-> [ f1_t, f2_t ]`` for stereo sound (the arrays are Numpy arrays).
    The `f_t` are floats between -1 and 1. These bounds can be
    trespassed without problems (the program will put the
    sound back into the bounds at conversion time, without much impact).

    Parameters
    -----------

    make_frame
      A function `t-> frame at time t`. The frame does not mean much
      for a sound, it is just a float. What 'makes' the sound are
      the variations of that float in the time.

    duration
      Duration of the clip. When given, it is stored as both
      ``self.duration`` and ``self.end``.

    fps
      Frame rate of the sound. Only stored on the clip when not None.

    Attributes
    -----------

    nchannels
      Number of channels (one or two for mono or stereo). Deduced from
      the frame returned by ``make_frame`` at ``t=0``.

    Examples
    ---------

    >>> # Plays the note A (a sine wave of frequency 440HZ)
    >>> import numpy as np
    >>> make_frame = lambda t: 2*[ np.sin(440 * 2 * np.pi * t) ]
    >>> clip = AudioClip(make_frame, duration=5)
    >>> clip.preview()

    """

    def __init__(self, make_frame=None, duration=None, fps=None):
        Clip.__init__(self)

        if fps is not None:
            self.fps = fps

        if make_frame is not None:
            self.make_frame = make_frame
            # Probe the frame at t=0 to find out how many channels the
            # sound has: an iterable frame has one entry per channel,
            # anything else is treated as mono.
            frame0 = self.get_frame(0)
            if hasattr(frame0, '__iter__'):
                self.nchannels = len(list(frame0))
            else:
                self.nchannels = 1
        if duration is not None:
            self.duration = duration
            self.end = duration

    @requires_duration
    def iter_chunks(self, chunksize=None, chunk_duration=None, fps=None,
                    quantize=False, nbytes=2, logger=None):
        """ Iterator that returns the whole sound array of the clip by chunks

        Parameters
        -----------

        chunksize
          Maximal number of frames in each chunk. Ignored (overwritten)
          when ``chunk_duration`` is provided.

        chunk_duration
          Maximal duration of each chunk. If provided, the chunk size is
          ``int(chunk_duration*fps)``.

        fps
          Frame rate used for sampling. Defaults to ``self.fps``.

        quantize, nbytes
          Forwarded to ``to_soundarray`` for each chunk.

        logger
          Anything accepted by ``proglog.default_bar_logger``.

        Raises
        -------

        ValueError
          If neither ``chunksize`` nor ``chunk_duration`` is provided.
        """
        if fps is None:
            fps = self.fps
        logger = proglog.default_bar_logger(logger)
        if chunk_duration is not None:
            chunksize = int(chunk_duration*fps)
        if chunksize is None:
            # Fail with an explicit message rather than an obscure
            # TypeError in the integer division below.
            raise ValueError("iter_chunks requires either 'chunksize' or "
                             "'chunk_duration' to be provided.")

        totalsize = int(fps*self.duration)

        nchunks = totalsize // chunksize + 1

        # Boundaries (in frames) of the chunks: nchunks + 1 evenly spaced
        # integer positions going from 0 to totalsize included.
        pospos = np.linspace(0, totalsize, nchunks + 1, endpoint=True,
                             dtype=int)

        for i in logger.iter_bar(chunk=list(range(nchunks))):
            size = pospos[i+1] - pospos[i]
            assert size <= chunksize
            tt = (1.0/fps)*np.arange(pospos[i], pospos[i+1])
            yield self.to_soundarray(tt, nbytes=nbytes, quantize=quantize,
                                     fps=fps, buffersize=chunksize)

    @requires_duration
    def to_soundarray(self, tt=None, fps=None, quantize=False, nbytes=2,
                      buffersize=50000):
        """
        Transforms the sound into an array that can be played by pygame
        or written in a wav file. See ``AudioClip.preview``.

        Parameters
        ------------

        tt
          Array of the times at which the sound is sampled. If None, the
          whole clip is sampled at rate ``fps``.

        fps
          Frame rate of the sound for the conversion.
          44100 for top quality. Defaults to ``self.fps``.

        quantize
          If true, the samples are clipped to [-0.99, 0.99] and converted
          to signed integers of ``nbytes`` bytes.

        nbytes
          Number of bytes to encode the sound: 1 for 8bit sound,
          2 for 16bit, 4 for 32bit sound.

        buffersize
          When ``tt`` is None and the clip lasts longer than
          ``buffersize/fps``, the array is computed by chunks of at most
          ``buffersize`` frames which are then stacked together.

        """
        if fps is None:
            fps = self.fps

        stacker = np.vstack if self.nchannels == 2 else np.hstack
        max_duration = 1.0 * buffersize / fps
        if tt is None:
            if self.duration > max_duration:
                # Forward the caller's nbytes (it used to be hard-coded to
                # 2) and materialize the chunks: the numpy stacking
                # functions need a sequence, not a generator.
                chunks = self.iter_chunks(fps=fps, quantize=quantize,
                                          nbytes=nbytes,
                                          chunksize=buffersize)
                return stacker(tuple(chunks))
            else:
                tt = np.arange(0, self.duration, 1.0/fps)

        snd_array = self.get_frame(tt)

        if quantize:
            # Clip slightly inside [-1, 1] before scaling to the integer
            # range of the requested sample width.
            snd_array = np.maximum(-0.99, np.minimum(0.99, snd_array))
            inttype = {1: 'int8', 2: 'int16', 4: 'int32'}[nbytes]
            snd_array = (2**(8*nbytes-1)*snd_array).astype(inttype)

        return snd_array

    def max_volume(self, stereo=False, chunksize=50000, logger=None):
        """ Returns the maximal absolute value of the sound samples.

        Parameters
        -----------

        stereo
          If true and the clip has two channels, the maximum is computed
          along axis 0 of each chunk and an array of two values is
          returned. Otherwise a single value is returned.

        chunksize
          Size of the chunks in which the sound is read.

        logger
          Forwarded to ``iter_chunks``.
        """
        stereo = stereo and (self.nchannels == 2)

        maxi = np.array([0, 0]) if stereo else 0
        for chunk in self.iter_chunks(chunksize=chunksize, logger=logger):
            if stereo:
                maxi = np.maximum(maxi, abs(chunk).max(axis=0))
            else:
                maxi = max(maxi, abs(chunk).max())
        return maxi

    @requires_duration
    def write_audiofile(self, filename, fps=None, nbytes=2, buffersize=2000,
                        codec=None, bitrate=None, ffmpeg_params=None,
                        write_logfile=False, verbose=True, logger='bar'):
        """ Writes an audio file from the AudioClip.


        Parameters
        -----------

        filename
          Name of the output file

        fps
          Frames per second. If not set, it will try default to self.fps if
          already set, otherwise it will default to 44100

        nbytes
          Sample width (set to 2 for 16-bit sound, 4 for 32-bit sound)

        buffersize
          Forwarded as is to ``ffmpeg_audiowrite``.

        codec
          Which audio codec should be used. If None provided, the codec is
          determined based on the extension of the filename. Choose
          'pcm_s16le' for 16-bit wav and 'pcm_s32le' for 32-bit wav.

        bitrate
          Audio bitrate, given as a string like '50k', '500k', '3000k'.
          Will determine the size and quality of the output file.
          Note that it is mainly an indicative goal, the bitrate won't
          necessarily be exactly this in the output file.

        ffmpeg_params
          Any additional parameters you would like to pass, as a list
          of terms, like ['-option1', 'value1', '-option2', 'value2']

        write_logfile
          If true, produces a detailed logfile named filename + '.log'
          when writing the file

        verbose
          Boolean indicating whether to print information

        logger
          Either 'bar' or None or any Proglog logger

        Raises
        -------

        ValueError
          If ``codec`` is None and no codec is registered for the
          extension of ``filename``.

        """
        if not fps:
            # ``fps`` is only set on the instance when it was provided, so
            # do not assume that the attribute exists.
            fps = getattr(self, 'fps', None) or 44100

        if codec is None:
            name, ext = os.path.splitext(os.path.basename(filename))
            try:
                codec = extensions_dict[ext[1:]]['codec'][0]
            except KeyError:
                raise ValueError("MoviePy couldn't find the codec associated "
                                 "with the filename. Provide the 'codec' "
                                 "parameter in write_audiofile.")

        return ffmpeg_audiowrite(self, filename, fps, nbytes, buffersize,
                                 codec=codec, bitrate=bitrate,
                                 write_logfile=write_logfile, verbose=verbose,
                                 ffmpeg_params=ffmpeg_params,
                                 logger=logger)
|
|
|
|
|
|
# ``to_audiofile`` is the former name of ``write_audiofile``; it is kept
# available through ``deprecated_version_of``.
AudioClip.to_audiofile = deprecated_version_of(
    AudioClip.write_audiofile, 'to_audiofile')
|
|
###
|
|
|
|
|
|
class AudioArrayClip(AudioClip):
    """

    An audio clip made from a sound array.

    Parameters
    -----------

    array
      A Numpy array representing the sound, of size Nx1 for mono,
      Nx2 for stereo.

    fps
      Frames per second : speed at which the sound is supposed to be
      played.

    """

    def __init__(self, array, fps):

        Clip.__init__(self)
        self.array = array
        self.fps = fps
        self.duration = 1.0 * len(array) / fps

        def make_frame(t):
            """ Returns the sample(s) of the array at time(s) ``t``.

            ``t`` can be a single time or a Numpy array of times. Times
            falling outside of the array yield silence (zeros).
            """
            if isinstance(t, np.ndarray):
                array_inds = (self.fps*t).astype(int)
                # Index 0 is a valid sample: the lower bound must be
                # inclusive, as in the scalar branch below.
                in_array = (array_inds >= 0) & (array_inds < len(self.array))
                # NOTE(review): the result always has two columns, even
                # when ``self.array`` is Nx1 -- confirm this is intended
                # for mono arrays.
                result = np.zeros((len(t), 2))
                result[in_array] = self.array[array_inds[in_array]]
                return result
            else:
                i = int(self.fps * t)
                if i < 0 or i >= len(self.array):
                    # Silence with the same shape as one sample
                    return 0*self.array[0]
                else:
                    return self.array[i]

        self.make_frame = make_frame
        # One entry per channel in the frame at t=0
        self.nchannels = len(list(self.get_frame(0)))
|
|
|
|
|
|
class CompositeAudioClip(AudioClip):
    """ Audio clip obtained by mixing several AudioClips.

    The frames of all the clips playing at a given time are added
    together.

    Parameters
    ------------

    clips
      List of audio clips, which may start playing at different times or
      together. If all have their ``end`` attribute set, the duration
      of the composite clip is computed automatically as the latest of
      these ends.

    """

    def __init__(self, clips):

        Clip.__init__(self)
        self.clips = clips

        end_times = [clip.end for clip in self.clips]
        # The composite has as many channels as its "widest" clip.
        self.nchannels = max(clip.nchannels for clip in self.clips)
        if all(end is not None for end in end_times):
            self.duration = max(end_times)
            self.end = max(end_times)

        def make_frame(t):
            """ Sum of the frames of all the clips playing at time ``t``. """
            playing = [clip.is_playing(t) for clip in self.clips]

            sounds = []
            for clip, part in zip(self.clips, playing):
                if part is False:
                    # This clip is not playing at all at the given time(s)
                    continue
                # ``part`` weights the frames of the clip, in the clip's
                # own time referential.
                weights = np.array([part]).T
                sounds.append(clip.get_frame(t - clip.start)*weights)

            if isinstance(t, np.ndarray):
                shape = (len(t), self.nchannels)
            else:
                shape = self.nchannels

            # Adding to zeros gives a correctly shaped silence when no
            # clip is playing.
            return np.zeros(shape) + sum(sounds)

        self.make_frame = make_frame
|
|
|
|
|
|
def concatenate_audioclips(clips):
    """
    Returns an audio clip playing the given clips one after the other.

    The clip with the highest FPS will be the FPS of the result clip
    (None if no clip has its ``fps`` set).
    """
    # Cumulated durations: the start time of each clip, followed by the
    # end time of the last one.
    boundaries = np.cumsum([0] + [clip.duration for clip in clips])

    shifted = []
    for clip, start in zip(clips, boundaries):
        shifted.append(clip.set_start(start))

    result = CompositeAudioClip(shifted).set_duration(boundaries[-1])

    rates = [clip.fps for clip in clips if getattr(clip, 'fps', None)]
    if rates:
        result.fps = max(rates)
    else:
        result.fps = None
    return result
|