#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Beat and tempo
==============
.. autosummary::
    :toctree: generated/

    beat_track
    plp
"""

from typing import Any, Callable, Optional, Tuple

import numpy as np
import scipy
import scipy.signal
import scipy.stats

from ._cache import cache
from . import core
from . import onset
from . import util
from .feature import tempogram, fourier_tempogram
from .feature import tempo as _tempo
from .util.exceptions import ParameterError
from .util.decorators import moved

# Public names exported by this module.
__all__ = ["beat_track", "tempo", "plp"]


# Backward-compatibility alias: ``tempo`` is implemented in ``.feature``
# (imported in this module as ``_tempo``) and re-exported here through the
# ``moved`` decorator, tagged with its previous location
# (``librosa.beat.tempo``), the version of the move (0.10.0), and the version
# in which the alias is scheduled for removal (1.0).
# NOTE(review): presumably ``moved`` emits a deprecation warning when the
# alias is called -- confirm against ``util.decorators``.
tempo = moved(moved_from="librosa.beat.tempo", version="0.10.0", version_removed="1.0")(
    _tempo
)


def beat_track(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    start_bpm: float = 120.0,
    tightness: float = 100,
    trim: bool = True,
    bpm: Optional[float] = None,
    prior: Optional[scipy.stats.rv_continuous] = None,
    units: str = "frames",
) -> Tuple[float, np.ndarray]:
    r"""Dynamic programming beat tracker.

    Beats are detected in three stages, following the method of [#]_:

      1. Measure onset strength
      2. Estimate tempo from onset correlation
      3. Pick peaks in onset strength approximately consistent with estimated
         tempo

    .. [#] Ellis, Daniel PW. "Beat tracking by dynamic programming."
           Journal of New Music Research 36.1 (2007): 51-60.
           http://labrosa.ee.columbia.edu/projects/beattrack/

    Parameters
    ----------
    y : np.ndarray [shape=(n,)] or None
        audio time series
    sr : number > 0 [scalar]
        sampling rate of ``y``
    onset_envelope : np.ndarray [shape=(n,)] or None
        (optional) pre-computed onset strength envelope.
    hop_length : int > 0 [scalar]
        number of audio samples between successive ``onset_envelope`` values
    start_bpm : float > 0 [scalar]
        initial guess for the tempo estimator (in beats per minute)
    tightness : float [scalar]
        tightness of beat distribution around tempo
    trim : bool [scalar]
        trim leading/trailing beats with weak onsets
    bpm : float [scalar]
        (optional) If provided, use ``bpm`` as the tempo instead of
        estimating it from ``onset_envelope``.
    prior : scipy.stats.rv_continuous [optional]
        An optional prior distribution over tempo.
        If provided, ``start_bpm`` will be ignored.
    units : {'frames', 'samples', 'time'}
        The units to encode detected beat events in.
        By default, 'frames' are used.

    Returns
    -------
    tempo : float [scalar, non-negative]
        estimated global tempo (in beats per minute)
    beats : np.ndarray [shape=(m,)]
        estimated beat event locations in the specified units
        (default is frame indices)
    .. note::
        If no onset strength could be detected, beat_tracker estimates 0 BPM
        and returns an empty array.

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided,
        or if ``units`` is not one of 'frames', 'samples', or 'time'

    See Also
    --------
    librosa.onset.onset_strength

    Examples
    --------
    Track beats using time series input

    >>> y, sr = librosa.load(librosa.ex('choice'), duration=10)

    >>> tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    >>> tempo
    135.99917763157896

    Print the frames corresponding to beats

    >>> beats
    array([  3,  21,  40,  59,  78,  96, 116, 135, 154, 173, 192, 211,
           230, 249, 268, 287, 306, 325, 344, 363])

    Or print them as timestamps

    >>> librosa.frames_to_time(beats, sr=sr)
    array([0.07 , 0.488, 0.929, 1.37 , 1.811, 2.229, 2.694, 3.135,
           3.576, 4.017, 4.458, 4.899, 5.341, 5.782, 6.223, 6.664,
           7.105, 7.546, 7.988, 8.429])

    Track beats using a pre-computed onset envelope

    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr,
    ...                                          aggregate=np.median)
    >>> tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env,
    ...                                        sr=sr)
    >>> tempo
    135.99917763157896
    >>> beats
    array([  3,  21,  40,  59,  78,  96, 116, 135, 154, 173, 192, 211,
           230, 249, 268, 287, 306, 325, 344, 363])

    Plot the beat events against the onset strength envelope

    >>> import matplotlib.pyplot as plt
    >>> hop_length = 512
    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
    >>> times = librosa.times_like(onset_env, sr=sr, hop_length=hop_length)
    >>> M = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length)
    >>> librosa.display.specshow(librosa.power_to_db(M, ref=np.max),
    ...                          y_axis='mel', x_axis='time', hop_length=hop_length,
    ...                          ax=ax[0])
    >>> ax[0].label_outer()
    >>> ax[0].set(title='Mel spectrogram')
    >>> ax[1].plot(times, librosa.util.normalize(onset_env),
    ...          label='Onset strength')
    >>> ax[1].vlines(times[beats], 0, 1, alpha=0.5, color='r',
    ...            linestyle='--', label='Beats')
    >>> ax[1].legend()
    """
    # Reject an unknown unit before doing any expensive work, and so that the
    # silent-input early return below cannot mask an invalid argument.
    if units not in ("frames", "samples", "time"):
        raise ParameterError(f"Invalid unit type: {units}")

    # First, get the frame->beat strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")

        onset_envelope = onset.onset_strength(
            y=y, sr=sr, hop_length=hop_length, aggregate=np.median
        )

    # Do we have any onsets to grab?  An all-zero envelope carries no beats.
    if not onset_envelope.any():
        return (0.0, np.array([], dtype=int))

    # Estimate BPM if one was not provided
    if bpm is None:
        bpm = _tempo(
            onset_envelope=onset_envelope,
            sr=sr,
            hop_length=hop_length,
            start_bpm=start_bpm,
            prior=prior,
        )[0]

    # Then, run the tracker; the third argument is the envelope frame rate
    beats = __beat_tracker(onset_envelope, bpm, float(sr) / hop_length, tightness, trim)

    # Convert frame indices to the requested units ('frames' needs no change)
    if units == "samples":
        beats = core.frames_to_samples(beats, hop_length=hop_length)
    elif units == "time":
        beats = core.frames_to_time(beats, hop_length=hop_length, sr=sr)

    return (bpm, beats)


def plp(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    win_length: int = 384,
    tempo_min: Optional[float] = 30,
    tempo_max: Optional[float] = 300,
    prior: Optional[scipy.stats.rv_continuous] = None,
) -> np.ndarray:
    """Predominant local pulse (PLP) estimation. [#]_

    The PLP method analyzes the onset strength envelope in the frequency domain
    to find a locally stable tempo for each frame.  These local periodicities
    are used to synthesize local half-waves, which are combined such that peaks
    coincide with rhythmically salient frames (e.g. onset events on a musical time grid).
    The local maxima of the pulse curve can be taken as estimated beat positions.

    This method may be preferred over the dynamic programming method of `beat_track`
    when the tempo is expected to vary significantly over time.  Additionally,
    since `plp` does not require the entire signal to make predictions, it may be
    preferable when beat-tracking long recordings in a streaming setting.

    .. [#] Grosche, P., & Muller, M. (2011).
        "Extracting predominant local pulse information from music recordings."
        IEEE Transactions on Audio, Speech, and Language Processing, 19(6), 1688-1701.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        audio time series. Multi-channel is supported.

    sr : number > 0 [scalar]
        sampling rate of ``y``

    onset_envelope : np.ndarray [shape=(..., n)] or None
        (optional) pre-computed onset strength envelope

    hop_length : int > 0 [scalar]
        number of audio samples between successive ``onset_envelope`` values

    win_length : int > 0 [scalar]
        number of frames to use for tempogram analysis.
        By default, 384 frames (at ``sr=22050`` and ``hop_length=512``) corresponds
        to about 8.9 seconds.

    tempo_min, tempo_max : numbers > 0 [scalar], optional
        Minimum and maximum permissible tempo values.  ``tempo_max`` must be
        strictly greater than ``tempo_min``.

        Set either (or both) to `None` to disable this constraint.

    prior : scipy.stats.rv_continuous [optional]
        A prior distribution over tempo (in beats per minute).
        By default, a uniform prior over ``[tempo_min, tempo_max]`` is used.

    Returns
    -------
    pulse : np.ndarray, shape=[(..., n)]
        The estimated pulse curve.  Maxima correspond to rhythmically salient
        points of time.

        If input is multi-channel, one pulse curve per channel is computed.

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided,
        or if ``tempo_max`` is not strictly greater than ``tempo_min``

    See Also
    --------
    beat_track
    librosa.onset.onset_strength
    librosa.feature.fourier_tempogram

    Examples
    --------
    Visualize the PLP compared to an onset strength envelope.
    Both are normalized here to make comparison easier.

    >>> y, sr = librosa.load(librosa.ex('brahms'))
    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    >>> pulse = librosa.beat.plp(onset_envelope=onset_env, sr=sr)
    >>> # Or compute pulse with an alternate prior, like log-normal
    >>> import scipy.stats
    >>> prior = scipy.stats.lognorm(loc=np.log(120), scale=120, s=1)
    >>> pulse_lognorm = librosa.beat.plp(onset_envelope=onset_env, sr=sr,
    ...                                  prior=prior)
    >>> melspec = librosa.feature.melspectrogram(y=y, sr=sr)

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=3, sharex=True)
    >>> librosa.display.specshow(librosa.power_to_db(melspec,
    ...                                              ref=np.max),
    ...                          x_axis='time', y_axis='mel', ax=ax[0])
    >>> ax[0].set(title='Mel spectrogram')
    >>> ax[0].label_outer()
    >>> ax[1].plot(librosa.times_like(onset_env),
    ...          librosa.util.normalize(onset_env),
    ...          label='Onset strength')
    >>> ax[1].plot(librosa.times_like(pulse),
    ...          librosa.util.normalize(pulse),
    ...          label='Predominant local pulse (PLP)')
    >>> ax[1].set(title='Uniform tempo prior [30, 300]')
    >>> ax[1].label_outer()
    >>> ax[2].plot(librosa.times_like(onset_env),
    ...          librosa.util.normalize(onset_env),
    ...          label='Onset strength')
    >>> ax[2].plot(librosa.times_like(pulse_lognorm),
    ...          librosa.util.normalize(pulse_lognorm),
    ...          label='Predominant local pulse (PLP)')
    >>> ax[2].set(title='Log-normal tempo prior, mean=120', xlim=[5, 20])
    >>> ax[2].legend()

    PLP local maxima can be used as estimates of beat positions.

    >>> tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env)
    >>> beats_plp = np.flatnonzero(librosa.util.localmax(pulse))
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
    >>> times = librosa.times_like(onset_env, sr=sr)
    >>> ax[0].plot(times, librosa.util.normalize(onset_env),
    ...          label='Onset strength')
    >>> ax[0].vlines(times[beats], 0, 1, alpha=0.5, color='r',
    ...            linestyle='--', label='Beats')
    >>> ax[0].legend()
    >>> ax[0].set(title='librosa.beat.beat_track')
    >>> ax[0].label_outer()
    >>> # Limit the plot to a 15-second window
    >>> times = librosa.times_like(pulse, sr=sr)
    >>> ax[1].plot(times, librosa.util.normalize(pulse),
    ...          label='PLP')
    >>> ax[1].vlines(times[beats_plp], 0, 1, alpha=0.5, color='r',
    ...            linestyle='--', label='PLP Beats')
    >>> ax[1].legend()
    >>> ax[1].set(title='librosa.beat.plp', xlim=[5, 20])
    >>> ax[1].xaxis.set_major_formatter(librosa.display.TimeFormatter())
    """
    # Cheap argument checks first, before any signal processing is done
    if tempo_min is not None and tempo_max is not None and tempo_max <= tempo_min:
        raise ParameterError(
            f"tempo_max={tempo_max} must be larger than tempo_min={tempo_min}"
        )

    # Step 1: get the onset envelope
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")

        onset_envelope = onset.onset_strength(
            y=y, sr=sr, hop_length=hop_length, aggregate=np.median
        )

    # Step 2: get the fourier tempogram
    ftgram = fourier_tempogram(
        onset_envelope=onset_envelope,
        sr=sr,
        hop_length=hop_length,
        win_length=win_length,
    )

    # Step 3: pin to the feasible tempo range by zeroing out-of-range bins
    tempo_frequencies = core.fourier_tempo_frequencies(
        sr=sr, hop_length=hop_length, win_length=win_length
    )

    if tempo_min is not None:
        ftgram[..., tempo_frequencies < tempo_min, :] = 0
    if tempo_max is not None:
        ftgram[..., tempo_frequencies > tempo_max, :] = 0

    # reshape lengths to match dimension properly
    tempo_frequencies = util.expand_to(tempo_frequencies, ndim=ftgram.ndim, axes=-2)

    # Step 4: Discard everything below the peak.
    # Peaks are picked on log-compressed magnitude, optionally weighted by the
    # log-density of the tempo prior.
    ftmag = np.log1p(1e6 * np.abs(ftgram))
    if prior is not None:
        ftmag += prior.logpdf(tempo_frequencies)

    peak_values = ftmag.max(axis=-2, keepdims=True)
    ftgram[ftmag < peak_values] = 0

    # Normalize to keep only phase information; the tiny offset guards
    # against division by zero in frames with no energy.
    ftgram /= util.tiny(ftgram) ** 0.5 + np.abs(ftgram.max(axis=-2, keepdims=True))

    # Step 5: invert the Fourier tempogram to get the pulse
    pulse = core.istft(
        ftgram, hop_length=1, n_fft=win_length, length=onset_envelope.shape[-1]
    )

    # Step 6: retain only the positive part of the pulse cycle (in place)
    pulse = np.clip(pulse, 0, None, out=pulse)

    # Return the normalized pulse
    return util.normalize(pulse, axis=-1)


def __beat_tracker(
    onset_envelope: np.ndarray, bpm: float, fft_res: float, tightness: float, trim: bool
) -> np.ndarray:
    """Tracks beats in an onset strength envelope.

    Parameters
    ----------
    onset_envelope : np.ndarray [shape=(n,)]
        onset strength envelope
    bpm : float [scalar]
        tempo estimate (beats per minute), must be strictly positive
    fft_res : float [scalar]
        frame rate of the onset envelope, in frames per second
        (``sr / hop_length``)
    tightness : float [scalar]
        how closely do we adhere to bpm?
    trim : bool [scalar]
        trim leading/trailing beats with weak onsets?

    Returns
    -------
    beats : np.ndarray [shape=(m,)]
        frame numbers of beat events, in ascending order

    Raises
    ------
    ParameterError
        if ``bpm`` is not strictly positive, or if it is so large relative
        to ``fft_res`` that the beat period rounds to zero frames
    """
    if bpm <= 0:
        raise ParameterError("bpm must be strictly positive")

    # convert bpm to a beat period (in frames) for searching
    period = round(60.0 * fft_res / bpm)

    # A zero-frame period would divide by zero when building the score
    # windows below and poison every score with NaN.
    if period < 1:
        raise ParameterError(
            f"bpm={bpm} is too large for a frame rate of {fft_res} frames/second: "
            "the beat period rounds to zero frames"
        )

    # localscore is a smoothed version of AGC'd onset envelope
    localscore = __beat_local_score(onset_envelope, period)

    # run the DP
    backlink, cumscore = __beat_track_dp(localscore, period, tightness)

    # get the position of the last beat
    beats = [__last_beat(cumscore)]

    # Reconstruct the beat path from backlinks; a negative backlink marks
    # the start of the path.
    while backlink[beats[-1]] >= 0:
        beats.append(backlink[beats[-1]])

    # Put the beats in ascending order
    # Convert into an array of frame numbers
    beats = np.array(beats[::-1], dtype=int)

    # Discard spurious trailing beats
    beats = __trim_beats(localscore, beats, trim)

    return beats


# -- Helper functions for beat tracking
def __normalize_onsets(onsets):
    """Scale an onset strength envelope to unit standard deviation.

    The envelope is divided by its sample standard deviation (``ddof=1``).
    No offset is removed and no clipping is applied, so the result is *not*
    confined to a fixed range such as ``[0, 1]``.

    Parameters
    ----------
    onsets : np.ndarray
        onset strength envelope

    Returns
    -------
    onsets_normalized : np.ndarray
        ``onsets / onsets.std(ddof=1)`` when that deviation is strictly
        positive; otherwise ``onsets`` itself, unchanged (this covers constant
        input, and input for which the deviation is NaN).
    """
    norm = onsets.std(ddof=1)

    # ``norm > 0`` is False for both zero and NaN, so degenerate input is
    # passed through rather than divided by zero.
    if norm > 0:
        onsets = onsets / norm

    return onsets


def __beat_local_score(onset_envelope, period):
    """Construct the local score for an onset envelope and given period.

    The envelope is first normalized by ``__normalize_onsets`` and then
    smoothed with a Gaussian kernel spanning ``[-period, period]`` frames,
    whose standard deviation is ``period / 32`` frames.

    Parameters
    ----------
    onset_envelope : np.ndarray
        onset strength envelope
    period : number
        beat period, in frames

    Returns
    -------
    localscore : np.ndarray
        smoothed envelope, the same length as ``onset_envelope``
    """
    # Frame offsets covered by the kernel, rescaled so that one unit
    # corresponds to period / 32 frames
    offsets = np.arange(-period, period + 1)
    scaled = offsets * 32.0 / period
    kernel = np.exp(-0.5 * scaled**2)

    normalized = __normalize_onsets(onset_envelope)

    # "same" keeps the output aligned with, and as long as, the input
    return scipy.signal.convolve(normalized, kernel, "same")


def __beat_track_dp(localscore, period, tightness):
    """Core dynamic program for beat tracking.

    Parameters
    ----------
    localscore : np.ndarray [shape=(n,)]
        per-frame local beat score
    period : number > 0
        beat period, in frames
    tightness : float > 0
        weight of the penalty for deviating from ``period``

    Returns
    -------
    backlink : np.ndarray [shape=(n,), dtype=int]
        for each frame, the frame index of its best preceding beat.
        Negative values mean there is no predecessor inside the signal.
    cumscore : np.ndarray [shape=(n,)]
        best cumulative score of a beat sequence ending at each frame

    Raises
    ------
    ParameterError
        if ``tightness`` is not strictly positive
    """
    # Validate before doing any work
    if tightness <= 0:
        raise ParameterError("tightness must be strictly positive")

    backlink = np.zeros_like(localscore, dtype=int)
    cumscore = np.zeros_like(localscore)

    # Search range for previous beat: frames i - 2*period ... i - period/2
    window = np.arange(-2 * period, -np.round(period / 2) + 1, dtype=int)

    # Transition penalty: zero at exactly one period back, growing with the
    # squared log-ratio of the candidate interval to the period
    txwt = -tightness * (np.log(-window / period) ** 2)

    # Loop-invariant threshold for the "first beat" test below.
    # Guarded so that an empty input still yields empty outputs.
    score_thresh = 0.01 * localscore.max() if localscore.size else 0.0

    # Are we on the first beat?
    first_beat = True
    for i, score_i in enumerate(localscore):
        # Are we reaching back before time 0?
        # z_pad counts the leading window entries that precede the signal.
        z_pad = int(max(0, min(-window[0], len(window))))

        # Search over all possible predecessors
        candidates = txwt.copy()
        candidates[z_pad:] += cumscore[window[z_pad:]]

        # Find the best preceding beat
        beat_location = np.argmax(candidates)

        # Add the local score
        cumscore[i] = score_i + candidates[beat_location]

        # Special case the first onset.  Stop if the localscore is small
        if first_beat and score_i < score_thresh:
            backlink[i] = -1
        else:
            backlink[i] = window[beat_location]
            first_beat = False

        # Update the time range (window is a private array: shift in place)
        window += 1

    return backlink, cumscore


def __last_beat(cumscore):
    """Get the last beat from the cumulative score array.

    Parameters
    ----------
    cumscore : np.ndarray
        cumulative beat-tracking score for each frame

    Returns
    -------
    last_beat : integer
        index of the last local maximum of ``cumscore`` whose doubled score
        exceeds the median score over all local maxima
    """
    is_peak = util.localmax(cumscore)

    # Reference level: the median cumulative score over the local maxima
    peak_positions = np.argwhere(is_peak)
    median_peak_score = np.median(cumscore[peak_positions])

    # Zero out non-peaks, then keep peaks at least half as strong as the median
    doubled_peak_scores = cumscore * is_peak * 2
    strong_peaks = np.argwhere(doubled_peak_scores > median_peak_score)

    # The last of these is the last beat (since score generally increases)
    return strong_peaks.max()


def __trim_beats(localscore: np.ndarray, beats: np.ndarray, trim: bool) -> np.ndarray:
    """Remove spurious leading and trailing beats.

    The local score sampled at the beat positions is smoothed with a 5-point
    Hann window.  Beats before the first, and after the last, position whose
    smoothed score exceeds the threshold are discarded; beats in between are
    always kept.

    Parameters
    ----------
    localscore : np.ndarray
        per-frame local beat score
    beats : np.ndarray [dtype=int]
        frame indices of detected beats, in ascending order
    trim : bool
        if True, the threshold is half the root-mean-square of the smoothed
        beat scores; if False, the threshold is 0

    Returns
    -------
    beats_trimmed : np.ndarray
        the retained beats, including both the first and the last beat that
        pass the threshold.  Empty if no beat passes.
    """
    # Nothing to smooth or trim
    if beats.size == 0:
        return beats

    # scipy.signal.hann has been removed from SciPy; the window functions
    # live in scipy.signal.windows.
    smooth_boe = scipy.signal.convolve(
        localscore[beats], scipy.signal.windows.hann(5), "same"
    )

    if trim:
        threshold = 0.5 * ((smooth_boe**2).mean() ** 0.5)
    else:
        threshold = 0.0

    valid = np.flatnonzero(smooth_boe > threshold)

    # No beat is strong enough: return an empty selection instead of failing
    # on the min/max of an empty array.
    if valid.size == 0:
        return beats[:0]

    # Slice ends are exclusive, so +1 is required to keep the last valid beat
    return beats[valid[0] : valid[-1] + 1]