import numpy as np
import math as m
import sympy as sym
from scipy.signal import spectrogram
from scipy.interpolate import CubicSpline
[docs]
def swipep(x,fs,plim,hop_size,dlog2p,dERBs,sTHR):
"""
Perform the SWIPE' (Sawtooth Waveform Inspired Pitch Estimator) algorithm
for pitch estimation in a given audio signal.
Parameters:
-----------
x : ndarray
Input audio signal (1D array).
fs : float
Sampling frequency of the audio signal (in Hz).
plim : tuple
Pitch range as a tuple (min_pitch, max_pitch) in Hz.
hop_size : float
Hop size for analysis (in samples).
dlog2p : float
Step size for pitch candidates in log2 scale.
dERBs : float
Step size for Equivalent Rectangular Bandwidth (ERB) spaced frequencies.
sTHR : float
Threshold for pitch strength to consider a valid pitch.
Returns:
--------
p : ndarray
Estimated pitch values (in Hz) for each time frame.
t : ndarray
Time vector corresponding to the pitch estimates (in seconds).
s : ndarray
Pitch strength values for each time frame.
Notes:
------
- The function uses a multi-resolution analysis approach to estimate pitch.
- It computes pitch candidates, their strengths, and refines the pitch
estimates using parabolic interpolation.
- The algorithm is robust to noise and works well for a wide range of
pitch frequencies.
"""
dt = hop_size / fs # Time step for analysis (seconds)
t =np.arange( 0, len(x)/fs, dt)# time vektor
dc = 4 #Hop size
K = 2 #Parameter for size window
#Define pitch candidates
log2pc = np.arange(np.log2(plim[0]), np.log2(plim[1]), dlog2p).reshape(-1, 1)
pc = 2 ** log2pc
S = np.zeros((len(pc),len(t))) # Pitch candidate strenght matrix
# Determine P2 - WSs
divFS = [fs / x for x in plim] #variable so I can divide by list
logWs = [round(m.log2(4 * K * df)) for df in divFS]
#ws_arg = np.arange(logWs[0], logWs[1], -1)
ws = 2** np.arange(logWs[0], logWs[1], -1)
pO = 4 * K * fs / ws
# Determine window sizes used by each pitch candidate
d = 1 + log2pc - m.log2(4*K*fs/ws[0])
# Create ERBs spaced frequencies (in Hertz)
fERBs = erbs2hz(np.arange(hz2erbs(pc[0]/4), hz2erbs(fs/2),dERBs))[:,np.newaxis]
for i in range(len(ws)):
dn = round(dc * fs / pO[i]) #Hop size in samples
# Zero pad signal
xzp = np.concatenate([np.zeros((ws[i]//2,)), x.flatten(), np.zeros((dn + ws[i]//2,))])
# Compute spectrum
w = np.hanning(ws[i]) # Hanning window
o = max(0, round(ws[i] - dn))
f, ti, X = spectrogram(xzp, fs=fs, window=w, nperseg=ws[i], noverlap=o, mode='complex')
# Interpolate at eqidistant ERBs steps
# Perform interpolation
# TO DO: ferb je hodnota musim posilat poradi prvku v liste
interp_func = CubicSpline(f, np.abs(X), extrapolate=False)
# Calculate the interpolated magnitudes
M = np.maximum(0, interp_func(fERBs) ) # Ensure non-negative values
M = np.squeeze(M)#
L = [np.sqrt(ms) for ms in M]# Loudness
# Select candidates that use this window size
# Loop over window
# Select candidates that use this window size
if i == len(ws) - 1:
j = np.where(d - i > -1)[0]
k = np.where(d[j] - i < 0)[0]
elif i == 0:
j = np.where(d - i < 1)[0]
k = np.where(d[j] - i > 0)[0]
else:
j = np.where(np.abs(d - i) < 1)[0]
k = np.arange(len(j))
Si = pitchStrengthAllCandidates(fERBs, L, pc[j])
# Pitch strength for selected candidates
# Interpolate at desired times
if Si.shape[1] > 1:
Si = np.nan_to_num(Si, nan=0.0, posinf=0.0, neginf=0.0)
interp_func = CubicSpline(ti, Si.T, extrapolate = False)
#Si = [interp_func(i) for i in t]
Si = interp_func(t)
else:
Si = np.full((len(Si), len(t)), np.nan)
# Calculate lambda and mu for weighting
lambda_ = d[j[k]] - i
mu = np.ones(j.shape).T
mu[k] = 1 - np.abs(lambda_.T)
# Update pitch strength matrix
S[j, :] += np.outer(mu, np.ones(Si.T.shape[1])) * Si.T
#S[j, :] += (mu * Si.T).T
# Initialize pitch and strength ys with NaN
p = np.full((S.shape[1], 1), np.nan)
s = np.full((S.shape[1], 1), np.nan)
# Loop over each time frame
for j in range(S.shape[1]):
# Find the maximum strength and its index
s[j], i = np.max(S[:, j]), np.argmax(S[:, j])
# Skip if the strength is below the threshold
if s[j] < sTHR:
continue
# Handle boundary cases
if i == 0 or i == len(pc) - 1:
p[j] = pc[0]
else:
# Use neighboring points for interpolation
I = np.arange(i-1, i+2) # Indices for 3-point interpolation
tc = 1.0 / pc[I] # Convert pitch candidates to periods
ntc = (tc / tc[1] - 1) * 2 * np.pi # Normalize periods
# Perform parabolic interpolation using polyfit
c = np.polyfit(np.squeeze(ntc), S[I, j], 2)
# Generate fine-tuned frequency candidates for interpolation
ftc = 1.0 / 2.0**np.arange(np.log2(pc[I[0]]), np.log2(pc[I[2]]) + 1/12/64, 1/12/64)
nftc = (ftc / tc[1] - 1) * 2 * np.pi # Normalize fine-tuned candidates Use the interpolated polynomial to find the fine-tuned maximum
s[j], k = np.max(np.polyval(c, nftc)), np.argmax(np.polyval(c, nftc))
# Convert the fine-tuned result back to pitch
p[j] = 2 ** (np.log2(pc[I[0]]) + (k - 1) / (12 * 64))
return p, t, s
[docs]
def pitchStrengthAllCandidates(f,L,pc):
"""
Calculate the pitch strength for all candidates.
Parameters:
f -- Frequency y
L -- Loudness y
pc -- Pitch candidates
Returns:
S -- Pitch salience matrix
"""
with np.errstate(divide= 'ignore', invalid = 'ignore'):
L = np.array(L)
L = L / np.sqrt(np.sum(L ** 2, axis = 0, keepdims = True))
#Create pitch salience matrix
S = np.zeros((len(pc), L.shape[1]))
for j in range(len(pc)):
S[j,:] = pitchStrengthOneCandidate(f, L, pc[j])
return S
[docs]
def pitchStrengthOneCandidate(f,L,pc):
"""
Calculate the pitch strength for one pitch candidate.
Parameters:
f -- Frequency y
L -- Loudness y
pc -- Pitch candidate
Returns:
S -- Pitch strength for this candidate
"""
n = int(np.fix(f[-1]/pc - 0.75)) # Number of harmonics
k = np.zeros(f.shape) # Kernel
q = f / pc #Normalize frequency w.r.t candidate
for i in [1] + list(sym.primerange(n)):
a = np.abs(q-i)
p = a < 0.25 #Peaks weights
k[p] = np.cos(2*np.pi * q[p]) /2
k = k * np.sqrt(1. /f) # Aplly envelope
k = k / np.linalg.norm(k[k > 0]) # K+-normalize kernel
S = np.dot(k.T, L)
return S
[docs]
def hz2erbs(hz):
"""Convert frequency in Hz to ERBs."""
return 21.4* np.log10(1+ hz / 229)
[docs]
def erbs2hz(erbs):
"""Convert ERBs to frequency in Hz."""
return (10** (erbs / 21.4)-1) * 229