Source code for VuVoPy.features.utils.hnr_cepstral

import numpy as np
from scipy.signal import find_peaks
from VuVoPy.data.containers.prepocessing import Preprocessed as pp
from VuVoPy.data.containers.sample import VoiceSample as vs
from VuVoPy.data.containers.segmentation import Segmented as sg
from VuVoPy.data.utils.fundamental_frequency import FundamentalFrequency as ff

def hnr_cepstrum(folder_path, winlen=1024, winover=512, wintype='hann', f0_min=75, f0_max=500):
    """
    Compute HNR using cepstral analysis (quefrency method).

    For every frame the real cepstrum is computed as the inverse FFT of the
    log power spectrum. Within the quefrency band that corresponds to the
    allowed F0 range, the ratio (in dB) between the largest cepstral value
    and the mean cepstral value of that band is taken as the frame's HNR.
    Frames for which this ratio cannot be evaluated are skipped.

    Parameters:
    - folder_path: Path to the audio (WAV) file; despite its name it is
      passed directly to ``VoiceSample.from_wav``
    - winlen: Frame length in samples
    - winover: Overlap between consecutive frames in samples
    - wintype: Window type
    - f0_min: Minimum fundamental frequency (Hz)
    - f0_max: Maximum fundamental frequency (Hz)

    Returns:
    - Mean HNR value across the usable frames (in dB), or NaN when no
      frame yields a value (e.g. the F0 range is empty or inverted, the
      quefrency band lies outside every frame, or the band's mean
      cepstral level is never above the numerical floor)
    """
    # Load, preprocess and segment the audio file.
    segment = sg.from_voice_sample(
        pp.from_voice_sample(vs.from_wav(folder_path)), winlen, wintype, winover
    )
    # Transposed so that iterating yields one frame per step
    # (expected layout after transpose: (num_frames, num_samples)).
    frames = segment.get_norm_segment().T
    fs = segment.get_sampling_rate()

    epsilon = 1e-10  # To avoid log(0); also used as the minimum noise floor

    # The quefrency band (in samples) matching the valid F0 range does not
    # depend on the frame, so it is computed once instead of per frame.
    min_quefrency = int(fs / f0_max)
    max_quefrency = int(fs / f0_min)
    if min_quefrency >= max_quefrency:
        # No frame could ever contribute a value with an empty/inverted band.
        return float('nan')

    hnr_values = []
    for frame in frames:
        # Log power spectrum -> cepstrum (inverse FFT of the log spectrum).
        spectrum = np.abs(np.fft.fft(frame)) ** 2
        cepstrum = np.fft.ifft(np.log(spectrum + epsilon)).real

        # Slicing clips to the frame length; when the whole band lies beyond
        # the frame the slice is empty and np.argmax would raise ValueError,
        # so such frames are skipped instead of crashing.
        band = cepstrum[min_quefrency:max_quefrency]
        if band.size == 0:
            continue

        r_max = band.max()         # Maximum cepstral peak in the band
        noise_floor = band.mean()  # Average background level in the band

        # Only a strictly positive floor gives a meaningful ratio; since
        # r_max >= noise_floor, the logarithm's argument is then >= 1.
        if noise_floor > epsilon:
            hnr_values.append(10 * np.log10(r_max / noise_floor))

    return np.mean(hnr_values) if hnr_values else float('nan')