# Source code for VuVoPy.features.utils.hnr_cepstral

import numpy as np
from scipy.signal import find_peaks
from VuVoPy.data.containers.prepocessing import Preprocessed as pp
from VuVoPy.data.containers.sample import VoiceSample as vs
from VuVoPy.data.containers.segmentation import Segmented as sg
from VuVoPy.data.utils.fundamental_frequency import FundamentalFrequency as ff


# [docs]
def hnr_cepstrum(folder_path, winlen=1024, winover=512, wintype='hann', f0_min=75, f0_max=500):
    """
    Compute a mean harmonics-to-noise ratio (HNR) using cepstral analysis.

    The recording is loaded, preprocessed and split into windowed frames.
    For every frame the real cepstrum (inverse FFT of the log power
    spectrum) is computed, and the largest cepstral value inside the
    quefrency band that corresponds to [f0_min, f0_max] is compared with the
    mean cepstral value of that same band:

        HNR_frame = 10 * log10(peak / mean)

    Parameters:
    - folder_path: Path to the audio file (passed to ``VoiceSample.from_wav``)
    - winlen: Frame length, forwarded unchanged to
      ``Segmented.from_voice_sample`` (presumably in samples -- TODO confirm)
    - winover: Frame overlap, forwarded unchanged to
      ``Segmented.from_voice_sample`` (presumably in samples -- TODO confirm)
    - wintype: Window type
    - f0_min: Minimum fundamental frequency (Hz); must be positive
    - f0_max: Maximum fundamental frequency (Hz); must be positive

    Returns:
    - Mean HNR value across the frames that could be evaluated (in dB).
      Frames whose quefrency band is empty, or whose mean cepstral value in
      the band is not greater than 1e-10, are skipped. If no frame can be
      evaluated, NaN is returned.
    """

    # Load and preprocess the audio file
    segment = sg.from_voice_sample(pp.from_voice_sample(vs.from_wav(folder_path)), winlen, wintype, winover)
    signal = segment.get_norm_segment().T  # Transpose to (num_frames, num_samples)
    fs = segment.get_sampling_rate()

    epsilon = 1e-10  # To avoid log(0); also the minimum accepted noise floor

    # The quefrency band (in samples) depends only on the sampling rate and
    # the F0 limits, so it is computed once instead of once per frame.
    min_quefrency = int(fs / f0_max)
    max_quefrency = int(fs / f0_min)

    hnr_values = []
    for frame in signal:
        # The cepstrum has as many bins as the frame has samples. Clamping
        # the upper bound makes the emptiness check below reliable; without
        # it a band lying entirely past the end of the frame produced an
        # empty slice and a ValueError from the peak search.
        upper_quefrency = min(max_quefrency, len(frame))
        if min_quefrency >= upper_quefrency:
            continue  # Skip frame if range is invalid or empty

        # Log power spectrum -> real cepstrum (inverse FFT of log spectrum)
        spectrum = np.abs(np.fft.fft(frame)) ** 2
        log_spectrum = np.log(spectrum + epsilon)
        cepstrum = np.fft.ifft(log_spectrum).real

        band = cepstrum[min_quefrency:upper_quefrency]
        r_max = np.max(band)        # Maximum cepstral peak in the F0 band
        noise_floor = np.mean(band)  # Average level used as background noise

        # A non-positive (or vanishing) mean would make the ratio
        # meaningless, so such frames do not contribute to the result.
        if noise_floor > epsilon:
            hnr_values.append(10 * np.log10(r_max / noise_floor))

    return np.mean(hnr_values) if hnr_values else float('nan')