# Source code for VuVoPy.features.utils.hnr_fft

import numpy as np
from scipy.signal import get_window
from VuVoPy.data.containers.prepocessing import Preprocessed as pp
from VuVoPy.data.containers.sample import VoiceSample as vs
from VuVoPy.data.containers.segmentation import Segmented as sg
from VuVoPy.data.utils.fundamental_frequency import FundamentalFrequency as ff


# [docs] (link marker left over from the Sphinx "viewcode" page)
def hnr_fft(folder_path, winlen=1608, winover=804, wintype='hann', f0_min=75, f0_max=500):
    """
    Compute the mean harmonics-to-noise ratio (HNR) using an FFT-based spectral method.

    The audio file is loaded and pre-emphasised, an F0 track is estimated, and
    the signal is cut into windowed frames. For every frame with a usable F0,
    the power in the FFT bins closest to the first five harmonics (those below
    the Nyquist frequency) counts as harmonic power; the remainder of the
    frame's power spectrum counts as noise.

    Parameters:
    - folder_path: Path to the audio file
    - winlen: Frame length in samples
    - winover: Step between consecutive frame starts, in samples (frame ``i``
      starts at sample ``i * winover``). The same value is passed to the F0
      estimator as its hop size. It coincides with the frame overlap only when
      it is half of ``winlen``, as with the defaults.
    - wintype: Window type (any name accepted by ``scipy.signal.get_window``)
    - f0_min: Minimum fundamental frequency (Hz)
    - f0_max: Maximum fundamental frequency (Hz)

    Returns:
    - Mean HNR value across frames (in dB), or NaN when no frame produces a
      value (signal shorter than one frame, empty F0 track, or every frame's
      F0 is NaN / outside ``[f0_min, f0_max]``).
    """

    # Load and preprocess the audio file
    voice_sample = vs.from_wav(folder_path)
    processed_sample = pp.from_voice_sample(voice_sample)
    signal = processed_sample.get_preemphasis()
    fs = voice_sample.get_sampling_rate()  # Get sampling rate

    # Compute fundamental frequency using FundamentalFrequency class
    f0_array = ff(voice_sample, plim=(f0_min, f0_max), hop_size=winover, dlog2p=1/96, dERBs=0.1, sTHR=-np.inf).get_f0()

    # Number of complete frames that fit into the signal
    num_frames = (len(signal) - winlen) // winover + 1

    # Nothing to analyse: the signal is shorter than one frame, or the F0
    # estimator returned no values (edge-padding an empty array would raise).
    if num_frames <= 0 or np.size(f0_array) == 0:
        return float('nan')

    # Ensure F0 array covers every frame by repeating its last value
    f0_array = np.pad(f0_array, (0, max(0, num_frames - len(f0_array))), mode='edge')

    hnr_values = []
    epsilon = 1e-10  # To avoid log(0)

    # Frame-independent quantities, computed once
    window = get_window(wintype, winlen)
    freqs = np.fft.rfftfreq(winlen, 1 / fs)
    nyquist = fs / 2

    # Process each frame
    for i in range(num_frames):
        # Check F0 first so rejected frames never pay for an FFT
        f0 = f0_array[i]
        if np.isnan(f0) or f0 < f0_min or f0 > f0_max:
            continue  # Skip unreliable F0 values

        # By construction of num_frames the slice always has winlen samples
        start = i * winover
        frame = signal[start:start + winlen] * window  # Apply window
        spectrum = np.abs(np.fft.rfft(frame)) ** 2  # Power spectrum

        # FFT bins closest to the first five harmonics below Nyquist
        harmonic_indices = [
            np.argmin(np.abs(freqs - (n * f0)))
            for n in range(1, 6)
            if (n * f0) < nyquist
        ]
        if not harmonic_indices:
            continue  # No harmonic below Nyquist, hence no harmonic power

        # Several harmonics can land in the same bin when the harmonic spacing
        # is below the bin spacing; count each bin's power only once.
        harmonic_bins = np.unique(harmonic_indices)

        # Compute harmonic and noise power
        harmonic_power = np.sum(spectrum[harmonic_bins])
        total_power = np.sum(spectrum)
        noise_power = max(total_power - harmonic_power, epsilon)  # Ensure non-zero noise power

        # Compute HNR in dB (frames without harmonic power are left out)
        if harmonic_power > 0:
            hnr_values.append(10 * np.log10(harmonic_power / noise_power))

    return np.mean(hnr_values) if hnr_values else float('nan')