# Source code for VuVoPy.features.utils.shimmer

import numpy as np  
from VuVoPy.data.containers.prepocessing import Preprocessed as pp
from VuVoPy.data.containers.sample import VoiceSample as vs
from VuVoPy.data.utils.fundamental_frequency import FundamentalFrequency as f0
from VuVoPy.data.containers.segmentation import Segmented as sg
from VuVoPy.data.utils.vuvs_detection import Vuvs as vuvs
from VuVoPy.data.containers.voiced_sample import VoicedSample as vos


# [docs]
def shimmerAPQ(folder_path, n_points=5, plim=(30, 500), sTHR=0.5, winlen=512, winover=496, wintype='hamm'):
    """
    Calculate shimmer APQ-N: amplitude perturbation quotient over an N-point window.

    The recording is preprocessed, segmented, and passed through the
    voiced/unvoiced/silence detector; the silence-removed signal is then cut
    into consecutive fixed-length "cycles" whose length is the sampling rate
    divided by the median F0. For every cycle the peak-to-peak amplitude is
    taken, and the result is the mean absolute deviation of each amplitude
    from its `n_points`-cycle moving average, divided by the mean amplitude
    of all cycles.

    Args:
        folder_path (str): Path to the .wav file.
        n_points (int): Number of points in the local averaging window (e.g., 3 for APQ3, 5 for APQ5).
            Must be >= 1. For even values the window has no exact centre; the
            upper-middle cycle of each window is used as the reference.
        plim (tuple): F0 pitch range in Hz. Default is (30, 500).
        sTHR (float): Voicing threshold for F0 tracking.
        winlen (int): Window length for segmentation.
        winover (int): Window overlap for segmentation.
        wintype (str): Window type for segmentation.

    Returns:
        float: Shimmer APQ-N value. 0.0 is returned when the measure cannot be
        computed: fewer than `n_points` F0 values above 30 Hz, fewer than
        `n_points` full cycles, a cycle length below one sample, or a zero
        mean cycle amplitude.

    Raises:
        ValueError: If `n_points` is smaller than 1.
    """
    # Fail fast: a non-positive window would otherwise only surface as an
    # opaque NumPy error after all the audio processing has been done.
    if n_points < 1:
        raise ValueError(f"n_points must be a positive integer, got {n_points!r}")

    preprocessed_sample = pp.from_voice_sample(vs.from_wav(folder_path))
    segment = sg.from_voice_sample(preprocessed_sample, winlen, wintype, winover)
    fs = segment.get_sampling_rate()
    labels = vuvs(
        segment,
        fs=fs,
        winlen=segment.get_window_length(),
        winover=segment.get_window_overlap(),
        wintype=segment.get_window_type(),
        smoothing_window=5,
    )
    silence_removed_sample = vos(preprocessed_sample, labels, fs)
    signal = np.asarray(silence_removed_sample.get_silence_remove_sample())
    sr = silence_removed_sample.get_sampling_rate()

    # Extract F0 and keep only values above 30 Hz.
    # NOTE(review): the threshold is hard-coded and matches the default
    # plim[0]; presumably it filters out unvoiced frames -- confirm whether it
    # should follow a caller-supplied plim.
    f0_track = f0(silence_removed_sample, plim=plim, sTHR=sTHR).get_f0()
    f0_track = f0_track[f0_track > 30]
    if len(f0_track) < n_points:
        return 0.0

    # Approximate cycle boundaries using median F0
    cycle_len = int(sr / np.median(f0_track))
    if cycle_len < 1:
        # Median F0 above the sampling rate: no usable cycle length.
        return 0.0

    return _apq_from_amplitudes(_cycle_amplitudes(signal, cycle_len), n_points)


def _cycle_amplitudes(signal, cycle_len):
    """Return the peak-to-peak amplitude of every consecutive full cycle of `cycle_len` samples."""
    n_cycles = len(signal) // cycle_len
    if n_cycles == 0:
        return np.empty(0)
    # One row per cycle; trailing samples that do not fill a cycle are dropped.
    frames = signal[:n_cycles * cycle_len].reshape(n_cycles, -1)
    return frames.max(axis=1) - frames.min(axis=1)


def _apq_from_amplitudes(amplitudes, n_points):
    """Return the mean absolute deviation from the `n_points` moving average, relative to the mean amplitude."""
    if len(amplitudes) < n_points:
        return 0.0

    global_mean_amp = np.mean(amplitudes)
    if global_mean_amp == 0:
        return 0.0

    # Moving average over every complete window of n_points cycles.
    kernel = np.ones(n_points) / n_points
    smoothed = np.convolve(amplitudes, kernel, mode='valid')

    # Pair each window with its middle cycle. Slicing by len(smoothed) keeps
    # the two vectors the same length for even n_points as well.
    offset = n_points // 2
    centers = amplitudes[offset:offset + len(smoothed)]

    return np.mean(np.abs(centers - smoothed) / global_mean_amp)