# Source code for VuVoPy.data.utils.formant_frequencies

import numpy as np
import librosa as lb
import matplotlib.pyplot as plt
from VuVoPy.data.containers.prepocessing import Preprocessed     
from VuVoPy.data.containers.sample import VoiceSample
from VuVoPy.data.containers.segmentation import Segmented


class FormantFrequencies(Segmented):
    """Extract and hold the first three formant frequencies of segmented voice data.

    Formants are estimated per segment from the roots of the LPC polynomial,
    separately for the raw, pre-emphasized and normalized segments.

    Attributes:
        formants (numpy.ndarray): Array of shape ``(N, 3, 3)`` where ``N`` is
            the number of segments. Axis 1 indexes the formant (F1, F2, F3)
            and axis 2 indexes the signal variant
            (0 = raw, 1 = pre-emphasized, 2 = normalized). Values are in Hz.
            Entries for which no formant candidate was found are ``0.0``.

    Methods:
        from_voice_sample(segments): Build an instance from segmented data.
        get_formants(): Formants of the raw segments, shape ``(N, 3)``.
        get_formants_preem(): Formants of the pre-emphasized segments.
        get_formants_norm(): Formants of the normalized segments.
        get_sampling_rate(): Sampling rate of the voice data.
    """

    def __init__(self, fs, formants):
        """Store the sampling rate and the precomputed formant array.

        Args:
            fs: Sampling rate; forwarded to the ``Segmented`` base class.
            formants: Array indexed as ``[segment, formant, variant]``
                (see the class docstring).
        """
        # Only the sampling rate is forwarded to the base class; every other
        # positional argument of Segmented.__init__ is passed as None.
        super().__init__(None, fs, None, None, None, None, None, None, None)
        self.formants = formants

    @staticmethod
    def _lowest_formants(lpc_coeffs, fs, count=3):
        """Return the ``count`` lowest formant candidates (Hz) of one LPC polynomial.

        Args:
            lpc_coeffs: 1-D sequence of LPC polynomial coefficients.
            fs: Sampling rate used to convert root angles to Hz.
            count: Number of frequencies to return.

        Returns:
            numpy.ndarray of length ``count``, sorted ascending. When fewer
            than ``count`` candidates exist the remaining entries are ``0.0``.
        """
        roots = np.roots(lpc_coeffs)
        # Keep one root of every complex-conjugate pair: strictly positive
        # imaginary part (purely real roots are discarded as well).
        roots = roots[np.imag(roots) > 0]
        # The angle of a root on the z-plane is its normalized frequency.
        angles = np.sort(np.arctan2(np.imag(roots), np.real(roots)))[:count]
        # Zero-pad so that frames with fewer than `count` complex roots do
        # not raise IndexError in the caller.
        freqs = np.zeros(count)
        freqs[:angles.size] = angles * (fs / (2 * np.pi))
        return freqs

    @classmethod
    def from_voice_sample(cls, segments):
        """Create an instance by extracting formants from segmented voice data.

        Args:
            segments: Object providing ``get_segment()``,
                ``get_preem_segment()``, ``get_norm_segment()`` and
                ``get_sampling_rate()``.

        Returns:
            An instance initialized with the sampling rate and an
            ``(N, 3, 3)`` formant array.

        Notes:
            - LPC coefficients of order ``int(fs / 1000 + 2)`` are computed
              for the raw, pre-emphasized and normalized segments.
            - Formant candidates are the angles of the LPC polynomial roots
              with strictly positive imaginary part, converted to Hz.
            - The three lowest candidates are kept per segment; no bandwidth
              based filtering is applied.
            - Each signal variant is handled independently; missing
              candidates are reported as ``0.0``.
        """
        seg_x = segments.get_segment()
        seg_x_preem = segments.get_preem_segment()
        seg_x_norm = segments.get_norm_segment()
        fs = segments.get_sampling_rate()

        order = int(np.fix(fs / 1000 + 2))
        # NOTE(review): the result is indexed as [segment, coefficient], so
        # the segment arrays are presumably (n_segments, frame_length) -
        # confirm against Segmented.
        lpc_per_variant = [
            lb.lpc(seg_x, order=order),
            lb.lpc(seg_x_preem, order=order),
            lb.lpc(seg_x_norm, order=order),
        ]

        n_segments = lpc_per_variant[0].shape[0]
        formants = np.zeros((n_segments, 3, 3))
        for variant, lpc_coeffs in enumerate(lpc_per_variant):
            for i in range(n_segments):
                formants[i, :, variant] = cls._lowest_formants(
                    lpc_coeffs[i, :], fs
                )

        return cls(fs, formants)

    def get_formants(self):
        """Return the formants extracted from the raw waveform, shape ``(N, 3)``."""
        return self.formants[:, :, 0]

    def get_formants_preem(self):
        """Return the formants extracted from the pre-emphasized waveform."""
        return self.formants[:, :, 1]

    def get_formants_norm(self):
        """Return the formants extracted from the normalized waveform."""
        return self.formants[:, :, 2]

    def get_sampling_rate(self):
        """Return the sampling rate."""
        return self.fs