Source code for VuVoPy.data.containers.segmentation

import numpy as np
from .sample import VoiceSample
from .prepocessing import Preprocessed  

class Segmented(Preprocessed):
    """Frame an audio signal into overlapping, windowed segments.

    ``Segmented`` extends ``Preprocessed`` with the result of cutting the
    original, pre-emphasized and normalized waveforms into frames of
    ``winlen`` samples that overlap by ``winover`` samples, each frame being
    multiplied by a window function.

    Attributes:
        xsegment (numpy.ndarray): Array of shape ``(winlen, n_frames, 3)``
            when built by :meth:`from_voice_sample`. Index 0 of the last
            axis holds the original waveform, index 1 the pre-emphasized
            waveform and index 2 the normalized waveform.
        winlen (int): Window (frame) length in samples.
        wintype (str): Name of the window function that was requested.
        winover (int): Overlap between consecutive frames in samples.

    The arguments ``x``, ``fs``, ``xnorm``, ``preem`` and ``alpha`` are
    forwarded unchanged to ``Preprocessed.__init__``.
    NOTE(review): ``Preprocessed`` is defined in another module; it is
    presumed to store them as attributes of the same names (``self.fs`` is
    read by :meth:`get_sampling_rate`) -- confirm against that class.

    Methods:
        from_voice_sample(voice_sample, winlen, wintype, winover, alpha=0.94):
            Build a ``Segmented`` instance from a voice sample object.
        get_segment(): Windowed frames of the original waveform.
        get_preem_segment(): Windowed frames of the pre-emphasized waveform.
        get_norm_segment(): Windowed frames of the normalized waveform.
        get_sampling_rate(): Sampling rate of the audio signal.
        get_window_type(): Name of the requested window function.
        get_window_length(): Window length in samples.
        get_window_overlap(): Overlap between consecutive windows in samples.
    """

    # Window name -> NumPy factory. Names that are not listed here fall back
    # to the Hamming window (see from_voice_sample).
    _WINDOW_FUNCTIONS = {
        "hann": np.hanning,
        "blackman": np.blackman,
        "hamm": np.hamming,
        "square": np.ones,
    }

    def __init__(self, x, fs, xnorm, preem, xsegment, winlen, wintype, winover, alpha=0.94):
        """Store the segmentation result alongside the preprocessed signal.

        Args:
            x: Original waveform (forwarded to ``Preprocessed``).
            fs: Sampling rate (forwarded to ``Preprocessed``).
            xnorm: Normalized waveform (forwarded to ``Preprocessed``).
            preem: Pre-emphasized waveform (forwarded to ``Preprocessed``).
            xsegment: Segmented data; when ``None`` the raw waveform ``x`` is
                stored in its place.
            winlen: Window length in samples.
            wintype: Name of the window function.
            winover: Overlap between consecutive windows in samples.
            alpha: Pre-emphasis coefficient (forwarded to ``Preprocessed``).
        """
        super().__init__(x, fs, xnorm, preem, alpha)
        self.xsegment = xsegment if xsegment is not None else x
        self.winlen = winlen
        self.wintype = wintype
        self.winover = winover

    @classmethod
    def from_voice_sample(cls, voice_sample, winlen, wintype, winover, alpha=0.94):
        """Create a ``Segmented`` object from a voice sample.

        The waveform, its pre-emphasized version and its normalized version
        are cut into frames of ``winlen`` samples taken every
        ``winlen - winover`` samples, and every frame is multiplied by the
        selected window.

        Args:
            voice_sample (VoiceSample): Object providing ``get_waveform()``,
                ``get_sampling_rate()``, ``get_preemphasis(alpha)`` and
                ``get_normalization()``.
            winlen (int): Window length in samples; must be positive.
            wintype (str): Window name: ``"hann"``, ``"blackman"``,
                ``"hamm"`` or ``"square"``. Any other value selects the
                Hamming window.
            winover (int): Overlap in samples between consecutive frames;
                must be smaller than ``winlen``.
            alpha (float, optional): Pre-emphasis coefficient. Defaults to
                0.94.

        Returns:
            Segmented: Instance holding the (possibly zero-padded) waveforms,
            the sampling rate, the windowed frames and the window parameters.

        Raises:
            ValueError: If ``winlen`` is not positive or ``winover`` is not
                smaller than ``winlen`` (the hop between frames would be
                zero or negative).

        Notes:
            - The signals are zero-padded at the end whenever the last frame
              would otherwise read past the end of the signal. The padded
              signals are the ones stored on the returned instance.
            - A non-empty signal always produces at least one frame; an
              empty signal produces zero frames.
        """
        hop = winlen - winover
        if winlen <= 0 or hop <= 0:
            raise ValueError(
                "winlen must be positive and winover must be smaller than "
                f"winlen (got winlen={winlen}, winover={winover})"
            )

        x = voice_sample.get_waveform()
        fs = voice_sample.get_sampling_rate()
        x_preem = voice_sample.get_preemphasis(alpha)
        x_norm = voice_sample.get_normalization()

        # Unknown window names deliberately fall back to Hamming.
        win = cls._WINDOW_FUNCTIONS.get(wintype, np.hamming)(winlen)

        # Number of frames needed to cover the signal. Signals no longer than
        # the overlap would give a non-positive count, so clamp to one frame.
        if x.size:
            n_frames = max(int(np.ceil((x.size - winover) / hop)), 1)
        else:
            n_frames = 0

        # Index one past the last sample touched by the last frame.
        needed_len = (n_frames - 1) * hop + winlen if n_frames else 0

        # Pad when the length is not a whole number of windows (historical
        # behaviour) or when the overlapping frame grid runs past the end of
        # the signal (previously an IndexError). The target length stays at
        # n_frames * winlen where that is sufficient, so stored waveform
        # lengths are unchanged for inputs that already worked.
        if len(x) % winlen != 0 or len(x) < needed_len:
            padded_len = max(n_frames * winlen, needed_len)
            x, x_preem, x_norm = (
                np.pad(sig, (0, padded_len - len(sig)), mode="constant")
                for sig in (x, x_preem, x_norm)
            )

        # frame_idx[i, j] is the sample index of row i in frame j.
        frame_idx = np.arange(winlen).reshape(-1, 1) + hop * np.arange(n_frames)

        xsegment = np.zeros((winlen, n_frames, 3))
        xsegment[:, :, 0] = x[frame_idx]        # Original waveform
        xsegment[:, :, 1] = x_preem[frame_idx]  # Pre-emphasized
        xsegment[:, :, 2] = x_norm[frame_idx]   # Normalized

        # Apply the window along the sample axis of all three versions.
        xsegment *= win[:, np.newaxis, np.newaxis]

        return cls(x, fs, x_norm, x_preem, xsegment, winlen, wintype, winover, alpha)

    def get_segment(self):
        """Return the windowed frames of the original waveform.

        Assumes ``xsegment`` is the 3-D array built by
        :meth:`from_voice_sample`; the result is its slice ``[:, :, 0]``.
        """
        return self.xsegment[:, :, 0]

    def get_preem_segment(self):
        """Return the windowed frames of the pre-emphasized waveform.

        Assumes ``xsegment`` is the 3-D array built by
        :meth:`from_voice_sample`; the result is its slice ``[:, :, 1]``.
        """
        return self.xsegment[:, :, 1]

    def get_norm_segment(self):
        """Return the windowed frames of the normalized waveform.

        Assumes ``xsegment`` is the 3-D array built by
        :meth:`from_voice_sample`; the result is its slice ``[:, :, 2]``.
        """
        return self.xsegment[:, :, 2]

    def get_sampling_rate(self):
        """Return the sampling rate (``self.fs``)."""
        return self.fs

    def get_window_type(self):
        """Return the window type."""
        return self.wintype

    def get_window_length(self):
        """Return the window length."""
        return self.winlen

    def get_window_overlap(self):
        """Return the window overlap."""
        return self.winover