Interval to individual .wav files - selmling/Analytics-and-Data-Exploration GitHub Wiki

import pympi
import parselmouth
import numpy as np
import pandas as pd

def _utterances_from_text_grid(text_grid_file):
    """Flatten every tier of the text grid into one frame of labelled intervals.

    Returns a DataFrame with columns ``onset``, ``offset``, ``utt`` and
    ``speaker`` (the tier identifier). Intervals whose label is empty or
    missing are dropped. The frame is empty when no labelled interval exists.
    """
    columns = ["onset", "offset", "utt", "speaker"]
    frames = []
    for tier_info in text_grid_file.get_tier_name_num():
        # The second element of each entry identifies the tier; it is used both
        # to fetch the tier and as the speaker label.
        tier_id = tier_info[1]
        intervals = pd.DataFrame(
            text_grid_file.get_tier(tier_id).get_all_intervals()
        )
        if intervals.empty:
            continue
        intervals["speaker"] = tier_id
        frames.append(intervals)
    if not frames:
        return pd.DataFrame(columns=columns)
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    merged = pd.concat(frames)
    if 2 not in merged.columns:
        # No tier supplied a label column, so there is nothing to export.
        return pd.DataFrame(columns=columns)
    labelled = merged[merged[2].notna() & (merged[2] != "")]
    labelled = labelled.reset_index(drop=True)
    return labelled.rename(columns={0: "onset", 1: "offset", 2: "utt"})


def _trial_id_at(time_point, trial_windows):
    """Return the first trial ID with a window containing ``time_point``.

    ``trial_windows`` is a list of ``(trial_id, frame)`` pairs; a window
    matches when ``onset <= time_point <= offset``. Returns the string
    ``"NaN"`` when no window matches.
    """
    for trial_id, windows in trial_windows:
        inside = (time_point >= windows["onset"]) & (time_point <= windows["offset"])
        if inside.any():
            return trial_id
    return "NaN"


def extract_wav_clips(trial_times, sound_file_name, snd, text_grid_file, out_path):
    """Save one .wav clip per labelled text-grid interval.

    Each clip is written to
    ``{out_path}/{sound_file_name}_{utt}_{onset}_{offset}_{speaker}_{trial}.wav``
    where ``trial`` is ``"infant_absent"``, ``"infant_present"`` or ``"NaN"``
    depending on which trial window contains the interval's onset
    (``infant_absent`` wins if both match).

    Args:
        trial_times: DataFrame with columns ``trial_ID``, ``onset`` and
            ``offset``. Any number of rows per trial ID is accepted.
        sound_file_name: Prefix used in every output file name.
        snd: Object providing
            ``extract_part(from_time=, to_time=, preserve_times=)`` whose result
            has ``save(path, "WAV")`` (presumably a parselmouth ``Sound`` --
            confirm against the caller).
        text_grid_file: Object providing ``get_tier_name_num()`` and
            ``get_tier()`` (presumably a pympi ``TextGrid`` -- confirm against
            the caller).
        out_path: Directory the clips are written to; it is not created here.

    Returns:
        None. Nothing is written when the text grid has no labelled interval.
    """
    utterances = _utterances_from_text_grid(text_grid_file)
    if utterances.empty:
        return

    # Split the trial table once, outside the per-utterance loop.
    choices = ["infant_absent", "infant_present"]
    trial_windows = [
        (choice, trial_times[trial_times["trial_ID"] == choice])
        for choice in choices
    ]

    # NOTE(review): `utt` goes into the file name unmodified; a label containing
    # a path separator would change the target path -- confirm labels are safe.
    for clip in utterances.itertuples(index=False):
        ft, tt = clip.onset, clip.offset
        tr = _trial_id_at(ft, trial_windows)
        snd_clip = snd.extract_part(from_time=ft,
                                    to_time=tt,
                                    preserve_times=True)
        snd_clip.save(
            "{}/{}_{}_{}_{}_{}_{}.wav".format(
                out_path, sound_file_name, clip.utt, ft, tt, clip.speaker, tr
            ),
            "WAV",
        )