Interval to individual .wav files - selmling/Analytics-and-Data-Exploration GitHub Wiki
import pympi
import parselmouth
import numpy as np
import pandas as pd
def extract_wav_clips(trial_times, sound_file_name, snd, text_grid_file, out_path):
    """Save every non-empty TextGrid interval as its own .wav clip.

    Each clip is written to
    ``{out_path}/{sound_file_name}_{utt}_{onset}_{offset}_{speaker}_{trial}.wav``
    where ``trial`` is ``"infant_absent"``, ``"infant_present"`` or ``"NaN"``
    depending on which trial window (if any) contains the interval's onset.

    Args:
        trial_times: DataFrame with columns ``trial_ID``, ``onset`` and
            ``offset``. Only rows whose ``trial_ID`` is ``"infant_absent"``
            or ``"infant_present"`` are used.
        sound_file_name: Label placed at the start of every output file name.
        snd: Object exposing ``extract_part(from_time=, to_time=,
            preserve_times=)`` whose result has ``save(path, "WAV")``
            (presumably a ``parselmouth.Sound`` -- confirm against caller).
        text_grid_file: Object exposing ``get_tier_name_num()`` and
            ``get_tier(name)``, where each tier has ``get_all_intervals()``
            (presumably a ``pympi`` Praat TextGrid -- confirm against caller).
        out_path: Directory prefix for the output files; it is not created
            here.

    Returns:
        None. The only effect is the files written through ``save``.
    """
    # Convert the text grid into one DataFrame: one frame per tier, tagged
    # with the tier name, concatenated once at the end.  (DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.)
    tier_frames = []
    for tier_entry in text_grid_file.get_tier_name_num():
        tier_name = tier_entry[1]
        tier = text_grid_file.get_tier(tier_name)
        tier_df = pd.DataFrame(tier.get_all_intervals())
        tier_df["speaker"] = tier_name
        tier_frames.append(tier_df)

    if not tier_frames:
        return  # no tiers, so there is nothing to cut
    df_all = pd.concat(tier_frames, ignore_index=True)
    if 2 not in df_all.columns:
        return  # no interval carried a text field

    # Keep only intervals that have text: drop both missing values and ''.
    has_text = df_all[2].notna() & (df_all[2] != "")
    df_all = df_all[has_text].reset_index(drop=True)
    df_all = df_all.rename(columns={0: "onset", 1: "offset", 2: "utt"})

    # Pre-extract the trial windows once; they do not change per clip.
    # Order matters: an onset inside both kinds of window is labelled
    # "infant_absent", matching the original condition priority.
    trial_windows = []
    for trial_id in ("infant_absent", "infant_present"):
        rows = trial_times[trial_times["trial_ID"] == trial_id]
        trial_windows.append(
            (trial_id, rows["onset"].to_numpy(), rows["offset"].to_numpy())
        )

    # Loop through the intervals and generate sound clip files.
    clips = zip(df_all["onset"], df_all["offset"], df_all["utt"], df_all["speaker"])
    for ft, tt, utt, spkr in clips:
        # Which trial has a window with onset <= ft <= offset?  Every window
        # of a trial type is checked, not just the first one.
        tr = "NaN"
        for trial_id, onsets, offsets in trial_windows:
            if ((ft >= onsets) & (ft <= offsets)).any():
                tr = trial_id
                break

        snd_clip = snd.extract_part(from_time=ft, to_time=tt, preserve_times=True)
        # NOTE(review): `utt` goes into the file name verbatim; an utterance
        # containing a path separator would presumably break the save.
        snd_clip.save(
            "{}/{}_{}_{}_{}_{}_{}.wav".format(
                out_path, sound_file_name, utt, ft, tt, spkr, tr
            ),
            "WAV",
        )