Skip to content

Commit de6b021

Browse files
committed
removed some spaghetti code & add docs in some methods
1 parent 7d2342b commit de6b021

File tree

6 files changed

+87
-69
lines changed

6 files changed

+87
-69
lines changed
-682 Bytes
Binary file not shown.

convert_wavs.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,32 @@
1-
import os
21

32

3+
"""
4+
A utility script used for converting audio samples to be
5+
suitable for feature extraction
6+
"""
7+
8+
import os
9+
410
def convert_audio(audio_path, target_path, remove=False):
511
"""This function sets the audio `audio_path` to:
612
- 16000Hz Sampling rate
7-
- one number of audio channels ( mono )
13+
- one audio channel ( mono )
814
Params:
915
audio_path (str): the path of audio wav file you want to convert
1016
target_path (str): target path to save your new converted wav file
1117
remove (bool): whether to remove the old file after converting
1218
Note that this function requires ffmpeg installed in your system."""
1319

14-
# os.system(f"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}")
15-
os.system(f"ffmpeg -i {audio_path} -ac 1 {target_path}")
20+
os.system(f"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}")
21+
# os.system(f"ffmpeg -i {audio_path} -ac 1 {target_path}")
1622
if remove:
1723
os.remove(audio_path)
1824

1925

2026
def convert_audios(path, target_path, remove=False):
2127
"""Converts a path of wav files to:
2228
- 16000Hz Sampling rate
23-
- one number of audio channels ( mono )
29+
- one audio channel ( mono )
2430
and then put them into a new folder called `target_path`
2531
Params:
2632
path (str): the path of the folder containing the audio wav files you want to convert

create_csv.py

Lines changed: 55 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ def write_emodb_csv(emotions=["sad", "neutral", "happy"], train_name="train_emo.
77
test_name="test_emo.csv", train_size=0.8, verbose=1):
88
"""
99
Reads speech emodb dataset from directory and write it to a metadata CSV file.
10+
params:
11+
emotions (list): list of emotions to read from the folder, default is ['sad', 'neutral', 'happy']
12+
train_name (str): the output csv filename for training data, default is 'train_emo.csv'
13+
test_name (str): the output csv filename for testing data, default is 'test_emo.csv'
14+
train_size (float): the ratio of splitting training data, default is 0.8 (80% Training data and 20% testing data)
15+
verbose (int/bool): verbosity level, 0 for silence, 1 for info, default is 1
1016
"""
1117
target = {"path": [], "emotion": []}
1218
categories = {
@@ -50,65 +56,62 @@ def write_tess_ravdess_csv(emotions=["sad", "neutral", "happy"], train_name="tra
5056
test_name="test_tess_ravdess.csv", verbose=1):
5157
"""
5258
Reads speech TESS & RAVDESS datasets from directory and write it to a metadata CSV file.
59+
params:
60+
emotions (list): list of emotions to read from the folder, default is ['sad', 'neutral', 'happy']
61+
train_name (str): the output csv filename for training data, default is 'train_tess_ravdess.csv'
62+
test_name (str): the output csv filename for testing data, default is 'test_tess_ravdess.csv'
63+
verbose (int/bool): verbosity level, 0 for silence, 1 for info, default is 1
5364
"""
54-
target = {"path": [], "emotion": []}
55-
categories = {
56-
1: "neutral",
57-
2: "calm",
58-
3: "happy",
59-
4: "sad",
60-
5: "angry",
61-
6: "fear",
62-
7: "disgust",
63-
8: "ps"
64-
}
65-
# delete not specified emotions
66-
categories_reversed = { v: k for k, v in categories.items() }
67-
for emotion, code in categories_reversed.items():
68-
if emotion not in emotions:
69-
del categories[code]
70-
# for training speech directory
71-
for _, category in categories.items():
65+
train_target = {"path": [], "emotion": []}
66+
test_target = {"path": [], "emotion": []}
67+
68+
for category in emotions:
69+
# for training speech directory
7270
for i, path in enumerate(glob.glob(f"data/training/Actor_*/*_{category}.wav")):
73-
target["path"].append(path)
74-
target["emotion"].append(category)
71+
train_target["path"].append(path)
72+
train_target["emotion"].append(category)
7573
if verbose:
7674
print(f"[TESS&RAVDESS] There are {i} training audio files for category:{category}")
77-
pd.DataFrame(target).to_csv(train_name)
78-
target = {"path": [], "emotion": []}
79-
# for validation speech directory
80-
for _, category in categories.items():
75+
76+
# for validation speech directory
8177
for i, path in enumerate(glob.glob(f"data/validation/Actor_*/*_{category}.wav")):
82-
target["path"].append(path)
83-
target["emotion"].append(category)
78+
test_target["path"].append(path)
79+
test_target["emotion"].append(category)
8480
if verbose:
8581
print(f"[TESS&RAVDESS] There are {i} testing audio files for category:{category}")
86-
pd.DataFrame(target).to_csv(test_name)
82+
pd.DataFrame(test_target).to_csv(test_name)
83+
pd.DataFrame(train_target).to_csv(train_name)
8784

8885

89-
def write_custom_csv(emotions=['sad', 'neutral', 'happy'], train_name="train_custom.csv", test_name="test_custom.csv"):
90-
categories = {
91-
1: "sad",
92-
2: "neutral",
93-
3: "happy"
94-
}
95-
# delete not specified emotions
96-
categories_reversed = { v: k for k, v in categories.items() }
97-
for emotion, code in categories_reversed.items():
98-
if emotion not in emotions:
99-
del categories[code]
100-
target = {"path": [], "emotion": []}
101-
for code, category in categories.items():
102-
for file in glob.glob(f"data/train-custom/*_{category}.wav"):
103-
target["path"].append(file)
104-
target["emotion"].append(category)
105-
if target["path"]:
106-
pd.DataFrame(target).to_csv(train_name)
86+
def write_custom_csv(emotions=['sad', 'neutral', 'happy'], train_name="train_custom.csv", test_name="test_custom.csv",
87+
verbose=1):
88+
"""
89+
Reads Custom Audio data from data/*-custom and then writes description files (csv)
90+
params:
91+
emotions (list): list of emotions to read from the folder, default is ['sad', 'neutral', 'happy']
92+
train_name (str): the output csv filename for training data, default is 'train_custom.csv'
93+
test_name (str): the output csv filename for testing data, default is 'test_custom.csv'
94+
verbose (int/bool): verbosity level, 0 for silence, 1 for info, default is 1
95+
"""
96+
train_target = {"path": [], "emotion": []}
97+
test_target = {"path": [], "emotion": []}
98+
for category in emotions:
99+
# train data
100+
for i, file in enumerate(glob.glob(f"data/train-custom/*_{category}.wav")):
101+
train_target["path"].append(file)
102+
train_target["emotion"].append(category)
103+
if verbose:
104+
print(f"[Custom Dataset] There are {i} training audio files for category:{category}")
105+
106+
# test data
107+
for i, file in enumerate(glob.glob(f"data/test-custom/*_{category}.wav")):
108+
test_target["path"].append(file)
109+
test_target["emotion"].append(category)
110+
if verbose:
111+
print(f"[Custom Dataset] There are {i} testing audio files for category:{category}")
112+
113+
if train_target["path"]:
114+
pd.DataFrame(train_target).to_csv(train_name)
107115

108-
target = {"path": [], "emotion": []}
109-
for code, category in categories.items():
110-
for file in glob.glob(f"data/test-custom/*_{category}.wav"):
111-
target["path"].append(file)
112-
target["emotion"].append(category)
113-
if target["path"]:
114-
pd.DataFrame(target).to_csv(test_name)
116+
if test_target["path"]:
117+
pd.DataFrame(test_target).to_csv(test_name)

data_extractor.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,20 @@
1111

1212
class AudioExtractor:
1313
"""A class that is used to featurize audio clips, and provide
14-
them to the network for training and testing"""
15-
def __init__(self, desc_file=None, audio_config=None, verbose=1, features_folder_name="features", classification=True,
14+
them to the machine learning algorithms for training and testing"""
15+
def __init__(self, audio_config=None, verbose=1, features_folder_name="features", classification=True,
1616
emotions=['sad', 'neutral', 'happy'], balance=True):
17-
"""Params:
18-
desc_file (str, optional): Path to a csv file that contains labels and paths to the audio files.
19-
If provided, then load metadata right away."""
20-
self.desc_file = desc_file
17+
"""
18+
Params:
19+
audio_config (dict): the dictionary that indicates what features to extract from the audio file,
20+
default is {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
21+
(i.e. mfcc, chroma and mel)
22+
verbose (bool/int): verbosity level, 0 for silence, 1 for info, default is 1
23+
features_folder_name (str): the folder to store output features extracted, default is "features".
24+
classification (bool): whether the task is classification or regression, default is True (i.e. classification)
25+
emotions (list): list of emotions to be extracted, default is ['sad', 'neutral', 'happy']
26+
balance (bool): whether to balance dataset (both training and testing), default is True
27+
"""
2128
self.audio_config = audio_config if audio_config else {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
2229
self.verbose = verbose
2330
self.features_folder_name = features_folder_name
@@ -59,9 +66,11 @@ def shuffle_data_by_partition(self, partition):
5966
raise TypeError("Invalid partition, must be either train/test")
6067

6168
def load_metadata_from_desc_file(self, desc_files, partition):
62-
"""Read metadata from a CSV file
69+
"""Read metadata from CSV description files, then extract and load the features of the audio files
6370
Params:
64-
desc_files"""
71+
desc_files (list): list of description files (csv files) to read from
72+
partition (str): which partition to load, either "train" or "test"
73+
"""
6574
# empty dataframe
6675
df = pd.DataFrame({'path': [], 'emotion': []})
6776
for desc_file in desc_files:

test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
from utils import extract_feature
21
from emotion_recognition import EmotionRecognizer
32

43
import pyaudio
54
import os
6-
import time
75
import wave
86
import numpy as np
97
from sys import byteorder

utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,17 @@ def get_first_letters(emotions):
4343

4444

4545
def extract_feature(file_name, **kwargs):
46-
"""Extract feature from audio file `file_name`
46+
"""
47+
Extract feature from audio file `file_name`
4748
Features supported:
4849
- MFCC (mfcc)
4950
- Chroma (chroma)
5051
- MEL Spectrogram Frequency (mel)
5152
- Contrast (contrast)
5253
- Tonnetz (tonnetz)
5354
e.g:
54-
`features = extract_feature(path, mel=True, mfcc=True)`"""
55+
`features = extract_feature(path, mel=True, mfcc=True)`
56+
"""
5557
mfcc = kwargs.get("mfcc")
5658
chroma = kwargs.get("chroma")
5759
mel = kwargs.get("mel")

0 commit comments

Comments
 (0)