Remove some spaghetti code and add docs to some methods

x4nth055 · x4nth055 · commit de6b02191252 · 2019-07-17T15:43:38.000+02:00
diff --git a/__pycache__/create_csv.cpython-36.pyc b/__pycache__/create_csv.cpython-36.pyc
diff --git a/convert_wavs.py b/convert_wavs.py
@@ -1,26 +1,32 @@
-import os
 
 
+"""
+A utility script used for converting audio samples to be 
+suitable for feature extraction
+"""
+
+import os
+
 def convert_audio(audio_path, target_path, remove=False):
     """This function sets the audio `audio_path` to:
         - 16000Hz Sampling rate
-        - one number of audio channels ( mono )
+        - one audio channel ( mono )
             Params:
                 audio_path (str): the path of audio wav file you want to convert
                 target_path (str): target path to save your new converted wav file
                 remove (bool): whether to remove the old file after converting
         Note that this function requires ffmpeg installed in your system."""
 
-    # os.system(f"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}")
-    os.system(f"ffmpeg -i {audio_path} -ac 1 {target_path}")
+    os.system(f"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}")
+    # os.system(f"ffmpeg -i {audio_path} -ac 1 {target_path}")
     if remove:
         os.remove(audio_path)
 
 
 def convert_audios(path, target_path, remove=False):
     """Converts a path of wav files to:
         - 16000Hz Sampling rate
-        - one number of audio channels ( mono )
+        - one audio channel ( mono )
         and then put them into a new folder called `target_path`
             Params:
                 audio_path (str): the path of audio wav file you want to convert
diff --git a/create_csv.py b/create_csv.py
@@ -7,6 +7,12 @@ def write_emodb_csv(emotions=["sad", "neutral", "happy"], train_name="train_emo.
                     test_name="test_emo.csv", train_size=0.8, verbose=1):
     """
     Reads speech emodb dataset from directory and write it to a metadata CSV file.
+    params:
+        emotions (list): list of emotions to read from the folder, default is ['sad', 'neutral', 'happy']
+        train_name (str): the output csv filename for training data, default is 'train_emo.csv'
+        test_name (str): the output csv filename for testing data, default is 'test_emo.csv'
+        train_size (float): the ratio of splitting training data, default is 0.8 (80% Training data and 20% testing data)
+        verbose (int/bool): verbosity level, 0 for silence, 1 for info, default is 1
     """
     target = {"path": [], "emotion": []}
     categories = {
@@ -50,65 +56,62 @@ def write_tess_ravdess_csv(emotions=["sad", "neutral", "happy"], train_name="tra
                             test_name="test_tess_ravdess.csv", verbose=1):
     """
     Reads speech TESS & RAVDESS datasets from directory and write it to a metadata CSV file.
+    params:
+        emotions (list): list of emotions to read from the folder, default is ['sad', 'neutral', 'happy']
+        train_name (str): the output csv filename for training data, default is 'train_tess_ravdess.csv'
+        test_name (str): the output csv filename for testing data, default is 'test_tess_ravdess.csv'
+        verbose (int/bool): verbosity level, 0 for silence, 1 for info, default is 1
     """
-    target = {"path": [], "emotion": []}
-    categories = {
-        1: "neutral",
-        2: "calm",
-        3: "happy",
-        4: "sad",
-        5: "angry",
-        6: "fear",
-        7: "disgust",
-        8: "ps"
-    }
-    # delete not specified emotions
-    categories_reversed = { v: k for k, v in categories.items() }
-    for emotion, code in categories_reversed.items():
-        if emotion not in emotions:
-            del categories[code]
-    # for training speech directory
-    for _, category in categories.items():
+    train_target = {"path": [], "emotion": []}
+    test_target = {"path": [], "emotion": []}
+    
+    for category in emotions:
+        # for training speech directory
         for i, path in enumerate(glob.glob(f"data/training/Actor_*/*_{category}.wav")):
-            target["path"].append(path)
-            target["emotion"].append(category)
+            train_target["path"].append(path)
+            train_target["emotion"].append(category)
         if verbose:
             print(f"[TESS&RAVDESS] There are {i} training audio files for category:{category}")
-    pd.DataFrame(target).to_csv(train_name)
-    target = {"path": [], "emotion": []}
-    # for validation speech directory
-    for _, category in categories.items():
+    
+        # for validation speech directory
         for i, path in enumerate(glob.glob(f"data/validation/Actor_*/*_{category}.wav")):
-            target["path"].append(path)
-            target["emotion"].append(category)
+            test_target["path"].append(path)
+            test_target["emotion"].append(category)
         if verbose:
             print(f"[TESS&RAVDESS] There are {i} testing audio files for category:{category}")
-    pd.DataFrame(target).to_csv(test_name)
+    pd.DataFrame(test_target).to_csv(test_name)
+    pd.DataFrame(train_target).to_csv(train_name)
 
 
-def write_custom_csv(emotions=['sad', 'neutral', 'happy'], train_name="train_custom.csv", test_name="test_custom.csv"):
-    categories = {
-        1: "sad",
-        2: "neutral",
-        3: "happy"
-    }
-    # delete not specified emotions
-    categories_reversed = { v: k for k, v in categories.items() }
-    for emotion, code in categories_reversed.items():
-        if emotion not in emotions:
-            del categories[code]
-    target = {"path": [], "emotion": []}
-    for code, category in categories.items():
-        for file in glob.glob(f"data/train-custom/*_{category}.wav"):
-            target["path"].append(file)
-            target["emotion"].append(category)
-    if target["path"]:
-        pd.DataFrame(target).to_csv(train_name)
+def write_custom_csv(emotions=['sad', 'neutral', 'happy'], train_name="train_custom.csv", test_name="test_custom.csv",
+                    verbose=1):
+    """
+    Reads Custom Audio data from data/*-custom and then writes description files (csv)
+    params:
+        emotions (list): list of emotions to read from the folder, default is ['sad', 'neutral', 'happy']
+        train_name (str): the output csv filename for training data, default is 'train_custom.csv'
+        test_name (str): the output csv filename for testing data, default is 'test_custom.csv'
+        verbose (int/bool): verbosity level, 0 for silence, 1 for info, default is 1
+    """
+    train_target = {"path": [], "emotion": []}
+    test_target = {"path": [], "emotion": []}
+    for category in emotions:
+        # train data
+        for i, file in enumerate(glob.glob(f"data/train-custom/*_{category}.wav")):
+            train_target["path"].append(file)
+            train_target["emotion"].append(category)
+        if verbose:
+            print(f"[Custom Dataset] There are {i} training audio files for category:{category}")
+        
+        # test data
+        for i, file in enumerate(glob.glob(f"data/test-custom/*_{category}.wav")):
+            test_target["path"].append(file)
+            test_target["emotion"].append(category)
+        if verbose:
+            print(f"[Custom Dataset] There are {i} testing audio files for category:{category}")
+        
+    if train_target["path"]:
+        pd.DataFrame(train_target).to_csv(train_name)
 
-    target = {"path": [], "emotion": []}
-    for code, category in categories.items():
-        for file in glob.glob(f"data/test-custom/*_{category}.wav"):
-            target["path"].append(file)
-            target["emotion"].append(category)
-    if target["path"]:
-        pd.DataFrame(target).to_csv(test_name)
+    if test_target["path"]:
+            pd.DataFrame(test_target).to_csv(test_name)
diff --git a/data_extractor.py b/data_extractor.py
@@ -11,13 +11,20 @@
 
 class AudioExtractor:
     """A class that is used to featurize audio clips, and provide
-    them to the network for training and testing"""
-    def __init__(self, desc_file=None, audio_config=None, verbose=1, features_folder_name="features", classification=True,
+    them to the machine learning algorithms for training and testing"""
+    def __init__(self, audio_config=None, verbose=1, features_folder_name="features", classification=True,
                     emotions=['sad', 'neutral', 'happy'], balance=True):
-        """Params:
-            desc_file (str, optional): Path to a csv file that contains labels and paths to the audio files.
-            If provided, then load metadata right away."""
-        self.desc_file = desc_file
+        """
+        Params:
+            audio_config (dict): the dictionary that indicates what features to extract from the audio file,
+                default is {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
+                (i.e. mfcc, chroma and mel)
+            verbose (bool/int): verbosity level, 0 for silence, 1 for info, default is 1
+            features_folder_name (str): the folder to store output features extracted, default is "features".
+            classification (bool): whether the task is classification or regression, default is True (i.e. classification)
+            emotions (list): list of emotions to be extracted, default is ['sad', 'neutral', 'happy']
+            balance (bool): whether to balance dataset (both training and testing), default is True
+        """
         self.audio_config = audio_config if audio_config else {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
         self.verbose = verbose
         self.features_folder_name = features_folder_name
@@ -59,9 +66,11 @@ def shuffle_data_by_partition(self, partition):
             raise TypeError("Invalid partition, must be either train/test")
 
     def load_metadata_from_desc_file(self, desc_files, partition):
-        """Read metadata from a CSV file
+        """Read metadata from CSV files, then extract and load the features of the audio files
         Params:
-            desc_files"""
+            desc_files (list): list of description files (csv files) to read from
+            partition (str): either "train" or "test"
+        """
         # empty dataframe
         df = pd.DataFrame({'path': [], 'emotion': []})
         for desc_file in desc_files:
diff --git a/test.py b/test.py
@@ -1,9 +1,7 @@
-from utils import extract_feature
 from emotion_recognition import EmotionRecognizer
 
 import pyaudio
 import os
-import time
 import wave
 import numpy as np
 from sys import byteorder
diff --git a/utils.py b/utils.py
@@ -43,15 +43,17 @@ def get_first_letters(emotions):
 
 
 def extract_feature(file_name, **kwargs):
-    """Extract feature from audio file `file_name`
+    """
+    Extract feature from audio file `file_name`
         Features supported:
             - MFCC (mfcc)
             - Chroma (chroma)
             - MEL Spectrogram Frequency (mel)
             - Contrast (contrast)
             - Tonnetz (tonnetz)
         e.g:
-        `features = extract_feature(path, mel=True, mfcc=True)`"""
+        `features = extract_feature(path, mel=True, mfcc=True)`
+    """
     mfcc = kwargs.get("mfcc")
     chroma = kwargs.get("chroma")
     mel = kwargs.get("mel")