Make codebase consistently documented

MatthewScholefield · MatthewScholefield · commit 13db6f01a2af · 2020-04-16T10:20:35.000-05:00
Ensures there is always a module-level docstring
Adds missing docstrings to functions and classes
diff --git a/precise/functions.py b/precise/functions.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Mathematical functions used to customize
+computation in various places
+"""
 from math import exp, log, sqrt, pi
 import numpy as np
 from typing import *
@@ -20,6 +24,11 @@
 
 def set_loss_bias(bias: float):
     """
+    Changes the loss bias
+
+    This allows customizing the acceptable tolerance between
+    false negatives and false positives
+
     Near 1.0 reduces false positives
     Near 0.0 reduces false negatives
     """
@@ -42,6 +51,7 @@ def weighted_log_loss(yt, yp) -> Any:
 
 
 def weighted_mse_loss(yt, yp) -> Any:
+    """Standard MSE loss weighted between false negatives and false positives"""
     from keras import backend as K
 
     total = K.sum(K.ones_like(yt))
@@ -52,16 +62,27 @@ def weighted_mse_loss(yt, yp) -> Any:
 
 
 def false_pos(yt, yp) -> Any:
+    """
+    Metric for Keras that *estimates* false positives while training
+    This will not be completely accurate because it weights batches
+    equally
+    """
     from keras import backend as K
     return K.sum(K.cast(yp * (1 - yt) > 0.5, 'float')) / K.maximum(1.0, K.sum(1 - yt))
 
 
 def false_neg(yt, yp) -> Any:
+    """
+    Metric for Keras that *estimates* false negatives while training
+    This will not be completely accurate because it weights batches
+    equally
+    """
     from keras import backend as K
     return K.sum(K.cast((1 - yp) * (0 + yt) > 0.5, 'float')) / K.maximum(1.0, K.sum(0 + yt))
 
 
 def load_keras() -> Any:
+    """Imports Keras injecting custom functions to prevent exceptions"""
     import keras
     keras.losses.weighted_log_loss = weighted_log_loss
     keras.metrics.false_pos = false_pos
diff --git a/precise/model.py b/precise/model.py
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Loads model
+"""
 import attr
 from os.path import isfile
 from typing import *
diff --git a/precise/network_runner.py b/precise/network_runner.py
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Pieces that convert audio to predictions
+"""
 import numpy as np
 from abc import abstractmethod, ABCMeta
 from importlib import import_module
@@ -26,6 +29,10 @@
 
 
 class Runner(metaclass=ABCMeta):
+    """
+    Classes that execute trained models on vectorized audio
+    and produce prediction values
+    """
     @abstractmethod
     def predict(self, inputs: np.ndarray) -> np.ndarray:
         pass
@@ -36,6 +43,7 @@ def run(self, inp: np.ndarray) -> float:
 
 
 class TensorFlowRunner(Runner):
+    """Executes a frozen TensorFlow model created from precise-convert"""
     def __init__(self, model_name: str):
         if model_name.endswith('.net'):
             print('Warning: ', model_name, 'looks like a Keras model.')
@@ -67,6 +75,7 @@ def run(self, inp: np.ndarray) -> float:
 
 
 class KerasRunner(Runner):
+    """Executes a regular Keras model created from precise-train"""
     def __init__(self, model_name: str):
         import tensorflow as tf
         # ISSUE 88 - Following 3 lines added to resolve issue 88 - JM 2020-02-04 per liny90626
diff --git a/precise/params.py b/precise/params.py
@@ -11,6 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Parameters used in the audio pipeline
+These configure the following stages:
+ - Conversion from audio to input vectors
+ - Interpretation of the network output to a confidence value
+"""
 from math import floor
 
 import attr
@@ -21,42 +27,78 @@
 
 @attr.s(frozen=True)
 class ListenerParams:
+    """
+    General pipeline information:
+     - Audio goes through a series of transformations to convert raw audio into machine readable data
+     - These transformations are as follows:
+       - Raw audio -> chopped audio
+         - buffer_t, sample_depth: Input audio loaded and truncated using these values
+         - window_t, hop_t: Linear audio chopped into overlapping frames using a sliding window
+       - Chopped audio -> FFT spectrogram
+         - n_fft, sample_rate: Each audio frame is converted to n_fft frequency intensities
+       - FFT spectrogram -> Mel spectrogram (compressed)
+         - n_filt: Each fft frame is compressed to n_filt summarized mel frequency bins/bands
+       - Mel spectrogram -> MFCC
+         - n_mfcc: Each mel frame is converted to MFCCs and the first n_mfcc values are taken
+       - Disabled by default: Last phase -> Delta vectors
+         - use_delta: If this value is true, the difference between consecutive vectors is concatenated to each frame
+
+    Parameters for audio pipeline:
+     - buffer_t: Input size of audio. Wakeword must fit within this time
+     - window_t: Time of the window used to calculate a single spectrogram frame
+     - hop_t: Time the window advances forward to calculate the next spectrogram frame
+     - sample_rate: Input audio sample rate
+     - sample_depth: Bytes per input audio sample
+     - n_fft: Size of FFT to generate from audio frame
+     - n_filt: Number of filters to compress FFT to
+     - n_mfcc: Number of MFCC coefficients to use
+     - use_delta: If True, generates "delta vectors" before sending to network
+     - vectorizer: The type of input fed into the network. Options listed in class Vectorizer
+     - threshold_config: Output distribution configuration automatically generated from precise-calc-threshold
+     - threshold_center: Output distribution center automatically generated from precise-calc-threshold
+    """
+    buffer_t = attr.ib()  # type: float
     window_t = attr.ib()  # type: float
     hop_t = attr.ib()  # type: float
-    buffer_t = attr.ib()  # type: float
     sample_rate = attr.ib()  # type: int
     sample_depth = attr.ib()  # type: int
-    n_mfcc = attr.ib()  # type: int
-    n_filt = attr.ib()  # type: int
     n_fft = attr.ib()  # type: int
+    n_filt = attr.ib()  # type: int
+    n_mfcc = attr.ib()  # type: int
     use_delta = attr.ib()  # type: bool
     vectorizer = attr.ib()  # type: int
     threshold_config = attr.ib()  # type: tuple
     threshold_center = attr.ib()  # type: float
 
     @property
     def buffer_samples(self):
+        """buffer_t converted to samples, truncating partial frames"""
         samples = int(self.sample_rate * self.buffer_t + 0.5)
         return self.hop_samples * (samples // self.hop_samples)
 
     @property
     def n_features(self):
+        """Number of timesteps in one input to the network"""
         return 1 + int(floor((self.buffer_samples - self.window_samples) / self.hop_samples))
 
     @property
     def window_samples(self):
+        """window_t converted to samples"""
         return int(self.sample_rate * self.window_t + 0.5)
 
     @property
     def hop_samples(self):
+        """hop_t converted to samples"""
         return int(self.sample_rate * self.hop_t + 0.5)
 
     @property
     def max_samples(self):
+        """The input size converted to audio samples"""
         return int(self.buffer_t * self.sample_rate)
 
     @property
     def feature_size(self):
+        """The size of an input vector generated with these parameters"""
         num_features = {
             Vectorizer.mfccs: self.n_mfcc,
             Vectorizer.mels: self.n_filt,
@@ -77,15 +119,27 @@ def vectorization_md5_hash(self):
 
 
 class Vectorizer:
+    """
+    Chooses which function to call to vectorize audio
+
+    Options:
+        mels: Convert to a compressed Mel spectrogram
+        mfccs: Convert to an MFCC spectrogram
+        speechpy_mfccs: Legacy option to convert to MFCCs using old library
+    """
     mels = 1
     mfccs = 2
     speechpy_mfccs = 3
 
 
 # Global listener parameters
+# These are the default values for all parameters
+# These were selected tentatively to balance CPU usage with accuracy
+# For the Hey Mycroft wake word, small changes to these parameters
+# did not make a significant difference in accuracy
 pr = ListenerParams(
-    window_t=0.1, hop_t=0.05, buffer_t=1.5, sample_rate=16000,
-    sample_depth=2, n_mfcc=13, n_filt=20, n_fft=512, use_delta=False,
+    buffer_t=1.5, window_t=0.1, hop_t=0.05, sample_rate=16000,
+    sample_depth=2, n_fft=512, n_filt=20, n_mfcc=13, use_delta=False,
     threshold_config=((6, 4),), threshold_center=0.2, vectorizer=Vectorizer.mfccs
 )
 
diff --git a/precise/pocketsphinx/listener.py b/precise/pocketsphinx/listener.py
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Conversion of audio data to predictions using Pocketsphinx
+Used for comparison with Precise
+"""
 import numpy as np
 from typing import *
 from typing import BinaryIO
diff --git a/precise/stats.py b/precise/stats.py
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License
+"""
+Module handles computing and formatting basic statistics
+about a dataset like false negatives and false positives
+"""
 import numpy as np
 
 counts_str = '''
diff --git a/precise/threshold_decoder.py b/precise/threshold_decoder.py
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Code for converting network output to confidence level
+"""
 import numpy as np
 from typing import Tuple
 
@@ -23,6 +26,14 @@ class ThresholdDecoder:
     This works by estimating the logit normal distribution of network
     activations using a series of averages and standard deviations to
     calculate a cumulative probability distribution
+
+    Background:
+    We could simply take the output of the neural network as the confidence of a given
+    prediction, but this typically jumps quickly between 0.01 and 0.99 even in cases where
+    the network is less confident about a prediction. This is a symptom of the sigmoid squashing
+    high values to values close to 1. This ThresholdDecoder measures the average output of
+    the network over a dataset and uses that to create a smooth distribution so that an output
+    of 80% means that the network output is greater than roughly 80% of the dataset
     """
     def __init__(self, mu_stds: Tuple[Tuple[float, float]], center=0.5, resolution=200, min_z=-4, max_z=4):
         self.min_out = int(min(mu + min_z * std for mu, std in mu_stds))
diff --git a/precise/train_data.py b/precise/train_data.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Handles loading dataset into memory and processing it
+Used for training and generating statistics for a dataset
+"""
 import json
 import numpy as np
 from glob import glob
@@ -155,6 +159,7 @@ def loader(kws: list, nkws: list):
 
     @staticmethod
     def merge(data_a: tuple, data_b: tuple) -> tuple:
+        """Combine two (inputs, outputs) data tuples by concatenating each element"""
         return np.concatenate((data_a[0], data_b[0])), np.concatenate((data_a[1], data_b[1]))
 
     def __repr__(self) -> str:
diff --git a/precise/util.py b/precise/util.py
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Miscellaneous utility functions for things like audio loading
+"""
 import hashlib
 import numpy as np
 from os.path import join, dirname, abspath
@@ -20,6 +23,7 @@
 
 
 class InvalidAudio(ValueError):
+    """Thrown when the audio isn't in the expected format"""
     pass
 
 
@@ -40,6 +44,7 @@ def audio_to_buffer(audio: np.ndarray) -> bytes:
 
 def load_audio(file: Any) -> np.ndarray:
     """
+    Loads properly formatted audio from a file to a numpy array
     Args:
         file: Audio filename or file object
     Returns:
@@ -61,6 +66,7 @@ def load_audio(file: Any) -> np.ndarray:
 
 
 def save_audio(filename: str, audio: np.ndarray):
+    """Save loaded audio to file using the configured audio parameters"""
     import wavio
     save_audio = (audio * np.iinfo(np.int16).max).astype(np.int16)
     wavio.write(filename, save_audio, pr.sample_rate, sampwidth=pr.sample_depth, scale='none')
@@ -79,6 +85,7 @@ def play_audio(filename: str):
 
 
 def activate_notify():
+    """Play some sound to indicate a wakeword activation when testing a model"""
     audio = 'data/activate.wav'
     audio = join(dirname(abspath(__file__)), audio)
     play_audio(audio)
@@ -102,6 +109,7 @@ def find_wavs(folder: str) -> Tuple[List[str], List[str]]:
 
 
 def calc_sample_hash(inp: np.ndarray, outp: np.ndarray) -> str:
+    """Hashes a training sample of an input vector and target output vector"""
     md5 = hashlib.md5()
     md5.update(inp.tostring())
     md5.update(outp.tostring())
diff --git a/precise/vectorization.py b/precise/vectorization.py
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Functions that convert audio to machine readable vectors
+"""
 import hashlib
 import numpy as np
 import os
@@ -24,6 +27,7 @@
 inhibit_dist_t = 1.0
 inhibit_hop_t = 0.1
 
+# Functions that convert audio frames -> vectors
 vectorizers = {
     Vectorizer.mels: lambda x: mel_spec(
         x, pr.sample_rate, (pr.window_samples, pr.hop_samples),
@@ -47,6 +51,7 @@ def vectorize_raw(audio: np.ndarray) -> np.ndarray:
 
 
 def add_deltas(features: np.ndarray) -> np.ndarray:
+    """Inserts extra features that are the difference between adjacent timesteps"""
     deltas = np.zeros_like(features)
     for i in range(1, len(features)):
         deltas[i] = features[i] - features[i - 1]
@@ -56,6 +61,9 @@ def add_deltas(features: np.ndarray) -> np.ndarray:
 
 def vectorize(audio: np.ndarray) -> np.ndarray:
     """
+    Converts audio to machine readable vectors using
+    configuration specified in ListenerParams (params.py)
+
     Args:
         audio: Audio verified to be of `sample_rate`
 
@@ -77,6 +85,7 @@ def vectorize(audio: np.ndarray) -> np.ndarray:
 
 
 def vectorize_delta(audio: np.ndarray) -> np.ndarray:
+    """Vectorizer for when use_delta is True"""
     return add_deltas(vectorize(audio))