Skip to content

Commit 13db6f0

Browse files
Make codebase consistently documented
Ensures there is always a module-level docstring. Adds missing docstrings to functions and classes.
1 parent 6ffe3f8 commit 13db6f0

File tree

10 files changed

+133
-5
lines changed

10 files changed

+133
-5
lines changed

precise/functions.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
"""
15+
Mathematical functions used to customize
16+
computation in various places
17+
"""
1418
from math import exp, log, sqrt, pi
1519
import numpy as np
1620
from typing import *
@@ -20,6 +24,11 @@
2024

2125
def set_loss_bias(bias: float):
2226
"""
27+
Changes the loss bias
28+
29+
This allows customizing the acceptable tolerance between
30+
false negatives and false positives
31+
2332
Near 1.0 reduces false positives
2433
Near 0.0 reduces false negatives
2534
"""
@@ -42,6 +51,7 @@ def weighted_log_loss(yt, yp) -> Any:
4251

4352

4453
def weighted_mse_loss(yt, yp) -> Any:
54+
"""Standard mse loss with a weighting between false negatives and positives"""
4555
from keras import backend as K
4656

4757
total = K.sum(K.ones_like(yt))
@@ -52,16 +62,27 @@ def weighted_mse_loss(yt, yp) -> Any:
5262

5363

5464
def false_pos(yt, yp) -> Any:
65+
"""
66+
Metric for Keras that *estimates* false positives while training
67+
This will not be completely accurate because it weights batches
68+
equally
69+
"""
5570
from keras import backend as K
5671
return K.sum(K.cast(yp * (1 - yt) > 0.5, 'float')) / K.maximum(1.0, K.sum(1 - yt))
5772

5873

5974
def false_neg(yt, yp) -> Any:
75+
"""
76+
Metric for Keras that *estimates* false negatives while training
77+
This will not be completely accurate because it weights batches
78+
equally
79+
"""
6080
from keras import backend as K
6181
return K.sum(K.cast((1 - yp) * (0 + yt) > 0.5, 'float')) / K.maximum(1.0, K.sum(0 + yt))
6282

6383

6484
def load_keras() -> Any:
85+
"""Imports Keras injecting custom functions to prevent exceptions"""
6586
import keras
6687
keras.losses.weighted_log_loss = weighted_log_loss
6788
keras.metrics.false_pos = false_pos

precise/model.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
"""
15+
Loads model
16+
"""
1417
import attr
1518
from os.path import isfile
1619
from typing import *

precise/network_runner.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
"""
15+
Pieces that convert audio to predictions
16+
"""
1417
import numpy as np
1518
from abc import abstractmethod, ABCMeta
1619
from importlib import import_module
@@ -26,6 +29,10 @@
2629

2730

2831
class Runner(metaclass=ABCMeta):
32+
"""
33+
Classes that execute trained models on vectorized audio
34+
and produce prediction values
35+
"""
2936
@abstractmethod
3037
def predict(self, inputs: np.ndarray) -> np.ndarray:
3138
pass
@@ -36,6 +43,7 @@ def run(self, inp: np.ndarray) -> float:
3643

3744

3845
class TensorFlowRunner(Runner):
46+
"""Executes a frozen TensorFlow model created from precise-convert"""
3947
def __init__(self, model_name: str):
4048
if model_name.endswith('.net'):
4149
print('Warning: ', model_name, 'looks like a Keras model.')
@@ -67,6 +75,7 @@ def run(self, inp: np.ndarray) -> float:
6775

6876

6977
class KerasRunner(Runner):
78+
"""Executes a regular Keras model created from precise-train"""
7079
def __init__(self, model_name: str):
7180
import tensorflow as tf
7281
# ISSUE 88 - Following 3 lines added to resolve issue 88 - JM 2020-02-04 per liny90626

precise/params.py

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
"""
15+
Parameters used in the audio pipeline
16+
These configure the following stages:
17+
- Conversion from audio to input vectors
18+
- Interpretation of the network output to a confidence value
19+
"""
1420
from math import floor
1521

1622
import attr
@@ -21,42 +27,78 @@
2127

2228
@attr.s(frozen=True)
2329
class ListenerParams:
30+
"""
31+
General pipeline information:
32+
- Audio goes through a series of transformations to convert raw audio into machine readable data
33+
- These transformations are as follows:
34+
- Raw audio -> chopped audio
35+
- buffer_t, sample_depth: Input audio loaded and truncated using these values
36+
- window_t, hop_t: Linear audio chopped into overlapping frames using a sliding window
37+
- Chopped audio -> FFT spectrogram
38+
- n_fft, sample_rate: Each audio frame is converted to n_fft frequency intensities
39+
- FFT spectrogram -> Mel spectrogram (compressed)
40+
- n_filt: Each fft frame is compressed to n_filt summarized mel frequency bins/bands
41+
- Mel spectrogram -> MFCC
42+
- n_mfcc: Each mel frame is converted to MFCCs and the first n_mfcc values are taken
43+
- Disabled by default: Last phase -> Delta vectors
44+
- use_delta: If this value is true, the difference between consecutive vectors is concatenated to each frame
45+
46+
Parameters for audio pipeline:
47+
- buffer_t: Input size of audio. Wakeword must fit within this time
48+
- window_t: Time of the window used to calculate a single spectrogram frame
49+
- hop_t: Time the window advances forward to calculate the next spectrogram frame
50+
- sample_rate: Input audio sample rate
51+
- sample_depth: Bytes per input audio sample
52+
- n_fft: Size of FFT to generate from audio frame
53+
- n_filt: Number of filters to compress FFT to
54+
- n_mfcc: Number of MFCC coefficients to use
55+
- use_delta: If True, generates "delta vectors" before sending to network
56+
- vectorizer: The type of input fed into the network. Options listed in class Vectorizer
57+
- threshold_config: Output distribution configuration automatically generated from precise-calc-threshold
58+
- threshold_center: Output distribution center automatically generated from precise-calc-threshold
59+
"""
60+
buffer_t = attr.ib() # type: float
2461
window_t = attr.ib() # type: float
2562
hop_t = attr.ib() # type: float
26-
buffer_t = attr.ib() # type: float
2763
sample_rate = attr.ib() # type: int
2864
sample_depth = attr.ib() # type: int
29-
n_mfcc = attr.ib() # type: int
30-
n_filt = attr.ib() # type: int
3165
n_fft = attr.ib() # type: int
66+
n_filt = attr.ib() # type: int
67+
n_mfcc = attr.ib() # type: int
3268
use_delta = attr.ib() # type: bool
3369
vectorizer = attr.ib() # type: int
3470
threshold_config = attr.ib() # type: tuple
3571
threshold_center = attr.ib() # type: float
3672

3773
@property
3874
def buffer_samples(self):
75+
"""buffer_t converted to samples, truncating partial frames"""
3976
samples = int(self.sample_rate * self.buffer_t + 0.5)
4077
return self.hop_samples * (samples // self.hop_samples)
4178

4279
@property
4380
def n_features(self):
81+
"""Number of timesteps in one input to the network"""
4482
return 1 + int(floor((self.buffer_samples - self.window_samples) / self.hop_samples))
4583

4684
@property
4785
def window_samples(self):
86+
"""window_t converted to samples"""
4887
return int(self.sample_rate * self.window_t + 0.5)
4988

5089
@property
5190
def hop_samples(self):
91+
"""hop_t converted to samples"""
5292
return int(self.sample_rate * self.hop_t + 0.5)
5393

5494
@property
5595
def max_samples(self):
96+
"""The input size converted to audio samples"""
5697
return int(self.buffer_t * self.sample_rate)
5798

5899
@property
59100
def feature_size(self):
101+
"""The size of an input vector generated with these parameters"""
60102
num_features = {
61103
Vectorizer.mfccs: self.n_mfcc,
62104
Vectorizer.mels: self.n_filt,
@@ -77,15 +119,27 @@ def vectorization_md5_hash(self):
77119

78120

79121
class Vectorizer:
122+
"""
123+
Chooses which function to call to vectorize audio
124+
125+
Options:
126+
mels: Convert to a compressed Mel spectrogram
127+
mfccs: Convert to an MFCC spectrogram
128+
speechpy_mfccs: Legacy option to convert to MFCCs using old library
129+
"""
80130
mels = 1
81131
mfccs = 2
82132
speechpy_mfccs = 3
83133

84134

85135
# Global listener parameters
136+
# These are the default values for all parameters
137+
# These were selected tentatively to balance CPU usage with accuracy
138+
# For the Hey Mycroft wake word, small changes to these parameters
139+
# did not make a significant difference in accuracy
86140
pr = ListenerParams(
87-
window_t=0.1, hop_t=0.05, buffer_t=1.5, sample_rate=16000,
88-
sample_depth=2, n_mfcc=13, n_filt=20, n_fft=512, use_delta=False,
141+
buffer_t=1.5, window_t=0.1, hop_t=0.05, sample_rate=16000,
142+
sample_depth=2, n_fft=512, n_filt=20, n_mfcc=13, use_delta=False,
89143
threshold_config=((6, 4),), threshold_center=0.2, vectorizer=Vectorizer.mfccs
90144
)
91145

precise/pocketsphinx/listener.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15+
"""
16+
Conversion of audio data to predictions using Pocketsphinx
17+
Used for comparison with Precise
18+
"""
1519
import numpy as np
1620
from typing import *
1721
from typing import BinaryIO

precise/stats.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License
15+
"""
16+
Module handles computing and formatting basic statistics
17+
about a dataset like false negatives and false positives
18+
"""
1519
import numpy as np
1620

1721
counts_str = '''

precise/threshold_decoder.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
"""
15+
Code for converting network output to confidence level
16+
"""
1417
import numpy as np
1518
from typing import Tuple
1619

@@ -23,6 +26,14 @@ class ThresholdDecoder:
2326
This works by estimating the logit normal distribution of network
2427
activations using a series of averages and standard deviations to
2528
calculate a cumulative probability distribution
29+
30+
Background:
31+
We could simply take the output of the neural network as the confidence of a given
32+
prediction, but this typically jumps quickly between 0.01 and 0.99 even in cases where
33+
the network is less confident about a prediction. This is a symptom of the sigmoid squashing
34+
high values to values close to 1. This ThresholdDecoder measures the average output of
35+
the network over a dataset and uses that to create a smooth distribution so that an output
36+
of 80% means that the network output is greater than roughly 80% of the dataset
2637
"""
2738
def __init__(self, mu_stds: Tuple[Tuple[float, float]], center=0.5, resolution=200, min_z=-4, max_z=4):
2839
self.min_out = int(min(mu + min_z * std for mu, std in mu_stds))

precise/train_data.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
"""
15+
Handles loading dataset into memory and processing it
16+
Used for training and generating statistics for a dataset
17+
"""
1418
import json
1519
import numpy as np
1620
from glob import glob
@@ -155,6 +159,7 @@ def loader(kws: list, nkws: list):
155159

156160
@staticmethod
157161
def merge(data_a: tuple, data_b: tuple) -> tuple:
162+
"""Combine two data tuples by concatenating their first and second elements pairwise"""
158163
return np.concatenate((data_a[0], data_b[0])), np.concatenate((data_a[1], data_b[1]))
159164

160165
def __repr__(self) -> str:

precise/util.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
"""
15+
Miscellaneous utility functions for things like audio loading
16+
"""
1417
import hashlib
1518
import numpy as np
1619
from os.path import join, dirname, abspath
@@ -20,6 +23,7 @@
2023

2124

2225
class InvalidAudio(ValueError):
26+
"""Raised when the audio isn't in the expected format"""
2327
pass
2428

2529

@@ -40,6 +44,7 @@ def audio_to_buffer(audio: np.ndarray) -> bytes:
4044

4145
def load_audio(file: Any) -> np.ndarray:
4246
"""
47+
Loads properly formatted audio from a file to a numpy array
4348
Args:
4449
file: Audio filename or file object
4550
Returns:
@@ -61,6 +66,7 @@ def load_audio(file: Any) -> np.ndarray:
6166

6267

6368
def save_audio(filename: str, audio: np.ndarray):
69+
"""Save loaded audio to file using the configured audio parameters"""
6470
import wavio
6571
save_audio = (audio * np.iinfo(np.int16).max).astype(np.int16)
6672
wavio.write(filename, save_audio, pr.sample_rate, sampwidth=pr.sample_depth, scale='none')
@@ -79,6 +85,7 @@ def play_audio(filename: str):
7985

8086

8187
def activate_notify():
88+
"""Play some sound to indicate a wakeword activation when testing a model"""
8289
audio = 'data/activate.wav'
8390
audio = join(dirname(abspath(__file__)), audio)
8491
play_audio(audio)
@@ -102,6 +109,7 @@ def find_wavs(folder: str) -> Tuple[List[str], List[str]]:
102109

103110

104111
def calc_sample_hash(inp: np.ndarray, outp: np.ndarray) -> str:
112+
"""Hashes a training sample of an input vector and target output vector"""
105113
md5 = hashlib.md5()
106114
md5.update(inp.tostring())
107115
md5.update(outp.tostring())

precise/vectorization.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
"""
15+
Functions that convert audio to machine readable vectors
16+
"""
1417
import hashlib
1518
import numpy as np
1619
import os
@@ -24,6 +27,7 @@
2427
inhibit_dist_t = 1.0
2528
inhibit_hop_t = 0.1
2629

30+
# Functions that convert audio frames -> vectors
2731
vectorizers = {
2832
Vectorizer.mels: lambda x: mel_spec(
2933
x, pr.sample_rate, (pr.window_samples, pr.hop_samples),
@@ -47,6 +51,7 @@ def vectorize_raw(audio: np.ndarray) -> np.ndarray:
4751

4852

4953
def add_deltas(features: np.ndarray) -> np.ndarray:
54+
"""Inserts extra features that are the difference between adjacent timesteps"""
5055
deltas = np.zeros_like(features)
5156
for i in range(1, len(features)):
5257
deltas[i] = features[i] - features[i - 1]
@@ -56,6 +61,9 @@ def add_deltas(features: np.ndarray) -> np.ndarray:
5661

5762
def vectorize(audio: np.ndarray) -> np.ndarray:
5863
"""
64+
Converts audio to machine readable vectors using
65+
configuration specified in ListenerParams (params.py)
66+
5967
Args:
6068
audio: Audio verified to be of `sample_rate`
6169
@@ -77,6 +85,7 @@ def vectorize(audio: np.ndarray) -> np.ndarray:
7785

7886

7987
def vectorize_delta(audio: np.ndarray) -> np.ndarray:
88+
"""Vectorizer for when use_delta is True"""
8089
return add_deltas(vectorize(audio))
8190

8291

0 commit comments

Comments
 (0)