Commit 5aa63d1

Misc fixes
1 parent dea9dee commit 5aa63d1

5 files changed: +125 -37 lines changed

TODO.md

Lines changed: 7 additions & 2 deletions
@@ -12,13 +12,18 @@ Feedback needed
 
 Results
 
-- Include latest results
-- Measure runtime on device for latest models
 - Use Strided-DS-24 as chosen model (confusion matrix etc), instead of auto "best"
 - Finish basic Discussion and Conclusion
 - Make plots a bit prettier
 - Add picture of demo setup
 
+Reprod
+
+- Tag a branch for submitted thesis
+- Upload models to GH
+- Results are from git commit `b49efa5dde48f9fd72a32eff4c751d9d0c0de712`
+- Include perftools Python script in appendix?
+
 Abstract
 
 - Write it!

braindump.md

Lines changed: 16 additions & 0 deletions
@@ -56,6 +56,22 @@ For DS-5x5 12, going from 0.5 dropout to 0.25 increases perf from 65% to 72%
 
 python train.py --model strided --conv_block depthwise_separable --epochs 100 --downsample_size=2x2 --filters 12 --dropout 0.25
 
+### Aggregation
+Low-pass filter over consecutive frames?
+Exponential Moving Average?
+
+## Testing
+
+Jackhammer
+https://annotator.freesound.org/fsd/explore/%252Fm%252F03p19w/
+https://freesound.org/people/Mark_Ian/sounds/131918/
+
+Dog bark
+https://annotator.freesound.org/fsd/explore/%252Fm%252F0bt9lr/
+http://freesound.org/s/365053
+
+
+
 
 ## Kubernetes
 
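The `Aggregation` note in this hunk asks about an Exponential Moving Average over consecutive prediction frames. A minimal sketch of how that smoothing would behave; the `alpha` value and the three-class probability vectors are made up for illustration:

```python
import numpy

def ema(new, prev, alpha=0.2):
    # Exponential Moving Average: higher alpha weights the newest frame more
    return alpha * new + (1 - alpha) * prev

# hypothetical stream of per-frame class probabilities (3 classes)
frames = [
    numpy.array([0.9, 0.05, 0.05]),
    numpy.array([0.1, 0.8, 0.1]),
    numpy.array([0.2, 0.7, 0.1]),
]

smoothed = frames[0]
for f in frames[1:]:
    smoothed = ema(f, smoothed)
# a single divergent frame only nudges the smoothed estimate
print(smoothed)
```

With a small `alpha`, one outlier frame cannot flip the predicted class on its own, which is the point of aggregating before thresholding.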

microesc/livedemo.py

Lines changed: 80 additions & 26 deletions
@@ -8,6 +8,7 @@
 
 import numpy
 import serial
+import scipy.signal
 
 import matplotlib
 from matplotlib import pyplot as plt
@@ -75,62 +76,115 @@ def create_interactive():
     win = Gtk.Window()
     win.connect("delete-event", Gtk.main_quit)
     win.set_default_size(400, 300)
-    win.set_title("Embedding in GTK")
+    win.set_title("On-sensor Audio Classification")
 
-    f = matplotlib.figure.Figure(figsize=(5, 4), dpi=100)
-    ax = f.add_subplot(111)
-    t = numpy.arange(0.0, 3.0, 0.01)
-    s = numpy.sin(2*numpy.pi*t)
-
-    #ax.plot(t, s)
+    fig, (ax, text_ax) = plt.subplots(1, 2)
 
     sw = Gtk.ScrolledWindow()
     win.add(sw)
     # A scrolled window border goes outside the scrollbars and viewport
     sw.set_border_width(10)
 
-    canvas = FigureCanvas(f)  # a Gtk.DrawingArea
-    canvas.set_size_request(800, 600)
+    canvas = FigureCanvas(fig)  # a Gtk.DrawingArea
+    canvas.set_size_request(200, 400)
     sw.add_with_viewport(canvas)
 
-    predictions = numpy.random.random(10)
-    rects = ax.bar(numpy.arange(len(predictions)), predictions, align='center', alpha=0.5)
+    prediction_threshold = 0.35
 
-    return win, f, ax, rects
+    # Plots
+    predictions = numpy.zeros(11)
+    tt = numpy.arange(len(predictions))
+    rects = ax.barh(tt, predictions, align='center', alpha=0.5)
+    ax.set_yticks(tt)
+    ax.set_yticklabels(classnames)
+    ax.set_xlim(0, 1)
 
-def update_plot(ser, ax, fig, rects):
-    raw = ser.readline()
-    line = raw.decode('utf-8')
-    predictions = parse_input(line)
+    ax.axvline(prediction_threshold)
+    ax.yaxis.set_ticks_position('right')
+
+    # Text
+    text_ax.axes.get_xaxis().set_visible(False)
+    text_ax.axes.get_yaxis().set_visible(False)
+
+    text = text_ax.text(0.5, 0.2, "Unknown",
+        horizontalalignment='center',
+        verticalalignment='center',
+        fontsize=32,
+        )
+
+    def emwa(new, prev, alpha):
+        return alpha * new + (1 - alpha) * prev
+
+    prev = predictions
+    alpha = 0.2  # smoothing coefficient
+
+    window = numpy.zeros(shape=(4, 11))
+
+    from scipy.ndimage.interpolation import shift
+
+    def update_plot(predictions):
+
+        if len(predictions) < 10:
+            return
+
+        # add unknown class
+        predictions = numpy.concatenate([predictions, [0.0]])
+
+        window[:, :] = numpy.roll(window, 1, axis=0)
+        window[0, :] = predictions
+
+        predictions = numpy.mean(window, axis=0)
 
-    if predictions:
         best_p = numpy.max(predictions)
         best_c = numpy.argmax(predictions)
-        name = classnames[best_c]
-        if best_p >= 0.35:
-            print('p', name, best_p)
+        if best_p <= prediction_threshold:
+            best_c = 10
+            best_p = 0.0
 
         for rect, h in zip(rects, predictions):
-            rect.set_height(h)
+            rect.set_width(h)
+
+        name = classnames[best_c]
+        text.set_text(name)
+
+        fig.tight_layout()
+        fig.canvas.draw()
+
+    return win, update_plot
+
+def fetch_predictions(ser):
+    raw = ser.readline()
+    line = raw.decode('utf-8')
+    predictions = parse_input(line)
+    return predictions
 
-    fig.canvas.draw()
 
-    return True
 
 def main():
     test_parse_preds()
 
     device = '/dev/ttyACM1'
     baudrate = 115200
 
-    window, fig, ax, rects = create_interactive()
+    window, plot = create_interactive()
     window.show_all()
 
+    def update(ser):
+        try:
+            preds = fetch_predictions(ser)
+        except Exception as e:
+            print('error', e)
+            return True
+
+        if preds is not None:
+            plot(preds)
+        return True
+
     with serial.Serial(device, baudrate, timeout=0.1) as ser:
         # avoid reading stale data
         thrash = ser.read(10000)
-
-        GLib.timeout_add(200.0, update_plot, ser, ax, fig, rects)
+
+        GLib.timeout_add(200.0, update, ser)
 
     Gtk.main()  # WARN: blocking
 
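The smoothing added in `update_plot` keeps the last four prediction vectors in a rolling window (`numpy.roll`) and averages them. Extracted as a standalone sketch (11 classes as in the diff; the input vector is illustrative):

```python
import numpy

window = numpy.zeros(shape=(4, 11))  # last 4 prediction vectors, 11 classes

def smooth(window, predictions):
    # newest predictions go into row 0, the oldest row falls off the end
    window[:, :] = numpy.roll(window, 1, axis=0)
    window[0, :] = predictions
    return numpy.mean(window, axis=0)

# one confident frame for class 3, preceded by silence
p = numpy.zeros(11)
p[3] = 1.0
out = smooth(window, p)
print(out[3])  # 0.25: one confident frame out of four
```

With a 0.35 `prediction_threshold`, a single confident frame (0.25 after averaging) is not enough to leave the "Unknown" state; roughly two consecutive confident frames are needed, which damps flicker in the demo display.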

report/abstract.latex

Lines changed: 5 additions & 1 deletion
@@ -4,7 +4,11 @@
 \mbox{}
 
 \begin{abstract}
-This is my summary/abstract
+
+Purpose/Motivation
+Methods
+Results
+Conclusions
 
 FIXME: write it
 \end{abstract}

report/report.md

Lines changed: 17 additions & 8 deletions
@@ -856,7 +856,7 @@ the last window is zero padded.
 Sometimes there is a mismatch between the desired length of analysis window,
 and the labeled clips available in the training data.
 For example a dataset may consist of labeled audio clips with a length of 10 seconds,
-while the desired output is every 1 seconds.
+while the desired output is every second.
 When a dataset is labeled only with the presence of a sound at a coarse timescale,
 without information about where exactly the relevant sound(s) appears
 it is referred to as *weakly annotated* or *weakly labeled* data[@ComputationalAnalysisSound, ch 14.2.4.1].
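The analysis-window mismatch this hunk describes (10-second labeled clips, a prediction wanted every second, last window zero padded) amounts to the following split. A minimal sketch, assuming a 16 kHz sample rate, which is not stated in this hunk:

```python
import numpy

def split_windows(samples, window_length):
    # zero pad so the last (possibly partial) window reaches full length
    n_windows = int(numpy.ceil(len(samples) / window_length))
    padded = numpy.zeros(n_windows * window_length)
    padded[:len(samples)] = samples
    return padded.reshape(n_windows, window_length)

sr = 16000  # assumed sample rate
clip = numpy.ones(int(10.5 * sr))  # a 10.5 second clip
windows = split_windows(clip, window_length=1 * sr)
print(windows.shape)  # (11, 16000)
```

Each row then gets the clip-level (weak) label, even though the sound may not be present in every window.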
@@ -1611,6 +1611,13 @@ What is the battery lifetime. BOM
 
 # Conclusions
 
+<!--
+
+Recap what you did.
+Highlight the big accomplishments.
+Conclude. Wraps up your paper. Tie your research to the “real world.”
+-->
+
 Able to demonstrate Environmental Sound Classification
 running on a low-power microcontroller suitable for use in a sensor node.
 
@@ -1641,31 +1648,33 @@ However there are also promising results showing that CNNs can be
 effectively implemented with as little as 2 bits[@andri2016yodann][@miyashita2016convolutional][@IncrementalNetworkQuantization],
 and without using any multiplications[@leng2018extremely][@cintra2018low].
 
+<!--
 Low-power hardware accelerators for Convolutional Neural Networks will hopefully
 become available over the next few years.
 This may enable larger models at the same power budget,
 or to reduce power consumption at a given predictive performance level.
 End-to-end CNN models using raw audio as input becomes extra interesting with such a co-processor,
 since it allows also the filterbank processing to be offloaded from the general purpose CPU.
+-->
 
-In a practical deployment of on-edge classification, it is still desirable to
-be able to collect some data for evaluation of performance and further training.
+In a practical deployment of on-sensor classification, it is still desirable to
+be able to collect *some* data for evaluation of performance and further training.
 This could be sampled at random. But could it be more effective to use some sort of
-adaptive sampling, and possibly Active Learning?
+adaptive sampling, possibly Active Learning?
 
+<!--
 Normally such training and evaluation data is transferred as raw PCM audio,
 which inefficient in terms of bandwidth.
 Could low-power audio coding be applied to compress the data,
 while still enable reliable human labeling and use as evaluation/training data?
-
-It is also desirable to reduce how often classification is needed.
+-->
+
+It is also very desirable to reduce how often classification is needed.
 Could this benefit from an adaptive sampling strategy?
 For example to primarily do classification for time-periods which exceed
 a sound level threshold, or to sample less often when the sound source changes slowly.
 
 
-
-
 <!---
 DROP: clean up the scripts, make fit on one/two page
 MAYBE: table with software versions? From requirements.txt
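The adaptive-sampling idea in the last hunk (only classify time-periods that exceed a sound level threshold) could be sketched as a simple RMS gate. The threshold and the signal values here are made-up illustrations, not from the report:

```python
import numpy

def rms_db(samples):
    # RMS level of an audio window, in dB relative to full scale
    rms = numpy.sqrt(numpy.mean(samples ** 2))
    return 20 * numpy.log10(rms + 1e-12)  # small offset avoids log(0)

def should_classify(samples, threshold_db=-40.0):
    # run the (expensive) classifier only on sufficiently loud windows
    return rms_db(samples) > threshold_db

quiet = numpy.full(16000, 1e-4)  # near-silent window
loud = numpy.full(16000, 0.5)    # loud window
print(should_classify(quiet), should_classify(loud))
```

Such a gate is far cheaper than the CNN itself, so skipping classification on quiet windows directly reduces average power draw.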
