add audio read plugin for EPG Systems' AQ8 files

bjarthur · bjarthur · commit ad6ad8197417 · 2024-09-11T09:08:54.000-04:00
diff --git a/src/audio-read-plugin.py b/src/audio-read-plugin.py
@@ -1,8 +1,21 @@
-# a function that inputs the full path to a file containing the audio recording,
-# an interval of time, and some keyword arguments and returns the sampling
-# rate, shape of entire recording (not just the interval), and requested data as int16
+#a function that inputs the full path (including possibly a recording letter) to
+#a file containing the audio recording, an interval of time, and some keyword
+#arguments and returns the sampling rate, shape of entire recording (not just
+#the interval), and requested data as int16.  if {start,stop}_tic are None, return
+#the entire recording
 def audio_read(fullpath, start_tic, stop_tic, **kw):
 
     # load data, determine sampling rate and length, and do any special processing
 
     return sampling_rate, nsamples_nchannels, slice_of_data
+
+# a function that returns a list of file extensions which this plugin can handle
+def audio_read_exts(**kw):
+    return []  # e.g. ['.wav', '.WAV']
+
+# a function that returns a dictionary that maps logical recordings to channels in the file
+def audio_read_rec2ch(**kw):
+    return {}  # e.g. {'A':[0], 'B':[1]}, or {'A':[0,1]}
+
+def audio_read_init(**kw):
+    pass  # called once with the plugin's keyword arguments right after the plugin is imported, e.g. to load a lookup table
diff --git a/src/data.py b/src/data.py
@@ -118,12 +118,15 @@ def __init__(self, data_dir,
         self.np_rng = np.random.default_rng(None if random_seed_batch==-1 else random_seed_batch)
 
         sys.path.append(os.path.dirname(audio_read_plugin))
-        self.audio_read_plugin = os.path.basename(audio_read_plugin)
+        audio_read_plugin = os.path.basename(audio_read_plugin)
         self.audio_read_plugin_kwargs = audio_read_plugin_kwargs
+        self.audio_read_module = importlib.import_module(audio_read_plugin)
+        self.audio_read_module.audio_read_init(**self.audio_read_plugin_kwargs)
 
         sys.path.append(os.path.dirname(video_read_plugin))
-        self.video_read_plugin = os.path.basename(video_read_plugin)
+        video_read_plugin = os.path.basename(video_read_plugin)
         self.video_read_plugin_kwargs = video_read_plugin_kwargs
+        self.video_read_module = importlib.import_module(video_read_plugin)
 
         self.prepare_data_index(shiftby,
                                 labels_touse, kinds_touse,
@@ -139,14 +142,12 @@ def __init__(self, data_dir,
         signal.signal(signal.SIGTERM, term)
 
     def audio_read(self, fullpath, start_tic=None, stop_tic=None):
-        audio_read_module = importlib.import_module(self.audio_read_plugin)
-        return audio_read_module.audio_read(fullpath, start_tic, stop_tic,
-                                            **self.audio_read_plugin_kwargs)
+        return self.audio_read_module.audio_read(fullpath, start_tic, stop_tic,
+                                                 **self.audio_read_plugin_kwargs)
 
     def video_read(self, fullpath, start_frame=None, stop_frame=None):
-        video_read_module = importlib.import_module(self.video_read_plugin)
-        return video_read_module.video_read(fullpath, start_frame, stop_frame,
-                                            **self.video_read_plugin_kwargs)
+        return self.video_read_module.video_read(fullpath, start_frame, stop_frame,
+                                                 **self.video_read_plugin_kwargs)
 
     def catalog_overlaps(self, data):
         data.sort(key=lambda x: x['ticks'][0])
diff --git a/src/gui/controller.py b/src/gui/controller.py
@@ -1043,15 +1043,17 @@ def _validation_test_files(files_string, comma=True):
             return [','.join(wavfiles)] if comma else list(wavfiles)
     elif os.path.dirname(files_string.rstrip(os.sep)) == V.groundtruth_folder.value.rstrip(os.sep):
         dfs = []
-        for csvfile in filter(lambda x: x.endswith('.csv'), os.listdir(files_string)):
+        for csvfile in filter(lambda x: x.endswith('.csv'),  # annotation files, not recordings, are parsed with read_csv below
+                              os.listdir(files_string)):
             filepath = os.path.join(files_string, csvfile)
             if os.path.getsize(filepath) > 0:
                 dfs.append(pd.read_csv(filepath, header=None, index_col=False))
         if dfs:
             df = pd.concat(dfs)
             wavfiles = sorted(list(set(df.loc[df[3]=="annotated"][0])))
             return [','.join(wavfiles)] if comma else list(wavfiles)
-    elif files_string.lower().endswith('.wav'):
+    elif os.path.splitext(files_string[:-2 if len(M.audio_read_rec2ch())>1 else None])[1] \
+                in M.audio_read_exts():
         return [files_string] if comma else files_string.split(',')
     elif files_string!='':
         with open(files_string, "r") as fid:
@@ -2098,12 +2100,18 @@ def wavcsv_files_callback():
     if len(V.file_dialog_source.selected.indices)==0:
         bokehlog.info('ERROR: a file(s) must be selected in the file browser')
         return
-    filename = V.file_dialog_source.data['names'][V.file_dialog_source.selected.indices[0]]
-    files = os.path.join(M.file_dialog_root, filename)
-    for i in range(1, len(V.file_dialog_source.selected.indices)):
+    files = []
+    for i in range(len(V.file_dialog_source.selected.indices)):
         filename = V.file_dialog_source.data['names'][V.file_dialog_source.selected.indices[i]]
-        files += ','+os.path.join(M.file_dialog_root, filename)
-    V.wavcsv_files.value = files
+        if os.path.splitext(filename)[1] in M.audio_read_exts():
+            if len(M.audio_read_rec2ch()) == 1:
+                files.append(os.path.join(M.file_dialog_root, filename))
+            else:
+                files.extend([os.path.join(M.file_dialog_root, filename)+'-'+k
+                              for k in M.audio_read_rec2ch().keys()])
+        else:
+            files.append(os.path.join(M.file_dialog_root, filename))
+    V.wavcsv_files.value = ','.join(files)
 
 def groundtruth_callback():
     if len(V.file_dialog_source.selected.indices)>=2:
@@ -2123,8 +2131,12 @@ def _validation_test_files_callback():
       filename = V.file_dialog_source.data['names'][V.file_dialog_source.selected.indices[0]]
       filepath = os.path.join(M.file_dialog_root, filename)
     if nindices<2:
-        if filepath.lower().endswith('.wav'):
-            return os.path.basename(filepath)
+        if os.path.splitext(filepath)[1] in M.audio_read_exts():
+            if len(M.audio_read_rec2ch()) == 1:
+                return os.path.basename(filepath)
+            else:
+                return ','.join([os.path.basename(filepath)+'-'+k
+                                 for k in M.audio_read_rec2ch().keys()])
         else:
             return filepath
     else:
diff --git a/src/gui/model.py b/src/gui/model.py
@@ -84,8 +84,14 @@ def save_annotations():
         fids = {}
         csvwriters = {}
         csvfiles_current = set([])
-        for wavfile in set([os.path.join(*x['file']) for x in annotated_sounds if x["label"]!=""]):
-            csvfile = wavfile[:-4]+"-annotated-"+songexplorer_starttime+".csv"
+        wavfiles = set()
+        for sound in annotated_sounds:
+            if not sound["label"]:  continue
+            wavfile = os.path.join(*sound["file"])
+            wavfile_norec = wavfile.rsplit('-',1)[0] if len(audio_read_rec2ch())>1 else wavfile  # drop only the trailing "-<rec>"; keep hyphens in the file name
+            wavfiles |= set([wavfile_norec])
+        for wavfile in wavfiles:
+            csvfile = os.path.splitext(wavfile)[0]+"-annotated-"+songexplorer_starttime+".csv"
             annotated_csvfiles_all.add(csvfile)
             csvfiles_current.add(csvfile)
             fids[wavfile] = open(os.path.join(V.groundtruth_folder.value, csvfile),
@@ -98,7 +104,9 @@ def save_annotations():
         corrected_sounds=[]
         for annotation in annotated_sounds:
             if annotation['label']!="" and not annotation['label'].isspace():
-                csvwriters[os.path.join(*annotation['file'])].writerow(
+                wavfile = os.path.join(*annotation['file'])
+                wavfile_norec = wavfile.rsplit('-',1)[0] if len(audio_read_rec2ch())>1 else wavfile  # must match the keys built above
+                csvwriters[wavfile_norec].writerow(
                         [annotation['file'][1],
                         annotation['ticks'][0], annotation['ticks'][1],
                         'annotated', annotation['label']])
@@ -110,10 +118,15 @@ def save_annotations():
                                           x['ticks'][1], 'annotated', x['label']] \
                                          for x in corrected_sounds], \
                                         columns=['file','start','stop','kind','label'])
-            for wavfile in set([os.path.join(*x['file']) for x in corrected_sounds]):
+            wavfiles = set()
+            for sound in corrected_sounds:
+                wavfile = os.path.join(*sound["file"])
+                wavfile_norec = wavfile.rsplit('-',1)[0] if len(audio_read_rec2ch())>1 else wavfile  # same stripping as when the csv files were named
+                wavfiles |= set([wavfile_norec])
+            for wavfile in wavfiles:
                 wavdir, wavbase = os.path.split(wavfile)
                 wavpath = os.path.join(V.groundtruth_folder.value, wavdir)
-                for csvbase in filter(lambda x: x.startswith(wavbase[:-4]) and
+                for csvbase in filter(lambda x: x.startswith(os.path.splitext(wavbase)[0]) and
                                                 x.endswith(".csv") and
                                                 "-annotated" in x and
                                                 songexplorer_starttime not in x,
@@ -235,7 +248,8 @@ def init(_bokeh_document, _configuration_file, _use_aitch):
     global context_width_sec0, context_offset_sec0
     global xcluster, ycluster, zcluster, ndcluster, tic2pix_max, snippet_width_pix, ilayer, ispecies, iword, inohyphen, ikind, nlayers, layers, species, words, nohyphens, kinds, used_labels, snippets_gap_sec, snippets_tic, snippets_gap_tic, snippets_decimate_by, snippets_pix, snippets_gap_pix, context_decimate_by, context_width_tic, context_offset_tic, context_sound, isnippet, xsnippet, ysnippet, file_nframes, context_midpoint_tic, ilabel, used_sounds, used_starts_sorted, used_stops, iused_stops_sorted, annotated_sounds, annotated_starts_sorted, annotated_stops, iannotated_stops_sorted, annotated_csvfiles_all, nrecent_annotations, clustered_sounds, clustered_activations, used_recording2firstsound, clustered_starts_sorted, clustered_stops, iclustered_stops_sorted, songexplorer_starttime, history_stack, history_idx, wizard, action, function, statepath, state, file_dialog_root, file_dialog_filter, nearest_sounds, status_ticker_queue, waitfor_job, dfs, remaining_isounds
     global user_changed_recording, user_copied_parameters
-    global audio_read, video_read, detect_labels, doubleclick_annotation, context_data, context_data_istart, model, video_findfile
+    global audio_read, audio_read_exts, audio_read_rec2ch
+    global video_read, detect_labels, doubleclick_annotation, context_data, context_data_istart, model, video_findfile
     global detect_parameters, doubleclick_parameters, model_parameters, cluster_parameters
 
     bokeh_document = _bokeh_document
@@ -253,9 +267,11 @@ def init(_bokeh_document, _configuration_file, _use_aitch):
 
     sys.path.insert(0,os.path.dirname(audio_read_plugin))
     audio_read_module = importlib.import_module(os.path.basename(audio_read_plugin))
+    audio_read_module.audio_read_init(**audio_read_plugin_kwargs)
     def audio_read(wav_path, start_tic=None, stop_tic=None):
-        return audio_read_module.audio_read(wav_path, start_tic, stop_tic,
-                                            **audio_read_plugin_kwargs)
+        return audio_read_module.audio_read(wav_path, start_tic, stop_tic, **audio_read_plugin_kwargs)
+    def audio_read_exts(): return audio_read_module.audio_read_exts(**audio_read_plugin_kwargs)
+    def audio_read_rec2ch(): return audio_read_module.audio_read_rec2ch(**audio_read_plugin_kwargs)
 
     sys.path.insert(0,os.path.dirname(video_read_plugin))
     video_read_module = importlib.import_module(os.path.basename(video_read_plugin))
diff --git a/src/highpass-filter.py b/src/highpass-filter.py
@@ -33,3 +33,12 @@ def audio_read(wav_path, start_tic, stop_tic, cutoff=1, order=2):
     data_unpadded = data_filtered[padlenL:-padlenR or None, :]
 
     return sampling_rate, data.shape, data_unpadded
+
+def audio_read_exts(**kw):
+    return ['.wav', '.WAV']  # file extensions handled by this plugin
+
+def audio_read_rec2ch(**kw):
+    return {'A':[0]}  # one logical recording, A, mapped to channel 0 of the file
+
+def audio_read_init(**kw):
+    pass  # no one-time setup is needed
diff --git a/src/load-epg-lut.npy b/src/load-epg-lut.npy
diff --git a/src/load-epg-make-lut.py b/src/load-epg-make-lut.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+# to have songexplorer automatically convert the values stored in EPG .aq8
+# files into voltages, export the binary data in some .D0x files as plain text
+# .A0x files using the “Save measured data as ASCII” function in the stylet+
+# software.  then use this script to generate a lookup table, and specify the
+# path to the generated .npy file in the keyword arguments to the load-epg
+# audio_read plugin in configuration.py.
+
+# e.g. src/load-epg-make-lut.py <path-to-folder-of-D0x-and-A0x-files>
+
+import sys
+import os
+import numpy as np
+
+if len(sys.argv) != 2:
+    sys.exit("usage: "+sys.argv[0]+" <path-to-folder-of-D0x-and-A0x-files>")
+path2data = sys.argv[1]
+
+# each row pairs a raw binary value with the second column of the ASCII export
+lut = np.empty((0,2))
+for asciifile in filter(lambda x: os.path.splitext(x)[1].startswith('.A'),
+                        os.listdir(path2data)):
+    print(asciifile)
+    asciidata = np.loadtxt(os.path.join(path2data, asciifile), delimiter=';')
+    # the matching binary file has the same name but with a D in place of the A
+    binaryfile = asciifile[:-3]+'D'+asciifile[-2:]
+    binarydata = np.fromfile(os.path.join(path2data, binaryfile), dtype=np.uint32)
+    this_lut = np.hstack((np.expand_dims(binarydata, axis=1), asciidata[:,[1]]))
+    lut = np.unique(np.vstack((lut, np.unique(this_lut, axis=0))), axis=0)
+
+# order the rows by raw value
+isort = np.argsort(lut[:,0])
+lut = lut[isort,:]
+
+# strip any trailing separator so the table is saved next to, not inside, the folder
+np.save(path2data.rstrip(os.sep)+".npy", lut)
diff --git a/src/load-epg.py b/src/load-epg.py
@@ -0,0 +1,72 @@
+# to analyze Electrical Penetration Graph (EPG; https://epgsystems.eu) data,
+# first create a lookup table using load-epg-make-lut.py.  then use the .aq8
+# files directly using this plugin.  the .D0x files that stylet+ automatically
+# creates can be deleted (as can any .A0x files).
+
+#audio_read_plugin="load-epg"
+#audio_read_plugin_kwargs={"nchan":8, "lut_file":"load-epg-lut.npy",
+#                          "ncomments":3, "Fs":"smpl.frq= ([0-9.]+)Hz"}
+
+import re
+import numpy as np
+import os
+
+# read one recording from an .aq8 file.  fullpath_aq8_rec is the path to the
+# file followed by one separator character and the recording letter, e.g.
+# "/data/exp.aq8-A".  the first ncomments lines of the file are a text header
+# in which the regular expression Fs is searched for; its first group is the
+# sampling rate.  the rest of the file is read as interleaved uint32 samples,
+# nchan per tic, and converted with the lookup table loaded by audio_read_init.
+# returns the sampling rate, the shape of the entire recording, and the
+# requested data as int16.  raises ValueError if no sampling rate is found.
+def audio_read(fullpath_aq8_rec, start_tic, stop_tic,
+               nchan=8, ncomments=3, Fs="smpl.frq= ([0-9.]+)Hz", **kw):
+    fullpath_aq8, rec = fullpath_aq8_rec[:-2], fullpath_aq8_rec[-1]
+
+    if not start_tic:  start_tic=0
+
+    sampling_rate = None
+    with open(fullpath_aq8, 'rb') as fid:
+        for _ in range(ncomments):
+            line = fid.readline().decode()
+            m = re.search(Fs, line)
+            if m:  sampling_rate = float(m.group(1))
+        n0 = fid.tell()
+        n1 = fid.seek(0,2)
+        nsamples = (n1-n0)//4//nchan
+        fid.seek(n0)
+        if not stop_tic:  stop_tic=nsamples
+        fid.seek(4*nchan*start_tic, 1)
+        b = fid.read(4*nchan*(stop_tic-start_tic))
+
+    if sampling_rate is None:
+        raise ValueError("no sampling rate matching "+repr(Fs)+" found in the first "+
+                         str(ncomments)+" lines of "+fullpath_aq8)
+
+    v = np.frombuffer(b, dtype=np.uint32)
+    a = np.reshape(v, (-1,nchan))
+
+    # honor the configured channel count instead of always using the default
+    chs = audio_read_rec2ch(nchan=nchan)[rec]
+    s = a[:, chs]
+
+    # raw values above the largest table entry are mapped to the last entry
+    i = np.minimum(np.searchsorted(lut[:,0], s), len(lut)-1)
+    m = np.take(lut[:,1], i)
+    c = (m / 10 * np.iinfo(np.int16).max).astype(np.int16)
+
+    return sampling_rate, (nsamples,len(chs)), c
+
+# the file extension this plugin reads, which depends on the channel count
+def audio_read_exts(nchan=8, **kw):
+    return ['.aq'+str(nchan)]
+
+# each channel in the file is its own recording, lettered A, B, C, ...
+def audio_read_rec2ch(nchan=8, **kw):
+    return {chr(65+i):[i] for i in range(nchan)}
+
+# load the lookup table into a module global; lut_file is joined to this script's folder
+def audio_read_init(lut_file="load-epg-lut.npy", **kw):
+    global lut
+    script_dir = os.path.abspath(os.path.dirname(__file__))
+    lut = np.load(os.path.join(script_dir, lut_file))
diff --git a/src/load-wav.py b/src/load-wav.py
@@ -19,3 +19,12 @@ def audio_read(wav_path, start_tic, stop_tic, mmap=True):
     data_sliced = data[start_tic_clamped : stop_tic_clamped, :]
 
     return sampling_rate, data.shape, data_sliced
+
+def audio_read_exts(**kw):
+    return ['.wav', '.WAV']  # file extensions handled by this plugin
+
+def audio_read_rec2ch(**kw):
+    return {'A':[0]}  # one logical recording, A, mapped to channel 0 of the file
+
+def audio_read_init(**kw):
+    pass  # no one-time setup is needed