Skip to content

Commit 4c7b851

Browse files
committed
add audio read plugin for EPG Systems' AQ8 files
1 parent 08c00b5 commit 4c7b851

File tree

13 files changed

+233
-52
lines changed

13 files changed

+233
-52
lines changed

src/audio-read-plugin.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,21 @@
1-
# a function that inputs the full path to a file containing the audio recording,
2-
# an interval of time, and some keyword arguments and returns the sampling
3-
# rate, shape of entire recording (not just the interval), and requested data as int16
1+
# a function that takes the full path (possibly including a recording letter) to
2+
# a file containing the audio recording, an interval of time, and some keyword
3+
# arguments, and returns the sampling rate, the shape of the entire recording (not
4+
# just the interval), and the requested data as int16. if start_tic and stop_tic
5+
# are None, return the entire recording
46
def audio_read(fullpath, start_tic, stop_tic, **kw):
57

68
# load data, determine sampling rate and length, and do any special processing
79

810
return sampling_rate, nsamples_nchannels, slice_of_data
11+
12+
# a function that returns a list of file extensions which this plugin can handle
13+
def audio_read_exts(**kw):
14+
return [] # e.g. ['.wav', '.WAV']
15+
16+
# a function that returns a dictionary that maps logical recordings to channels in the file
17+
def audio_read_rec2ch(**kw):
18+
return {} # e.g. {'A':[0], 'B':[1]}, or {'A':[0,1]}
19+
20+
def audio_read_init(**kw):
21+
pass

src/classify

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ def main():
7878
def audio_read(wav_path, start_tic=None, stop_tic=None):
7979
return audio_read_module.audio_read(wav_path, start_tic, stop_tic,
8080
**FLAGS.audio_read_plugin_kwargs)
81+
def audio_read_rec2ch():
82+
return audio_read_module.audio_read_rec2ch(**FLAGS.audio_read_plugin_kwargs)
8183

8284
sys.path.append(os.path.dirname(FLAGS.video_read_plugin))
8385
video_read_module = importlib.import_module(os.path.basename(FLAGS.video_read_plugin))
@@ -244,7 +246,13 @@ def main():
244246
else:
245247
adjusted_probability = probability_matrix[:,ch]
246248
waveform = adjusted_probability*np.iinfo(np.int16).max
247-
filename = os.path.splitext(FLAGS.wav)[0]+'-'+labels[ch]+'.wav'
249+
if len(audio_read_rec2ch()) == 1:
250+
withoutext = os.path.splitext(FLAGS.wav)[0]
251+
else:
252+
tmp = FLAGS.wav.split('-')
253+
withext, rec = '-'.join(tmp[:-1]), tmp[-1]
254+
withoutext = os.path.splitext(withext)[0]+'-'+rec
255+
filename = withoutext+'-'+labels[ch]+'.wav'
248256
wavfile.write(filename, int(sample_rate), waveform.astype('int16'))
249257

250258
if __name__ == '__main__':

src/congruence

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
# --measure=label \
1212
# --nprobabilities=20 \
1313
# --audio_tic_rate=2500 \
14-
# --parallelize=1
14+
# --parallelize=1 \
15+
# --has_rec=False
1516

1617
import argparse
1718
import sys
@@ -164,7 +165,12 @@ def main():
164165
thresholds=set()
165166
for wavdir in wavdirs:
166167
for wavfile in wavdirs[wavdir]:
167-
wavfile_noext = os.path.splitext(wavfile)[0]
168+
if FLAGS.has_rec == "False":
169+
wavfile_noext = os.path.splitext(wavfile)[0]
170+
else:
171+
tmp = wavfile.split('-')
172+
withext, rec = '-'.join(tmp[:-1]), tmp[-1]
173+
wavfile_noext = os.path.splitext(withext)[0]+'-'+rec
168174

169175
if not labels:
170176
with open(os.path.join(FLAGS.basepath,wavdir,wavfile_noext+'-ethogram.log'), 'r') as fid:
@@ -704,6 +710,10 @@ if __name__ == "__main__":
704710
'--parallelize',
705711
default=0,
706712
type=int)
713+
parser.add_argument(
714+
'--has_rec',
715+
default='False',
716+
type=str)
707717

708718
FLAGS, unparsed = parser.parse_known_args()
709719

src/data.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,15 @@ def __init__(self, data_dir,
118118
self.np_rng = np.random.default_rng(None if random_seed_batch==-1 else random_seed_batch)
119119

120120
sys.path.append(os.path.dirname(audio_read_plugin))
121-
self.audio_read_plugin = os.path.basename(audio_read_plugin)
121+
audio_read_plugin = os.path.basename(audio_read_plugin)
122122
self.audio_read_plugin_kwargs = audio_read_plugin_kwargs
123+
self.audio_read_module = importlib.import_module(audio_read_plugin)
124+
self.audio_read_module.audio_read_init(**self.audio_read_plugin_kwargs)
123125

124126
sys.path.append(os.path.dirname(video_read_plugin))
125-
self.video_read_plugin = os.path.basename(video_read_plugin)
127+
video_read_plugin = os.path.basename(video_read_plugin)
126128
self.video_read_plugin_kwargs = video_read_plugin_kwargs
129+
self.video_read_module = importlib.import_module(video_read_plugin)
127130

128131
self.prepare_data_index(shiftby,
129132
labels_touse, kinds_touse,
@@ -139,14 +142,12 @@ def __init__(self, data_dir,
139142
signal.signal(signal.SIGTERM, term)
140143

141144
def audio_read(self, fullpath, start_tic=None, stop_tic=None):
142-
audio_read_module = importlib.import_module(self.audio_read_plugin)
143-
return audio_read_module.audio_read(fullpath, start_tic, stop_tic,
144-
**self.audio_read_plugin_kwargs)
145+
return self.audio_read_module.audio_read(fullpath, start_tic, stop_tic,
146+
**self.audio_read_plugin_kwargs)
145147

146148
def video_read(self, fullpath, start_frame=None, stop_frame=None):
147-
video_read_module = importlib.import_module(self.video_read_plugin)
148-
return video_read_module.video_read(fullpath, start_frame, stop_frame,
149-
**self.video_read_plugin_kwargs)
149+
return self.video_read_module.video_read(fullpath, start_frame, stop_frame,
150+
**self.video_read_plugin_kwargs)
150151

151152
def catalog_overlaps(self, data):
152153
data.sort(key=lambda x: x['ticks'][0])

src/ethogram

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
# apply per-class thresholds to discretize probabilities
44

5-
# ethogram <logdir> <model> <thresholds-file> <wav-file> <wav-tic-rate>
5+
# ethogram <logdir> <model> <thresholds-file> <wav-file> <wav-tic-rate> <has_rec>
66

77
# e.g.
8-
# ethogram `pwd`/trained-classifier 1k 50 `pwd`/groundtruth-data/round1/20161207T102314_ch1_p1.wav 5000
8+
# ethogram `pwd`/trained-classifier 1k 50 `pwd`/groundtruth-data/round1/20161207T102314_ch1_p1.wav 5000 False
99

1010
import sys
1111
import os
@@ -29,19 +29,28 @@ print("hostname = "+socket.gethostname())
2929

3030
try:
3131

32-
_,logdir,model,thresholds_file,wav_file,audio_tic_rate = argv
32+
_,logdir,model,thresholds_file,wav_file,audio_tic_rate,has_rec = argv
3333
print('logdir: '+logdir)
3434
print('model: '+model)
3535
print('thresholds_file: '+thresholds_file)
3636
print('wav_file: '+wav_file)
3737
print('audio_tic_rate: '+audio_tic_rate)
38+
print('has_rec: '+has_rec)
3839
audio_tic_rate=float(audio_tic_rate)
3940

40-
if not os.path.isfile(wav_file):
41-
print('cannot find WAV file')
42-
exit()
4341
wavpath, wavname = os.path.split(wav_file)
44-
wavname_noext = os.path.splitext(wavname)[0]
42+
if has_rec == "False":
43+
if not os.path.isfile(wav_file):
44+
print('cannot find WAV file')
45+
exit()
46+
wavname_noext = os.path.splitext(wavname)[0]
47+
else:
48+
tmp = wavname.split('-')
49+
withext, rec = '-'.join(tmp[:-1]), tmp[-1]
50+
wavname_noext = os.path.splitext(withext)[0]+'-'+rec
51+
if not os.path.isfile(os.path.join(wavpath, withext)):
52+
print('cannot find WAV file')
53+
exit()
4554

4655
precision_recall_ratios, thresholds = read_thresholds(logdir, model, thresholds_file)
4756

src/gui/controller.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,15 +1043,17 @@ def _validation_test_files(files_string, comma=True):
10431043
return [','.join(wavfiles)] if comma else list(wavfiles)
10441044
elif os.path.dirname(files_string.rstrip(os.sep)) == V.groundtruth_folder.value.rstrip(os.sep):
10451045
dfs = []
1046-
for csvfile in filter(lambda x: x.endswith('.csv'), os.listdir(files_string)):
1046+
for csvfile in filter(lambda x: os.path.splitext(x)[1] in M.audio_read_exts(),
1047+
os.listdir(files_string)):
10471048
filepath = os.path.join(files_string, csvfile)
10481049
if os.path.getsize(filepath) > 0:
10491050
dfs.append(pd.read_csv(filepath, header=None, index_col=False))
10501051
if dfs:
10511052
df = pd.concat(dfs)
10521053
wavfiles = sorted(list(set(df.loc[df[3]=="annotated"][0])))
10531054
return [','.join(wavfiles)] if comma else list(wavfiles)
1054-
elif files_string.lower().endswith('.wav'):
1055+
elif os.path.splitext(files_string[:-2 if len(M.audio_read_rec2ch())>1 else None])[1] \
1056+
in M.audio_read_exts():
10551057
return [files_string] if comma else files_string.split(',')
10561058
elif files_string!='':
10571059
with open(files_string, "r") as fid:
@@ -1827,7 +1829,7 @@ def classify_succeeded(modeldir, wavfile, reftime):
18271829
with open(os.path.join(modeldir, 'labels.txt'), 'r') as fid:
18281830
labels = fid.read().splitlines()
18291831
for x in labels:
1830-
if not recent_file_exists(wavfile[:-4]+'-'+x+'.wav', reftime, True):
1832+
if not recent_file_exists(M.trim_ext(wavfile)+'-'+x+'.wav', reftime, True):
18311833
return False
18321834
return True
18331835

@@ -1839,7 +1841,7 @@ async def _classify_actuate(wavfiles):
18391841
wavfile = wavfiles.pop(0)
18401842
currtime = time.time()
18411843
logdir, model, _, check_point = M.parse_model_file(V.model_file.value)
1842-
logfile = os.path.splitext(wavfile)[0]+'-classify.log'
1844+
logfile = M.trim_ext(wavfile)+'-classify.log'
18431845
args = ["--context="+V.context.value,
18441846
"--shiftby="+V.shiftby.value,
18451847
"--loss="+V.loss.value,
@@ -1900,7 +1902,7 @@ def ethogram_succeeded(modeldir, ckpt, wavfile, reftime):
19001902
row1 = next(csvreader)
19011903
precision_recalls = row1[1:]
19021904
for x in precision_recalls:
1903-
if not recent_file_exists(wavfile[:-4]+'-predicted-'+x+'pr.csv', reftime, True):
1905+
if not recent_file_exists(M.trim_ext(wavfile)+'-predicted-'+x+'pr.csv', reftime, True):
19041906
return False
19051907
return True
19061908

@@ -1918,14 +1920,15 @@ async def _ethogram_actuate(i, wavfiles, threads, results):
19181920
thresholds_file = os.path.basename(V.model_file.value)
19191921
else:
19201922
thresholds_file = 'thresholds.ckpt-'+check_point+'.csv'
1921-
logfile = os.path.splitext(wavfile)[0]+'-ethogram.log'
1923+
logfile = M.trim_ext(wavfile)+'-ethogram.log'
19221924
jobid = generic_actuate("ethogram", logfile, M.ethogram_where,
19231925
M.ethogram_ncpu_cores,
19241926
M.ethogram_ngpu_cards,
19251927
M.ethogram_ngigabytes_memory,
19261928
M.ethogram_cluster_flags,
19271929
logdir, model, thresholds_file, wavfile,
1928-
str(M.audio_tic_rate))
1930+
str(M.audio_tic_rate),
1931+
"False" if len(M.audio_read_rec2ch()) == 1 else "True")
19291932
displaystring = "ETHOGRAM "+os.path.basename(wavfile)
19301933
if jobid:
19311934
displaystring += " ("+jobid+")"
@@ -2048,7 +2051,8 @@ async def congruence_actuate():
20482051
"--measure="+V.congruence_measure.value,
20492052
"--nprobabilities="+str(M.nprobabilities),
20502053
"--audio_tic_rate="+str(M.audio_tic_rate),
2051-
"--parallelize="+str(M.congruence_parallelize))
2054+
"--parallelize="+str(M.congruence_parallelize),
2055+
"--has_rec="+("False" if len(M.audio_read_rec2ch()) == 1 else "True"))
20522056
displaystring = "CONGRUENCE "+os.path.basename(all_files[0])
20532057
if jobid:
20542058
displaystring += " ("+jobid+")"
@@ -2098,12 +2102,18 @@ def wavcsv_files_callback():
20982102
if len(V.file_dialog_source.selected.indices)==0:
20992103
bokehlog.info('ERROR: a file(s) must be selected in the file browser')
21002104
return
2101-
filename = V.file_dialog_source.data['names'][V.file_dialog_source.selected.indices[0]]
2102-
files = os.path.join(M.file_dialog_root, filename)
2103-
for i in range(1, len(V.file_dialog_source.selected.indices)):
2105+
files = []
2106+
for i in range(len(V.file_dialog_source.selected.indices)):
21042107
filename = V.file_dialog_source.data['names'][V.file_dialog_source.selected.indices[i]]
2105-
files += ','+os.path.join(M.file_dialog_root, filename)
2106-
V.wavcsv_files.value = files
2108+
if os.path.splitext(filename)[1] in M.audio_read_exts():
2109+
if len(M.audio_read_rec2ch()) == 1:
2110+
files.append(os.path.join(M.file_dialog_root, filename))
2111+
else:
2112+
files.extend([os.path.join(M.file_dialog_root, filename)+'-'+k
2113+
for k in M.audio_read_rec2ch().keys()])
2114+
else:
2115+
files.append(os.path.join(M.file_dialog_root, filename))
2116+
V.wavcsv_files.value = ','.join(files)
21072117

21082118
def groundtruth_callback():
21092119
if len(V.file_dialog_source.selected.indices)>=2:
@@ -2123,8 +2133,12 @@ def _validation_test_files_callback():
21232133
filename = V.file_dialog_source.data['names'][V.file_dialog_source.selected.indices[0]]
21242134
filepath = os.path.join(M.file_dialog_root, filename)
21252135
if nindices<2:
2126-
if filepath.lower().endswith('.wav'):
2127-
return os.path.basename(filepath)
2136+
if os.path.splitext(filepath)[1] in M.audio_read_exts():
2137+
if len(M.audio_read_rec2ch()) == 1:
2138+
return os.path.basename(filepath)
2139+
else:
2140+
return ','.join([os.path.basename(filepath)+'-'+k
2141+
for k in M.audio_read_rec2ch().keys()])
21282142
else:
21292143
return filepath
21302144
else:

src/gui/model.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,27 @@ def isused(sound):
7878
return np.where([x['file']==sound['file'] and x['ticks']==sound['ticks'] \
7979
for x in used_sounds])[0]
8080

81+
def trim_ext(wavfile):
82+
if len(audio_read_rec2ch()) == 1:
83+
withoutext = os.path.splitext(wavfile)[0]
84+
else:
85+
tmp = wavfile.split('-')
86+
withext, rec = '-'.join(tmp[:-1]), tmp[-1]
87+
withoutext = os.path.splitext(withext)[0]+'-'+rec
88+
return withoutext
89+
8190
def save_annotations():
8291
global nrecent_annotations
8392
if nrecent_annotations>0:
8493
fids = {}
8594
csvwriters = {}
8695
csvfiles_current = set([])
87-
for wavfile in set([os.path.join(*x['file']) for x in annotated_sounds if x["label"]!=""]):
88-
csvfile = wavfile[:-4]+"-annotated-"+songexplorer_starttime+".csv"
96+
wavfiles = set()
97+
for sound in annotated_sounds:
98+
if not sound["label"]: continue
99+
wavfiles |= set([trim_ext(os.path.join(*sound["file"]))])
100+
for wavfile in wavfiles:
101+
csvfile = wavfile+"-annotated-"+songexplorer_starttime+".csv"
89102
annotated_csvfiles_all.add(csvfile)
90103
csvfiles_current.add(csvfile)
91104
fids[wavfile] = open(os.path.join(V.groundtruth_folder.value, csvfile),
@@ -98,10 +111,11 @@ def save_annotations():
98111
corrected_sounds=[]
99112
for annotation in annotated_sounds:
100113
if annotation['label']!="" and not annotation['label'].isspace():
101-
csvwriters[os.path.join(*annotation['file'])].writerow(
114+
wavfile_noext = trim_ext(os.path.join(*annotation['file']))
115+
csvwriters[wavfile_noext].writerow(
102116
[annotation['file'][1],
103-
annotation['ticks'][0], annotation['ticks'][1],
104-
'annotated', annotation['label']])
117+
annotation['ticks'][0], annotation['ticks'][1],
118+
'annotated', annotation['label']])
105119
iused = isused(annotation)
106120
if len(iused)>0 and used_sounds[iused[0]]['kind']=='annotated':
107121
corrected_sounds.append(annotation)
@@ -110,10 +124,14 @@ def save_annotations():
110124
x['ticks'][1], 'annotated', x['label']] \
111125
for x in corrected_sounds], \
112126
columns=['file','start','stop','kind','label'])
113-
for wavfile in set([os.path.join(*x['file']) for x in corrected_sounds]):
127+
wavfiles = set()
128+
for sound in corrected_sounds:
129+
wavfile_noext = trim_ext(os.path.join(*sound["file"]))
130+
wavfiles |= set([wavfile_noext])
131+
for wavfile in wavfiles:
114132
wavdir, wavbase = os.path.split(wavfile)
115133
wavpath = os.path.join(V.groundtruth_folder.value, wavdir)
116-
for csvbase in filter(lambda x: x.startswith(wavbase[:-4]) and
134+
for csvbase in filter(lambda x: x.startswith(os.path.splitext(wavbase)[0]) and
117135
x.endswith(".csv") and
118136
"-annotated" in x and
119137
songexplorer_starttime not in x,
@@ -235,7 +253,8 @@ def init(_bokeh_document, _configuration_file, _use_aitch):
235253
global context_width_sec0, context_offset_sec0
236254
global xcluster, ycluster, zcluster, ndcluster, tic2pix_max, snippet_width_pix, ilayer, ispecies, iword, inohyphen, ikind, nlayers, layers, species, words, nohyphens, kinds, used_labels, snippets_gap_sec, snippets_tic, snippets_gap_tic, snippets_decimate_by, snippets_pix, snippets_gap_pix, context_decimate_by, context_width_tic, context_offset_tic, context_sound, isnippet, xsnippet, ysnippet, file_nframes, context_midpoint_tic, ilabel, used_sounds, used_starts_sorted, used_stops, iused_stops_sorted, annotated_sounds, annotated_starts_sorted, annotated_stops, iannotated_stops_sorted, annotated_csvfiles_all, nrecent_annotations, clustered_sounds, clustered_activations, used_recording2firstsound, clustered_starts_sorted, clustered_stops, iclustered_stops_sorted, songexplorer_starttime, history_stack, history_idx, wizard, action, function, statepath, state, file_dialog_root, file_dialog_filter, nearest_sounds, status_ticker_queue, waitfor_job, dfs, remaining_isounds
237255
global user_changed_recording, user_copied_parameters
238-
global audio_read, video_read, detect_labels, doubleclick_annotation, context_data, context_data_istart, model, video_findfile
256+
global audio_read, audio_read_exts, audio_read_rec2ch
257+
global video_read, detect_labels, doubleclick_annotation, context_data, context_data_istart, model, video_findfile
239258
global detect_parameters, doubleclick_parameters, model_parameters, cluster_parameters
240259

241260
bokeh_document = _bokeh_document
@@ -253,9 +272,11 @@ def init(_bokeh_document, _configuration_file, _use_aitch):
253272

254273
sys.path.insert(0,os.path.dirname(audio_read_plugin))
255274
audio_read_module = importlib.import_module(os.path.basename(audio_read_plugin))
275+
audio_read_module.audio_read_init(**audio_read_plugin_kwargs)
256276
def audio_read(wav_path, start_tic=None, stop_tic=None):
257-
return audio_read_module.audio_read(wav_path, start_tic, stop_tic,
258-
**audio_read_plugin_kwargs)
277+
return audio_read_module.audio_read(wav_path, start_tic, stop_tic, **audio_read_plugin_kwargs)
278+
def audio_read_exts(): return audio_read_module.audio_read_exts(**audio_read_plugin_kwargs)
279+
def audio_read_rec2ch(): return audio_read_module.audio_read_rec2ch(**audio_read_plugin_kwargs)
259280

260281
sys.path.insert(0,os.path.dirname(video_read_plugin))
261282
video_read_module = importlib.import_module(os.path.basename(video_read_plugin))

src/highpass-filter.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,12 @@ def audio_read(wav_path, start_tic, stop_tic, cutoff=1, order=2):
3333
data_unpadded = data_filtered[padlenL:-padlenR or None, :]
3434

3535
return sampling_rate, data.shape, data_unpadded
36+
37+
def audio_read_exts(**kw):
38+
return ['.wav', '.WAV']
39+
40+
def audio_read_rec2ch(**kw):
41+
return {'A':[0]}
42+
43+
def audio_read_init(**kw):
44+
pass

src/load-epg-lut.npy

189 KB
Binary file not shown.

0 commit comments

Comments
 (0)