From d1e3efd970855afdffd0029b371d5f0e56378b6b Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: Thu, 28 Apr 2022 22:47:56 -0400 Subject: [PATCH 01/12] parsing and automatic test/dev/eval split on audio files --- egs/ami/s5b/local/ami_download.sh | 10 +++++++++- egs/ami/s5c/local/prepare_data.py | 2 ++ egs/ami/s5c/run.sh | 2 ++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/egs/ami/s5b/local/ami_download.sh b/egs/ami/s5b/local/ami_download.sh index bae72d1716a..b460b539bc4 100755 --- a/egs/ami/s5b/local/ami_download.sh +++ b/egs/ami/s5b/local/ami_download.sh @@ -55,9 +55,15 @@ cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $wdir/am wgetfile=$wdir/wget_$mic.sh -# TODO fix this with Pawel, files don't exist anymore, +' +# TODO fix this with Pawel, files dont exist anymore, manifest="wget --continue -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt" license="wget --continue -O $adir/LICENCE.TXT http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt" +' +manifest="wget --continue -O $adir/MANIFEST.TXT https://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-1372-Thu-Apr-28-2022.manifest.txt" + +# Parse the manifest file, and separate recordings into train, dev, and eval sets +python3 split_manifest.py echo "#!/usr/bin/env bash" > $wgetfile echo $manifest >> $wgetfile @@ -86,6 +92,7 @@ echo "Look at $wdir/log/download_ami_$mic.log for progress" $wgetfile &> $wdir/log/download_ami_$mic.log # Do rough check if #wavs is as expected, it will fail anyway in data prep stage if it isn't, +' if [ "$mic" == "ihm" ]; then num_files=$(find $adir -iname *Headset* | wc -l) if [ $num_files -ne 687 ]; then @@ -102,6 +109,7 @@ else exit 1; fi fi +' echo "Downloads of AMI corpus completed succesfully. 
License can be found under $adir/LICENCE.TXT" exit 0; diff --git a/egs/ami/s5c/local/prepare_data.py b/egs/ami/s5c/local/prepare_data.py index 8a7a6d24b8c..c8e938c7db7 100755 --- a/egs/ami/s5c/local/prepare_data.py +++ b/egs/ami/s5c/local/prepare_data.py @@ -28,6 +28,8 @@ def find_audios(wav_path, file_list): # Filter list to keep only those in annotations (for the specific data split) file_names_str = "|".join(file_list) + #print(file_names_str) + #print(df_wav) df_wav = df_wav.loc[df_wav['key'].str.contains(file_names_str)].sort_values('key') return df_wav diff --git a/egs/ami/s5c/run.sh b/egs/ami/s5c/run.sh index cc4cd87610b..cca190f09f4 100755 --- a/egs/ami/s5c/run.sh +++ b/egs/ami/s5c/run.sh @@ -26,6 +26,8 @@ diarizer_stage=0 nj=50 decode_nj=15 +export mic=ihm + model_dir=exp/xvector_nnet_1a train_set=train From f7b0840206c4ec0969f8c9edd500822091a3a630 Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: Fri, 29 Apr 2022 02:54:58 +0000 Subject: [PATCH 02/12] just deleted trash we dont need --- egs/ami/s5b/local/ami_download.sh | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/egs/ami/s5b/local/ami_download.sh b/egs/ami/s5b/local/ami_download.sh index b460b539bc4..b434f57c58c 100755 --- a/egs/ami/s5b/local/ami_download.sh +++ b/egs/ami/s5b/local/ami_download.sh @@ -55,15 +55,10 @@ cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $wdir/am wgetfile=$wdir/wget_$mic.sh -' -# TODO fix this with Pawel, files dont exist anymore, -manifest="wget --continue -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt" -license="wget --continue -O $adir/LICENCE.TXT http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt" -' manifest="wget --continue -O $adir/MANIFEST.TXT https://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-1372-Thu-Apr-28-2022.manifest.txt" # Parse the manifest file, and separate recordings 
into train, dev, and eval sets -python3 split_manifest.py +python3 local/split_manifest.py echo "#!/usr/bin/env bash" > $wgetfile echo $manifest >> $wgetfile @@ -91,25 +86,6 @@ echo "Downloading audio files for $mic scenario." echo "Look at $wdir/log/download_ami_$mic.log for progress" $wgetfile &> $wdir/log/download_ami_$mic.log -# Do rough check if #wavs is as expected, it will fail anyway in data prep stage if it isn't, -' -if [ "$mic" == "ihm" ]; then - num_files=$(find $adir -iname *Headset* | wc -l) - if [ $num_files -ne 687 ]; then - echo "Warning: Found $num_files headset wavs but expected 687. Check $wdir/log/download_ami_$mic.log for details." - exit 1; - fi -else - num_files=$(find $adir -iname *Array1* | wc -l) - if [[ $num_files -lt 1352 && "$mic" == "mdm" ]]; then - echo "Warning: Found $num_files distant Array1 waves but expected 1352 for mdm. Check $wdir/log/download_ami_$mic.log for details." - exit 1; - elif [[ $num_files -lt 169 && "$mic" == "sdm" ]]; then - echo "Warning: Found $num_files distant Array1 waves but expected 169 for sdm. Check $wdir/log/download_ami_$mic.log for details." - exit 1; - fi -fi -' echo "Downloads of AMI corpus completed succesfully. 
License can be found under $adir/LICENCE.TXT" exit 0; From 435a65a9dc1f63ac6cbddb9e4431c2a0d52018b0 Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: Thu, 28 Apr 2022 22:56:27 -0400 Subject: [PATCH 03/12] added python script to automatically split the manifext text file --- egs/ami/s5c/local/split_manifest.py | 58 +++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 egs/ami/s5c/local/split_manifest.py diff --git a/egs/ami/s5c/local/split_manifest.py b/egs/ami/s5c/local/split_manifest.py new file mode 100644 index 00000000000..5330f774c82 --- /dev/null +++ b/egs/ami/s5c/local/split_manifest.py @@ -0,0 +1,58 @@ +import os +import sys + +def unique(m): + unique_list = [] + + for i in m: + if i not in unique_list: + unique_list.append(i) + + return unique_list + +# Load in the MANIFEST file, save off the audio recoding file names +file = sys.argv[1] +prefix = ' https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/' +m = [] + +with open(file) as f: + for line in f: + if line.startswith(prefix): + splits = line.split('/') + m.append(splits[7]) +m = unique(m) +print("Got the audio files from MANIFEST.TXT") +#print(m) + +# Separate files and save off into train, dev, and eval partitions +N = len(m) + +train = m[:round(N*.5)] +dev = m[round(N*.5)+1:round(N*.8)] +eval = m[round(N*.8)+1:] + +print("Train set: "+str(train)) +print("Dev set: "+str(dev)) +print("Eval set: "+str(eval)) + +if os.path.exists('split_train.orig'): + os.remove('split_train.orig') +if os.path.exists('split_dev.orig'): + os.remove('split_dev.orig') +if os.path.exists('split_eval.orig'): + os.remove('split_eval.orig') + +with open('split_train.orig', 'a') as train_file: + for d in train: + train_file.write(d) + train_file.write("\n") + +with open('split_dev.orig', 'a') as dev_file: + for d in dev: + dev_file.write(d) + dev_file.write("\n") + +with open('split_eval.orig', 'a') as eval_file: + for d in eval: + eval_file.write(d) + eval_file.write("\n") \ No newline at 
end of file From f2e4c50237e5079a8c93ba064de3e44fa72c3d99 Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: Fri, 29 Apr 2022 03:07:43 +0000 Subject: [PATCH 04/12] fixed some stuff on the VM side at runtime --- egs/ami/s5b/local/ami_download.sh | 2 +- egs/ami/s5c/local/split_manifest.py | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/egs/ami/s5b/local/ami_download.sh b/egs/ami/s5b/local/ami_download.sh index b434f57c58c..ae4823c5f51 100755 --- a/egs/ami/s5b/local/ami_download.sh +++ b/egs/ami/s5b/local/ami_download.sh @@ -58,7 +58,7 @@ wgetfile=$wdir/wget_$mic.sh manifest="wget --continue -O $adir/MANIFEST.TXT https://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-1372-Thu-Apr-28-2022.manifest.txt" # Parse the manifest file, and separate recordings into train, dev, and eval sets -python3 local/split_manifest.py +python3 local/split_manifest.py $adir/MANIFEST.TXT echo "#!/usr/bin/env bash" > $wgetfile echo $manifest >> $wgetfile diff --git a/egs/ami/s5c/local/split_manifest.py b/egs/ami/s5c/local/split_manifest.py index 5330f774c82..244d66f4ac8 100644 --- a/egs/ami/s5c/local/split_manifest.py +++ b/egs/ami/s5c/local/split_manifest.py @@ -12,13 +12,16 @@ def unique(m): # Load in the MANIFEST file, save off the audio recoding file names file = sys.argv[1] -prefix = ' https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/' +prefix = '\thttps://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/' m = [] with open(file) as f: for line in f: + #splits = line.split('/') + #print(splits) if line.startswith(prefix): splits = line.split('/') + #print(splits) m.append(splits[7]) m = unique(m) print("Got the audio files from MANIFEST.TXT") @@ -35,24 +38,24 @@ def unique(m): print("Dev set: "+str(dev)) print("Eval set: "+str(eval)) -if os.path.exists('split_train.orig'): - os.remove('split_train.orig') -if os.path.exists('split_dev.orig'): - os.remove('split_dev.orig') -if os.path.exists('split_eval.orig'): - 
os.remove('split_eval.orig') +if os.path.exists('local/split_train.orig'): + os.remove('local/split_train.orig') +if os.path.exists('local/split_dev.orig'): + os.remove('local/split_dev.orig') +if os.path.exists('local/split_eval.orig'): + os.remove('local/split_eval.orig') -with open('split_train.orig', 'a') as train_file: +with open('local/split_train.orig', 'a') as train_file: for d in train: train_file.write(d) train_file.write("\n") -with open('split_dev.orig', 'a') as dev_file: +with open('local/split_dev.orig', 'a') as dev_file: for d in dev: dev_file.write(d) dev_file.write("\n") -with open('split_eval.orig', 'a') as eval_file: +with open('local/split_eval.orig', 'a') as eval_file: for d in eval: eval_file.write(d) - eval_file.write("\n") \ No newline at end of file + eval_file.write("\n") From d48f3c7eb48a777bba48b6ec1452359f9d234706 Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: Fri, 29 Apr 2022 04:25:54 +0000 Subject: [PATCH 05/12] mini-batch w/ first few files in each train/dev/eval --- egs/ami/s5c/local/split_manifest.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/egs/ami/s5c/local/split_manifest.py b/egs/ami/s5c/local/split_manifest.py index 244d66f4ac8..c85d4a31294 100644 --- a/egs/ami/s5c/local/split_manifest.py +++ b/egs/ami/s5c/local/split_manifest.py @@ -32,11 +32,15 @@ def unique(m): train = m[:round(N*.5)] dev = m[round(N*.5)+1:round(N*.8)] -eval = m[round(N*.8)+1:] +ev = m[round(N*.8)+1:] + +train = train[:8] +dev = dev[:5] +ev = ev[:5] print("Train set: "+str(train)) print("Dev set: "+str(dev)) -print("Eval set: "+str(eval)) +print("Eval set: "+str(ev)) if os.path.exists('local/split_train.orig'): os.remove('local/split_train.orig') @@ -56,6 +60,6 @@ def unique(m): dev_file.write("\n") with open('local/split_eval.orig', 'a') as eval_file: - for d in eval: + for d in ev: eval_file.write(d) eval_file.write("\n") From d696a53d0a8c45fb4aa7c2905040ae7bd526275c Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: 
Fri, 29 Apr 2022 05:20:01 +0000 Subject: [PATCH 06/12] get rid of the damn queue.pl call to the external cluster. annoying af this is the default --- egs/ami/s5c/cmd.sh | 4 ++-- egs/ami/s5c/local/prepare_data.py | 6 +++--- egs/ami/s5c/run.sh | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/egs/ami/s5c/cmd.sh b/egs/ami/s5c/cmd.sh index bbf636f7052..bd48cedd704 100755 --- a/egs/ami/s5c/cmd.sh +++ b/egs/ami/s5c/cmd.sh @@ -10,6 +10,6 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export train_cmd="queue.pl --mem 4G" -export decode_cmd="queue.pl --mem 4G" +export train_cmd="run.pl --mem 4G" +export decode_cmd="run.pl --mem 4G" diff --git a/egs/ami/s5c/local/prepare_data.py b/egs/ami/s5c/local/prepare_data.py index c8e938c7db7..4872733a4bd 100755 --- a/egs/ami/s5c/local/prepare_data.py +++ b/egs/ami/s5c/local/prepare_data.py @@ -20,7 +20,7 @@ def find_audios(wav_path, file_list): # Get all wav file names from audio directory - command = 'find %s -name "*Mix-Headset.wav"' % (wav_path) + command = 'find %s -name "*.wav"' % (wav_path) wavs = subprocess.check_output(command, shell=True).decode('utf-8').splitlines() keys = [ os.path.splitext(os.path.basename(wav))[0] for wav in wavs ] data = {'key': keys, 'file_path': wavs} @@ -28,8 +28,8 @@ def find_audios(wav_path, file_list): # Filter list to keep only those in annotations (for the specific data split) file_names_str = "|".join(file_list) - #print(file_names_str) - #print(df_wav) + print(file_names_str) + print(df_wav) df_wav = df_wav.loc[df_wav['key'].str.contains(file_names_str)].sort_values('key') return df_wav diff --git a/egs/ami/s5c/run.sh b/egs/ami/s5c/run.sh index cca190f09f4..20050df7740 100755 --- a/egs/ami/s5c/run.sh +++ b/egs/ami/s5c/run.sh @@ -20,7 +20,7 @@ set -euo pipefail mfccdir=`pwd`/mfcc -stage=0 +stage=2 overlap_stage=0 diarizer_stage=0 nj=50 From 
2187a564e356d81308c554e4d91422db0ce6ecd2 Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: Sat, 30 Apr 2022 19:49:43 +0000 Subject: [PATCH 07/12] fixed the recipe to include xvector running, added soft link to xvector run scripts --- .../local/nnet3/xvector/tuning/run_xvector_1a.sh | 9 ++++++--- egs/ami/s5c/local/split_manifest.py | 16 ++++++++++------ egs/ami/s5c/run.sh | 7 +++++-- egs/ami/s5c/sid | 1 + egs/sre08/v1/sid/nnet3/xvector/get_egs.sh | 6 +++--- 5 files changed, 25 insertions(+), 14 deletions(-) create mode 120000 egs/ami/s5c/sid diff --git a/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh index 2189e406a7e..0541d480613 100755 --- a/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh +++ b/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh @@ -58,15 +58,18 @@ if [ $stage -le 6 ]; then utils/create_split_dir.pl \ /export/b{03,04,05,06}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/$egs_dir/storage $egs_dir/storage fi + # frame per iter original 1000000000 + # frame per iter diagnostic original 500000 + # num repeat original 1 sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ --nj 8 \ --stage 0 \ - --frames-per-iter 1000000000 \ - --frames-per-iter-diagnostic 500000 \ + --frames-per-iter 100000 \ + --frames-per-iter-diagnostic 10000 \ --min-frames-per-chunk 200 \ --max-frames-per-chunk 400 \ --num-diagnostic-archives 3 \ - --num-repeats 40 \ + --num-repeats 1 \ "$data" $egs_dir fi diff --git a/egs/ami/s5c/local/split_manifest.py b/egs/ami/s5c/local/split_manifest.py index c85d4a31294..162a15719d8 100644 --- a/egs/ami/s5c/local/split_manifest.py +++ b/egs/ami/s5c/local/split_manifest.py @@ -30,13 +30,17 @@ def unique(m): # Separate files and save off into train, dev, and eval partitions N = len(m) -train = m[:round(N*.5)] -dev = m[round(N*.5)+1:round(N*.8)] -ev = m[round(N*.8)+1:] +#train = m[:round(N*.5)] +#dev = m[round(N*.5)+1:round(N*.8)] +#ev = 
m[round(N*.8)+1:] -train = train[:8] -dev = dev[:5] -ev = ev[:5] +#train = train[:12] +#dev = dev[:10] +#ev = ev[:10] + +train = m[:8] +dev = m[9:15] +ev = m[16:20] print("Train set: "+str(train)) print("Dev set: "+str(dev)) diff --git a/egs/ami/s5c/run.sh b/egs/ami/s5c/run.sh index 20050df7740..6e041dfa786 100755 --- a/egs/ami/s5c/run.sh +++ b/egs/ami/s5c/run.sh @@ -20,10 +20,10 @@ set -euo pipefail mfccdir=`pwd`/mfcc -stage=2 +stage=4 overlap_stage=0 diarizer_stage=0 -nj=50 +nj=10 decode_nj=15 export mic=ihm @@ -89,6 +89,7 @@ if [ $stage -le 3 ]; then steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj --cmd "$train_cmd" data/$dataset steps/compute_cmvn_stats.sh data/$dataset utils/fix_data_dir.sh data/$dataset + echo "FEATURES COMPLETE FOR DATASET" done fi @@ -96,6 +97,8 @@ if [ $stage -le 4 ]; then echo "$0: preparing a AMI training data to train PLDA model" local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$train_cmd" \ data/train data/plda_train exp/plda_train_cmn + local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \ + --data data/plda_train fi if [ $stage -le 5 ]; then diff --git a/egs/ami/s5c/sid b/egs/ami/s5c/sid new file mode 120000 index 00000000000..893a12f30c9 --- /dev/null +++ b/egs/ami/s5c/sid @@ -0,0 +1 @@ +../../sre08/v1/sid \ No newline at end of file diff --git a/egs/sre08/v1/sid/nnet3/xvector/get_egs.sh b/egs/sre08/v1/sid/nnet3/xvector/get_egs.sh index 216d1cad318..607b3b8c188 100755 --- a/egs/sre08/v1/sid/nnet3/xvector/get_egs.sh +++ b/egs/sre08/v1/sid/nnet3/xvector/get_egs.sh @@ -24,9 +24,9 @@ cmd=run.pl # $min_frames_per_eg and $max_frames_per_eg. min_frames_per_chunk=50 max_frames_per_chunk=300 -frames_per_iter=10000000 # target number of frames per archive. +frames_per_iter=100000 #10000000 # target number of frames per archive. 
-frames_per_iter_diagnostic=100000 # have this many frames per archive for +frames_per_iter_diagnostic=10000 #100000 # have this many frames per archive for # the archives used for diagnostics. num_diagnostic_archives=3 # we want to test the training likelihoods @@ -34,7 +34,7 @@ num_diagnostic_archives=3 # we want to test the training likelihoods # how many archives we evaluate on. -compress=true # set this to false to disable compression (e.g. if you want to see whether +compress=false #true # set this to false to disable compression (e.g. if you want to see whether # results are affected). num_heldout_utts=100 # number of utterances held out for training subset From 0d37ef1700bbe87ed5bdf8d3e28a055b9a2a31b5 Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: Sun, 1 May 2022 00:10:27 +0000 Subject: [PATCH 08/12] local Manifest file instead of wget'ing it --- egs/ami/s5b/local/ami_download.sh | 9 +- egs/ami/s5c/local/MANIFEST.TXT | 149 ++++++++++++++++++++++++++++++ egs/ami/s5c/run.sh | 2 +- 3 files changed, 156 insertions(+), 4 deletions(-) create mode 100644 egs/ami/s5c/local/MANIFEST.TXT diff --git a/egs/ami/s5b/local/ami_download.sh b/egs/ami/s5b/local/ami_download.sh index ae4823c5f51..dbc2e0f7aad 100755 --- a/egs/ami/s5b/local/ami_download.sh +++ b/egs/ami/s5b/local/ami_download.sh @@ -51,15 +51,18 @@ mkdir -p $wdir/log #download waves -cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $wdir/ami_meet_ids.flist - wgetfile=$wdir/wget_$mic.sh -manifest="wget --continue -O $adir/MANIFEST.TXT https://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-1372-Thu-Apr-28-2022.manifest.txt" +cp local/MANIFEST.TXT $adir/MANIFEST.TXT +manifest=$adir/MANIFEST.TXT +#manifest="wget --continue -O $adir/MANIFEST.TXT https://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-1372-Thu-Apr-28-2022.manifest.txt" +license="wget --continue -O $adir/LICENSE.TXT http://groups.inf.ed.ac.uk/ami/corpus/license.shtml" # Parse the manifest file, and separate recordings into 
train, dev, and eval sets python3 local/split_manifest.py $adir/MANIFEST.TXT +cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $wdir/ami_meet_ids.flist + echo "#!/usr/bin/env bash" > $wgetfile echo $manifest >> $wgetfile echo $license >> $wgetfile diff --git a/egs/ami/s5c/local/MANIFEST.TXT b/egs/ami/s5c/local/MANIFEST.TXT new file mode 100644 index 00000000000..103723f8ff2 --- /dev/null +++ b/egs/ami/s5c/local/MANIFEST.TXT @@ -0,0 +1,149 @@ +All of the signals and transcription, and some of the annotations, have been released publicly under the Creative Commons Attribution 4.0 Licence (http://creativecommons.org/licenses/by/4.0). + + License text: CCBY4.0.txt + +Content of the download: + + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Closeup1.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Corner.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Corner.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Corner.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Overhead.rm diff --git a/egs/ami/s5c/run.sh b/egs/ami/s5c/run.sh index 6e041dfa786..a1a4a18b4f1 100755 --- a/egs/ami/s5c/run.sh +++ b/egs/ami/s5c/run.sh @@ -20,7 +20,7 @@ set -euo pipefail mfccdir=`pwd`/mfcc -stage=4 +stage=2 overlap_stage=0 diarizer_stage=0 nj=10 From 7173fcaee70cb40f5582674e9ddd0e0b6d664f8d Mon Sep 17 00:00:00 2001 From: Alex Thornton Date: Sat, 30 Apr 2022 21:56:40 -0400 Subject: [PATCH 09/12] new subset of MANIFEST for test/train/dev --- egs/ami/s5b/local/ami_download.sh | 2 +- egs/ami/s5c/local/MANIFEST.TXT | 178 +++++++++++++++++++++++++---- egs/ami/s5c/local/split_dev.orig | 8 ++ egs/ami/s5c/local/split_eval.orig | 8 ++ egs/ami/s5c/local/split_train.orig | 27 +++++ 5 files changed, 198 insertions(+), 25 deletions(-) create mode 100644 egs/ami/s5c/local/split_dev.orig create mode 100644 egs/ami/s5c/local/split_eval.orig create mode 100644 egs/ami/s5c/local/split_train.orig diff --git a/egs/ami/s5b/local/ami_download.sh b/egs/ami/s5b/local/ami_download.sh index dbc2e0f7aad..388f33b3530 100755 --- a/egs/ami/s5b/local/ami_download.sh +++ b/egs/ami/s5b/local/ami_download.sh @@ -59,7 +59,7 @@ manifest=$adir/MANIFEST.TXT license="wget --continue -O $adir/LICENSE.TXT http://groups.inf.ed.ac.uk/ami/corpus/license.shtml" # Parse the manifest file, and separate recordings into train, dev, and eval sets -python3 local/split_manifest.py $adir/MANIFEST.TXT +# python3 local/split_manifest.py $adir/MANIFEST.TXT cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $wdir/ami_meet_ids.flist diff --git a/egs/ami/s5c/local/MANIFEST.TXT b/egs/ami/s5c/local/MANIFEST.TXT index 
103723f8ff2..30a4f29447b 100644 --- a/egs/ami/s5c/local/MANIFEST.TXT +++ b/egs/ami/s5c/local/MANIFEST.TXT @@ -123,27 +123,157 @@ Content of the download: https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup4.rm https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Corner.rm https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Overhead.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Closeup1.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Closeup2.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Closeup3.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Closeup4.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Corner.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007a/video/ES2007a.Overhead.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Closeup1.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Closeup2.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Closeup3.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Closeup4.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Corner.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007b/video/ES2007b.Overhead.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Closeup1.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Closeup2.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Closeup3.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Closeup4.rm - 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Corner.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007c/video/ES2007c.Overhead.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Closeup1.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Closeup2.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Closeup3.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Closeup4.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Corner.rm - https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2007d/video/ES2007d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Closeup1.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Corner.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.R.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup1_T000005.880_T003343.040.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.R.rm \ No newline at end of file diff --git a/egs/ami/s5c/local/split_dev.orig b/egs/ami/s5c/local/split_dev.orig new file mode 100644 index 00000000000..7e3bec8a622 --- /dev/null +++ b/egs/ami/s5c/local/split_dev.orig @@ -0,0 +1,8 @@ +ES2011a +ES2011b +ES2011c +ES2011d +IB4001 +IB4002 +IB4003 +IB4004 \ No newline at end of file diff --git a/egs/ami/s5c/local/split_eval.orig b/egs/ami/s5c/local/split_eval.orig new file mode 100644 index 00000000000..eb80674dbe2 --- /dev/null +++ b/egs/ami/s5c/local/split_eval.orig @@ -0,0 +1,8 @@ +ES2004a +ES2004b +ES2004c +ES2004d +EN2002a +EN2002b +EN2002c +EN2002d \ No newline at end 
of file diff --git a/egs/ami/s5c/local/split_train.orig b/egs/ami/s5c/local/split_train.orig new file mode 100644 index 00000000000..882e64bd633 --- /dev/null +++ b/egs/ami/s5c/local/split_train.orig @@ -0,0 +1,27 @@ +IS1000a +IS1000b +IS1000c +IS1000d +IS1001a +IS1001b +IS1001c +IS1001d +IS1002b +IS1002c +IS1002d +ES2002a +ES2002b +ES2002c +ES2002d +ES2003a +ES2003b +ES2003c +ES2003d +ES2005a +ES2005b +ES2005c +ES2005d +ES2006a +ES2006b +ES2006c +ES2006d \ No newline at end of file From c9fd9d2a4798eeea684ff9738918a741ffc42070 Mon Sep 17 00:00:00 2001 From: "Thornton, Alexander P" Date: Fri, 6 May 2022 23:35:03 +0000 Subject: [PATCH 10/12] infinite loop fix for allocate_egs.py --- .../s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh | 6 +----- egs/ami/s5c/run.sh | 11 +++-------- egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py | 11 ++++++++--- egs/sre08/v1/sid/nnet3/xvector/get_egs.sh | 7 +++++-- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh index 0541d480613..93fedcb33bd 100755 --- a/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh +++ b/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh @@ -54,10 +54,6 @@ num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l) if [ $stage -le 6 ]; then echo "$0: Getting neural network training egs"; # dump egs. - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b{03,04,05,06}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/$egs_dir/storage $egs_dir/storage - fi # frame per iter original 1000000000 # frame per iter diagnostic original 500000 # num repeat original 1 @@ -69,7 +65,7 @@ if [ $stage -le 6 ]; then --min-frames-per-chunk 200 \ --max-frames-per-chunk 400 \ --num-diagnostic-archives 3 \ - --num-repeats 1 \ + --num-repeats 10 \ "$data" $egs_dir fi diff --git a/egs/ami/s5c/run.sh b/egs/ami/s5c/run.sh index a1a4a18b4f1..73a555c6180 100755 --- a/egs/ami/s5c/run.sh +++ b/egs/ami/s5c/run.sh @@ -20,7 +20,7 @@ set -euo pipefail mfccdir=`pwd`/mfcc -stage=2 +stage=4 overlap_stage=0 diarizer_stage=0 nj=10 @@ -39,11 +39,6 @@ diarizer_type=spectral # must be one of (ahc, spectral, vbx) # Path where AMI gets downloaded (or where locally available): AMI_DIR=$PWD/wav_db # Default, -case $(hostname -d) in - fit.vutbr.cz) AMI_DIR=/mnt/matylda5/iveselyk/KALDI_AMI_WAV ;; # BUT, - clsp.jhu.edu) AMI_DIR=/export/corpora5/amicorpus ;; # JHU, - cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh, -esac # Download AMI corpus, You need around 130GB of free space to get whole data if [ $stage -le 1 ]; then @@ -95,8 +90,8 @@ fi if [ $stage -le 4 ]; then echo "$0: preparing a AMI training data to train PLDA model" - local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$train_cmd" \ - data/train data/plda_train exp/plda_train_cmn + #local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$train_cmd" \ + # data/train data/plda_train exp/plda_train_cmn local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \ --data data/plda_train fi diff --git a/egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py b/egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py index b9b82e612a9..8a736aa155c 100755 --- a/egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py +++ b/egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py @@ -257,12 +257,17 @@ def main(): print("Ran out of speakers for archive 
{0}".format(archive_index + 1)) break spkr = spkrs.pop() + # apt2141 : fix from https://github.com/brijmohan/kaldi/commit/62b8ed90e261a6bb5088dfe506b7972dd052743f to stop infinite loop utt_len = 0 - while utt_len < length: + nutt_spkr = len(spk2utt[spkr]) + break_loop=0 + while utt_len < length and break_loop < nutt_spkr: utt = get_random_utt(spkr, spk2utt) utt_len = utt2len[utt] - offset = get_random_offset(utt_len, length) - this_egs.append( (utt, offset) ) + break_loop+=1 + if break_loop < nutt_spkr: + offset = get_random_offset(utt_len, length) + this_egs.append( (utt, offset) ) all_egs.append(this_egs) info_f.close() diff --git a/egs/sre08/v1/sid/nnet3/xvector/get_egs.sh b/egs/sre08/v1/sid/nnet3/xvector/get_egs.sh index 607b3b8c188..74e325e3521 100755 --- a/egs/sre08/v1/sid/nnet3/xvector/get_egs.sh +++ b/egs/sre08/v1/sid/nnet3/xvector/get_egs.sh @@ -41,8 +41,8 @@ num_heldout_utts=100 # number of utterances held out for training subset num_repeats=1 # number of times each speaker repeats per archive -stage=0 -nj=6 # This should be set to the maximum number of jobs you are +stage=3 +nj=8 # This should be set to the maximum number of jobs you are # comfortable to run in parallel; you can increase it if your disk # speed is greater and you have more machines. 
@@ -171,6 +171,9 @@ if [ $stage -le 2 ]; then --utt2len-filename=$dir/temp/utt2num_frames.train_subset \ --utt2int-filename=$dir/temp/utt2int.train_subset --egs-dir=$dir || exit 1 +fi + +if [ $stage -le 2 ]; then echo "$0: Allocating validation examples" $cmd $dir/log/allocate_examples_valid.log \ sid/nnet3/xvector/allocate_egs.py \ From 1af92d40432d21d44e2907eeeff9d6ebf97d8730 Mon Sep 17 00:00:00 2001 From: "Thornton, Alexander P" Date: Sat, 7 May 2022 00:31:06 +0000 Subject: [PATCH 11/12] finally ran to results --- egs/ami/s5c/run.sh | 2 +- egs/callhome_diarization/v1/diarization/score_cossim.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/ami/s5c/run.sh b/egs/ami/s5c/run.sh index 73a555c6180..f44d2e52e77 100755 --- a/egs/ami/s5c/run.sh +++ b/egs/ami/s5c/run.sh @@ -20,7 +20,7 @@ set -euo pipefail mfccdir=`pwd`/mfcc -stage=4 +stage=7 overlap_stage=0 diarizer_stage=0 nj=10 diff --git a/egs/callhome_diarization/v1/diarization/score_cossim.sh b/egs/callhome_diarization/v1/diarization/score_cossim.sh index 5f02a0035b3..23010b5b981 100755 --- a/egs/callhome_diarization/v1/diarization/score_cossim.sh +++ b/egs/callhome_diarization/v1/diarization/score_cossim.sh @@ -68,7 +68,7 @@ feats="scp:$sdata/JOB/feats.scp" if [ $stage -le 0 ]; then echo "$0: scoring xvectors" $cmd JOB=1:$nj $dir/log/cossim_scoring.JOB.log \ - python diarization/calc_cossim_scores.py \ + python3 diarization/calc_cossim_scores.py \ ark:$sdata/JOB/spk2utt "$feats" - \|\ copy-feats ark,t:- ark,scp:$dir/scores.JOB.ark,$dir/scores.JOB.scp || exit 1; fi From 09d386fac7bde5034546b6de3a90725bba7ca411 Mon Sep 17 00:00:00 2001 From: "Thornton, Alexander" Date: Sun, 8 May 2022 20:13:03 +0000 Subject: [PATCH 12/12] packaged stuff up for submission --- egs/ami/s5c/README.txt | 0 egs/ami/s5c_apt2141/README.txt | 38 +++ egs/ami/s5c_apt2141/cmd.sh | 15 + egs/ami/s5c_apt2141/conf/decode.conf | 3 + egs/ami/s5c_apt2141/conf/mfcc.conf | 2 + egs/ami/s5c_apt2141/conf/mfcc_hires.conf | 10 + 
egs/ami/s5c_apt2141/conf/online_cmvn.conf | 3 + egs/ami/s5c_apt2141/conf/pitch.conf | 1 + egs/ami/s5c_apt2141/diarization | 1 + egs/ami/s5c_apt2141/egs_ami_s5c_apt2141.diff | 212 +++++++++++++ egs/ami/s5c_apt2141/local/MANIFEST.TXT | 279 ++++++++++++++++++ egs/ami/s5c_apt2141/local/ami_download.sh | 1 + egs/ami/s5c_apt2141/local/detect_overlaps.sh | 205 +++++++++++++ egs/ami/s5c_apt2141/local/diarize_ahc.sh | 74 +++++ egs/ami/s5c_apt2141/local/diarize_spectral.sh | 72 +++++ egs/ami/s5c_apt2141/local/diarize_vbx.sh | 81 +++++ .../local/generate_forced_aligned_rttm.py | 105 +++++++ .../local/nnet3/xvector/prepare_feats.sh | 89 ++++++ .../nnet3/xvector/prepare_feats_for_egs.sh | 83 ++++++ .../local/nnet3/xvector/run_xvector.sh | 1 + .../local/nnet3/xvector/score_plda.sh | 83 ++++++ .../nnet3/xvector/tuning/run_xvector_1a.sh | 148 ++++++++++ .../local/overlap/run_tdnn_lstm.sh | 1 + .../local/overlap/tuning/run_tdnn_lstm_1a.sh | 152 ++++++++++ egs/ami/s5c_apt2141/local/prepare_data.py | 94 ++++++ egs/ami/s5c_apt2141/local/split_dev.orig | 8 + egs/ami/s5c_apt2141/local/split_eval.orig | 8 + egs/ami/s5c_apt2141/local/split_manifest.py | 69 +++++ egs/ami/s5c_apt2141/local/split_train.orig | 27 ++ .../local/train_overlap_detector.sh | 130 ++++++++ egs/ami/s5c_apt2141/path.sh | 6 + egs/ami/s5c_apt2141/run.sh | 168 +++++++++++ egs/ami/s5c_apt2141/sid | 1 + egs/ami/s5c_apt2141/steps | 1 + egs/ami/s5c_apt2141/utils | 1 + 35 files changed, 2172 insertions(+) create mode 100644 egs/ami/s5c/README.txt create mode 100644 egs/ami/s5c_apt2141/README.txt create mode 100755 egs/ami/s5c_apt2141/cmd.sh create mode 100644 egs/ami/s5c_apt2141/conf/decode.conf create mode 100644 egs/ami/s5c_apt2141/conf/mfcc.conf create mode 100755 egs/ami/s5c_apt2141/conf/mfcc_hires.conf create mode 100644 egs/ami/s5c_apt2141/conf/online_cmvn.conf create mode 100644 egs/ami/s5c_apt2141/conf/pitch.conf create mode 120000 egs/ami/s5c_apt2141/diarization create mode 100644 
egs/ami/s5c_apt2141/egs_ami_s5c_apt2141.diff create mode 100644 egs/ami/s5c_apt2141/local/MANIFEST.TXT create mode 120000 egs/ami/s5c_apt2141/local/ami_download.sh create mode 100755 egs/ami/s5c_apt2141/local/detect_overlaps.sh create mode 100755 egs/ami/s5c_apt2141/local/diarize_ahc.sh create mode 100755 egs/ami/s5c_apt2141/local/diarize_spectral.sh create mode 100755 egs/ami/s5c_apt2141/local/diarize_vbx.sh create mode 100755 egs/ami/s5c_apt2141/local/generate_forced_aligned_rttm.py create mode 100755 egs/ami/s5c_apt2141/local/nnet3/xvector/prepare_feats.sh create mode 100755 egs/ami/s5c_apt2141/local/nnet3/xvector/prepare_feats_for_egs.sh create mode 120000 egs/ami/s5c_apt2141/local/nnet3/xvector/run_xvector.sh create mode 100755 egs/ami/s5c_apt2141/local/nnet3/xvector/score_plda.sh create mode 100755 egs/ami/s5c_apt2141/local/nnet3/xvector/tuning/run_xvector_1a.sh create mode 120000 egs/ami/s5c_apt2141/local/overlap/run_tdnn_lstm.sh create mode 100755 egs/ami/s5c_apt2141/local/overlap/tuning/run_tdnn_lstm_1a.sh create mode 100755 egs/ami/s5c_apt2141/local/prepare_data.py create mode 100644 egs/ami/s5c_apt2141/local/split_dev.orig create mode 100644 egs/ami/s5c_apt2141/local/split_eval.orig create mode 100644 egs/ami/s5c_apt2141/local/split_manifest.py create mode 100644 egs/ami/s5c_apt2141/local/split_train.orig create mode 100755 egs/ami/s5c_apt2141/local/train_overlap_detector.sh create mode 100755 egs/ami/s5c_apt2141/path.sh create mode 100755 egs/ami/s5c_apt2141/run.sh create mode 120000 egs/ami/s5c_apt2141/sid create mode 120000 egs/ami/s5c_apt2141/steps create mode 120000 egs/ami/s5c_apt2141/utils diff --git a/egs/ami/s5c/README.txt b/egs/ami/s5c/README.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/egs/ami/s5c_apt2141/README.txt b/egs/ami/s5c_apt2141/README.txt new file mode 100644 index 00000000000..71eebeceb47 --- /dev/null +++ b/egs/ami/s5c_apt2141/README.txt @@ -0,0 +1,38 @@ +A. Alexander Thornton (apt2141) + +B. 
May 8, 2022 + +C. Project Title: Speaker Diarization: Deep Speech Embeddings for Time Delay Neural Networks (TDNN) + +D. Project Summary: + +Abstract—The fundamental problem of Speaker Diarization +can be simplified as ”who spoke when”. At its essence, Speaker +Diarization can be reduced to the traditional Speaker Identifica- +tion problem, but expanded to N interleaving speakers through +time. This work improves upon the existing Speaker Diarization +project in Kaldi, which was incomplete and unfinished prior to +my efforts. +Index Terms—speaker identification, diarization, time delay +neural networks, time series learning + +E. All tools are included with the code here. Build Kaldi, and you can just run the run.sh + +F. Only use run.sh, the stage is set to 7 for decoding + +G. Run the code with this simple command: + + ./run.sh + +All environment variables are defined inside + +Sample output will appear at the bottom, with the test accuracy + +H. The data used was built off a MANIFEST file downloaded here: + +https://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-1372-Thu-Apr-28-2022.manifest.txt + +It's important to know that those files are regenerated daily, so this one might already be gone + + + diff --git a/egs/ami/s5c_apt2141/cmd.sh b/egs/ami/s5c_apt2141/cmd.sh new file mode 100755 index 00000000000..bd48cedd704 --- /dev/null +++ b/egs/ami/s5c_apt2141/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm.
Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="run.pl --mem 4G" +export decode_cmd="run.pl --mem 4G" + diff --git a/egs/ami/s5c_apt2141/conf/decode.conf b/egs/ami/s5c_apt2141/conf/decode.conf new file mode 100644 index 00000000000..c8a0ece58bf --- /dev/null +++ b/egs/ami/s5c_apt2141/conf/decode.conf @@ -0,0 +1,3 @@ +beam=11.0 # beam for decoding. Was 13.0 in the scripts. +first_beam=8.0 # beam for 1st-pass decoding in SAT. + diff --git a/egs/ami/s5c_apt2141/conf/mfcc.conf b/egs/ami/s5c_apt2141/conf/mfcc.conf new file mode 100644 index 00000000000..a1aa3d6c158 --- /dev/null +++ b/egs/ami/s5c_apt2141/conf/mfcc.conf @@ -0,0 +1,2 @@ +--use-energy=false # only non-default option. +--sample-frequency=16000 diff --git a/egs/ami/s5c_apt2141/conf/mfcc_hires.conf b/egs/ami/s5c_apt2141/conf/mfcc_hires.conf new file mode 100755 index 00000000000..434834a6725 --- /dev/null +++ b/egs/ami/s5c_apt2141/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. 
+--high-freq=-400 # high cutoff frequently, relative to Nyquist of 8000 (=7600) diff --git a/egs/ami/s5c_apt2141/conf/online_cmvn.conf b/egs/ami/s5c_apt2141/conf/online_cmvn.conf new file mode 100644 index 00000000000..34db39b23f0 --- /dev/null +++ b/egs/ami/s5c_apt2141/conf/online_cmvn.conf @@ -0,0 +1,3 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh +--norm-means=true +--norm-vars=false \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/conf/pitch.conf b/egs/ami/s5c_apt2141/conf/pitch.conf new file mode 100644 index 00000000000..5d54a086435 --- /dev/null +++ b/egs/ami/s5c_apt2141/conf/pitch.conf @@ -0,0 +1 @@ +--sample-frequency=16000 \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/diarization b/egs/ami/s5c_apt2141/diarization new file mode 120000 index 00000000000..bad937c1444 --- /dev/null +++ b/egs/ami/s5c_apt2141/diarization @@ -0,0 +1 @@ +../../callhome_diarization/v1/diarization \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/egs_ami_s5c_apt2141.diff b/egs/ami/s5c_apt2141/egs_ami_s5c_apt2141.diff new file mode 100644 index 00000000000..4ac8ed808a2 --- /dev/null +++ b/egs/ami/s5c_apt2141/egs_ami_s5c_apt2141.diff @@ -0,0 +1,212 @@ +Only in .: README.txt +diff -ru /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/cmd.sh ./cmd.sh +--- /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/cmd.sh 2022-05-08 20:01:18.877615118 +0000 ++++ ./cmd.sh 2022-05-08 19:16:13.885246620 +0000 +@@ -10,6 +10,6 @@ + # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, + # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
+ +-export train_cmd="queue.pl --mem 4G" +-export decode_cmd="queue.pl --mem 4G" ++export train_cmd="run.pl --mem 4G" ++export decode_cmd="run.pl --mem 4G" + +diff -ru /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/diarization/score_cossim.sh ./diarization/score_cossim.sh +--- /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/diarization/score_cossim.sh 2022-05-08 20:01:18.949615994 +0000 ++++ ./diarization/score_cossim.sh 2022-05-08 19:16:14.025246342 +0000 +@@ -68,7 +68,7 @@ + if [ $stage -le 0 ]; then + echo "$0: scoring xvectors" + $cmd JOB=1:$nj $dir/log/cossim_scoring.JOB.log \ +- python diarization/calc_cossim_scores.py \ ++ python3 diarization/calc_cossim_scores.py \ + ark:$sdata/JOB/spk2utt "$feats" - \|\ + copy-feats ark,t:- ark,scp:$dir/scores.JOB.ark,$dir/scores.JOB.scp || exit 1; + fi +Only in .: egs_ami_s5c_apt2141.diff +Only in ./local: MANIFEST.TXT +diff -ru /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/local/ami_download.sh ./local/ami_download.sh +--- /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/local/ami_download.sh 2022-05-08 20:01:18.869615021 +0000 ++++ ./local/ami_download.sh 2022-05-08 19:16:13.869246652 +0000 +@@ -51,13 +51,17 @@ + + #download waves + +-cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $wdir/ami_meet_ids.flist +- + wgetfile=$wdir/wget_$mic.sh + +-# TODO fix this with Pawel, files don't exist anymore, +-manifest="wget --continue -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt" +-license="wget --continue -O $adir/LICENCE.TXT http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt" ++cp local/MANIFEST.TXT $adir/MANIFEST.TXT ++manifest=$adir/MANIFEST.TXT ++#manifest="wget --continue -O $adir/MANIFEST.TXT https://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-1372-Thu-Apr-28-2022.manifest.txt" ++license="wget --continue -O $adir/LICENSE.TXT 
http://groups.inf.ed.ac.uk/ami/corpus/license.shtml" ++ ++# Parse the manifest file, and separate recordings into train, dev, and eval sets ++# python3 local/split_manifest.py $adir/MANIFEST.TXT ++ ++cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $wdir/ami_meet_ids.flist + + echo "#!/usr/bin/env bash" > $wgetfile + echo $manifest >> $wgetfile +@@ -85,23 +89,6 @@ + echo "Look at $wdir/log/download_ami_$mic.log for progress" + $wgetfile &> $wdir/log/download_ami_$mic.log + +-# Do rough check if #wavs is as expected, it will fail anyway in data prep stage if it isn't, +-if [ "$mic" == "ihm" ]; then +- num_files=$(find $adir -iname *Headset* | wc -l) +- if [ $num_files -ne 687 ]; then +- echo "Warning: Found $num_files headset wavs but expected 687. Check $wdir/log/download_ami_$mic.log for details." +- exit 1; +- fi +-else +- num_files=$(find $adir -iname *Array1* | wc -l) +- if [[ $num_files -lt 1352 && "$mic" == "mdm" ]]; then +- echo "Warning: Found $num_files distant Array1 waves but expected 1352 for mdm. Check $wdir/log/download_ami_$mic.log for details." +- exit 1; +- elif [[ $num_files -lt 169 && "$mic" == "sdm" ]]; then +- echo "Warning: Found $num_files distant Array1 waves but expected 169 for sdm. Check $wdir/log/download_ami_$mic.log for details." +- exit 1; +- fi +-fi + + echo "Downloads of AMI corpus completed succesfully. License can be found under $adir/LICENCE.TXT" + exit 0; +diff -ru /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/local/nnet3/xvector/run_xvector.sh ./local/nnet3/xvector/run_xvector.sh +--- /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/local/nnet3/xvector/run_xvector.sh 2022-05-08 20:01:18.877615118 +0000 ++++ ./local/nnet3/xvector/run_xvector.sh 2022-05-08 19:16:13.885246620 +0000 +@@ -54,19 +54,18 @@ + if [ $stage -le 6 ]; then + echo "$0: Getting neural network training egs"; + # dump egs. +- if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $egs_dir/storage ]; then +- utils/create_split_dir.pl \ +- /export/b{03,04,05,06}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/$egs_dir/storage $egs_dir/storage +- fi ++ # frame per iter original 1000000000 ++ # frame per iter diagnostic original 500000 ++ # num repeat original 1 + sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ + --nj 8 \ + --stage 0 \ +- --frames-per-iter 1000000000 \ +- --frames-per-iter-diagnostic 500000 \ ++ --frames-per-iter 100000 \ ++ --frames-per-iter-diagnostic 10000 \ + --min-frames-per-chunk 200 \ + --max-frames-per-chunk 400 \ + --num-diagnostic-archives 3 \ +- --num-repeats 40 \ ++ --num-repeats 10 \ + "$data" $egs_dir + fi + +diff -ru /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh ./local/nnet3/xvector/tuning/run_xvector_1a.sh +--- /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/local/nnet3/xvector/tuning/run_xvector_1a.sh 2022-05-08 20:01:18.877615118 +0000 ++++ ./local/nnet3/xvector/tuning/run_xvector_1a.sh 2022-05-08 19:16:13.885246620 +0000 +@@ -54,19 +54,18 @@ + if [ $stage -le 6 ]; then + echo "$0: Getting neural network training egs"; + # dump egs. +- if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $egs_dir/storage ]; then +- utils/create_split_dir.pl \ +- /export/b{03,04,05,06}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/$egs_dir/storage $egs_dir/storage +- fi ++ # frame per iter original 1000000000 ++ # frame per iter diagnostic original 500000 ++ # num repeat original 1 + sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ + --nj 8 \ + --stage 0 \ +- --frames-per-iter 1000000000 \ +- --frames-per-iter-diagnostic 500000 \ ++ --frames-per-iter 100000 \ ++ --frames-per-iter-diagnostic 10000 \ + --min-frames-per-chunk 200 \ + --max-frames-per-chunk 400 \ + --num-diagnostic-archives 3 \ +- --num-repeats 40 \ ++ --num-repeats 10 \ + "$data" $egs_dir + fi + +diff -ru /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/local/prepare_data.py ./local/prepare_data.py +--- /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/local/prepare_data.py 2022-05-08 20:01:18.877615118 +0000 ++++ ./local/prepare_data.py 2022-05-08 19:16:13.885246620 +0000 +@@ -20,7 +20,7 @@ + + def find_audios(wav_path, file_list): + # Get all wav file names from audio directory +- command = 'find %s -name "*Mix-Headset.wav"' % (wav_path) ++ command = 'find %s -name "*.wav"' % (wav_path) + wavs = subprocess.check_output(command, shell=True).decode('utf-8').splitlines() + keys = [ os.path.splitext(os.path.basename(wav))[0] for wav in wavs ] + data = {'key': keys, 'file_path': wavs} +@@ -28,6 +28,8 @@ + + # Filter list to keep only those in annotations (for the specific data split) + file_names_str = "|".join(file_list) ++ print(file_names_str) ++ print(df_wav) + df_wav = df_wav.loc[df_wav['key'].str.contains(file_names_str)].sort_values('key') + return df_wav + +Only in ./local: split_dev.orig +Only in ./local: split_eval.orig +Only in ./local: split_manifest.py +Only in ./local: split_train.orig +diff -ru /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/run.sh ./run.sh +--- /home/apt2141/git/kaldi_original/kaldi/egs/ami/s5c/run.sh 2022-05-08 20:01:18.877615118 
+0000 ++++ ./run.sh 2022-05-08 19:16:13.885246620 +0000 +@@ -20,12 +20,14 @@ + set -euo pipefail + mfccdir=`pwd`/mfcc + +-stage=0 ++stage=7 + overlap_stage=0 + diarizer_stage=0 +-nj=50 ++nj=10 + decode_nj=15 + ++export mic=ihm ++ + model_dir=exp/xvector_nnet_1a + + train_set=train +@@ -37,11 +39,6 @@ + + # Path where AMI gets downloaded (or where locally available): + AMI_DIR=$PWD/wav_db # Default, +-case $(hostname -d) in +- fit.vutbr.cz) AMI_DIR=/mnt/matylda5/iveselyk/KALDI_AMI_WAV ;; # BUT, +- clsp.jhu.edu) AMI_DIR=/export/corpora5/amicorpus ;; # JHU, +- cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh, +-esac + + # Download AMI corpus, You need around 130GB of free space to get whole data + if [ $stage -le 1 ]; then +@@ -87,13 +84,16 @@ + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj --cmd "$train_cmd" data/$dataset + steps/compute_cmvn_stats.sh data/$dataset + utils/fix_data_dir.sh data/$dataset ++ echo "FEATURES COMPLETE FOR DATASET" + done + fi + + if [ $stage -le 4 ]; then + echo "$0: preparing a AMI training data to train PLDA model" +- local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$train_cmd" \ +- data/train data/plda_train exp/plda_train_cmn ++ #local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$train_cmd" \ ++ # data/train data/plda_train exp/plda_train_cmn ++ local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \ ++ --data data/plda_train + fi + + if [ $stage -le 5 ]; then +Only in .: sid diff --git a/egs/ami/s5c_apt2141/local/MANIFEST.TXT b/egs/ami/s5c_apt2141/local/MANIFEST.TXT new file mode 100644 index 00000000000..30a4f29447b --- /dev/null +++ b/egs/ami/s5c_apt2141/local/MANIFEST.TXT @@ -0,0 +1,279 @@ +All of the signals and transcription, and some of the annotations, have been released publicly under the Creative Commons Attribution 4.0 Licence (http://creativecommons.org/licenses/by/4.0). 
+ + License text: CCBY4.0.txt + +Content of the download: + + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002a/video/ES2002a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002b/video/ES2002b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002c/video/ES2002c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2002d/video/ES2002d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003a/video/ES2003a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003b/video/ES2003b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003c/video/ES2003c.Overhead.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2003d/video/ES2003d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004a/video/ES2004a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004b/video/ES2004b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Closeup3.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004c/video/ES2004c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2004d/video/ES2004d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005a/video/ES2005a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005b/video/ES2005b.Overhead.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005c/video/ES2005c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2005d/video/ES2005d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006a/video/ES2006a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Closeup3.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006b/video/ES2006b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006c/video/ES2006c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2006d/video/ES2006d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011a/video/ES2011a.Overhead.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011b/video/ES2011b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011c/video/ES2011c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/ES2011d/video/ES2011d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000a/video/IS1000a.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000b/video/IS1000b.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000c/video/IS1000c.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1000d/video/IS1000d.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001a/video/IS1001a.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001b/video/IS1001b.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001c/video/IS1001c.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1001d/video/IS1001d.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002b/video/IS1002b.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.Closeup2.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002c/video/IS1002c.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IS1002d/video/IS1002d.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002a/video/EN2002a.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Closeup4.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002b/video/EN2002b.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002c/video/EN2002c.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Corner.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/EN2002d/video/EN2002d.Overhead.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4001/video/IB4001.R.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4002/video/IB4002.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup1_T000005.880_T003343.040.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.L.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4003/video/IB4003.R.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.C.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.Closeup1.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.Closeup2.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.Closeup3.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.Closeup4.rm + https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.L.rm + 
https://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4004/video/IB4004.R.rm \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/local/ami_download.sh b/egs/ami/s5c_apt2141/local/ami_download.sh new file mode 120000 index 00000000000..fd7bc9a9e9e --- /dev/null +++ b/egs/ami/s5c_apt2141/local/ami_download.sh @@ -0,0 +1 @@ +../../s5b/local/ami_download.sh \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/local/detect_overlaps.sh b/egs/ami/s5c_apt2141/local/detect_overlaps.sh new file mode 100755 index 00000000000..7fd6ac3e8aa --- /dev/null +++ b/egs/ami/s5c_apt2141/local/detect_overlaps.sh @@ -0,0 +1,205 @@ +#!/usr/bin/env bash + +# Copyright 2020 Desh Raj (Johns Hopkins University) +# Apache 2.0. + +# This script does nnet3-based overlap detection given an input +# kaldi data directory and outputs an overlap RTTM file. + +set -e +set -o pipefail +set -u + +if [ -f ./path.sh ]; then . ./path.sh; fi + +nj=32 +cmd=run.pl +stage=0 +region_type=overlap +convert_data_dir_to_whole=false + +output_name=output # The output node in the network +output_scale= # provide scaling factors for "silence single overlap" (tune on dev set) + +# Network config +iter=final # Model iteration to use + +# Contexts must ideally match training for LSTM models, but +# may not necessarily for stats components +extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) +extra_right_context=0 +extra_left_context_initial=-1 +extra_right_context_final=-1 +frames_per_chunk=300 + +# Decoding options +graph_opts="--min-silence-duration=0 --min-speech-duration=0.03 --max-speech-duration=10.0 --min-overlap-duration 0.1 --max-overlap-duration 5.0" +acwt=0.1 + +# Postprocessing options +segment_padding=0.05 # Duration (in seconds) of padding added to overlap segments +min_segment_dur=0 # Minimum duration (in seconds) required for a segment to be included + # This is before any padding. Segments shorter than this duration will be removed. 
+ # This is an alternative to --min-overlap-duration above. +merge_consecutive_max_dur=inf # Merge consecutive segments as long as the merged segment is no longer than this many + # seconds. The segments are only merged if their boundaries are touching. + # This is after padding by --segment-padding seconds. + # 0 means do not merge. Use 'inf' to not limit the duration. + +echo $* + +. utils/parse_options.sh + +if [ $# -ne 3 ]; then + echo "This script does nnet3-based overlap detection given an input kaldi " + echo "data directory and outputs an RTTM file." + echo "See script for details of the options to be supplied." + echo "Usage: $0 " + echo " e.g.: $0 data/dev exp/overlap_1a/tdnn_stats_1a exp/overlap_1a/dev" + echo "" + echo "Options: " + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --nj # number of parallel jobs to run." + echo " --stage # stage to do partial re-run from." + echo " --output-name # The output node in the network" + echo " --extra-left-context # Set to some large value, typically 40 for LSTM (must match training)" + echo " --extra-right-context # For BLSTM or statistics pooling" + exit 1 +fi + +data_dir=$1 # The input data directory. +nnet_dir=$2 # The overlap detection neural network +out_dir=$3 # The output data directory + +data_id=`basename $data_dir` +overlap_dir=${out_dir}/overlap # working directory + +test_data_dir=${data_dir} +if [ $convert_data_dir_to_whole == "true" ]; then + test_data_dir=${data_dir}_whole + if ! 
[ -d $test_data_dir ]; then + utils/data/convert_data_dir_to_whole.sh $data_dir $test_data_dir + utils/fix_data_dir.sh $test_data_dir + num_wavs=$(wc -l < "$data_dir"/wav.scp) + if [ $nj -gt $num_wavs ]; then + nj=$num_wavs + fi + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj --cmd "$cmd" \ + --write-utt2num-frames true ${test_data_dir} + steps/compute_cmvn_stats.sh ${test_data_dir} + utils/fix_data_dir.sh ${test_data_dir} + fi +fi + +num_wavs=$(wc -l < "$data_dir"/wav.scp) +if [ $nj -gt $num_wavs ]; then + nj=$num_wavs +fi + +############################################################################### +## Forward pass through the network network and dump the log-likelihoods. +############################################################################### + +frame_subsampling_factor=1 +if [ -f $nnet_dir/frame_subsampling_factor ]; then + frame_subsampling_factor=$(cat $nnet_dir/frame_subsampling_factor) +fi + +mkdir -p $overlap_dir +if [ $stage -le 1 ]; then + if [ "$(utils/make_absolute.sh $nnet_dir)" != "$(utils/make_absolute.sh $overlap_dir)" ]; then + cp $nnet_dir/cmvn_opts $overlap_dir || exit 1 + fi + + ######################################################################## + ## Initialize neural network for decoding using the output $output_name + ######################################################################## + + if [ ! -z "$output_name" ] && [ "$output_name" != output ]; then + $cmd $out_dir/log/get_nnet_${output_name}.log \ + nnet3-copy --edits="rename-node old-name=$output_name new-name=output" \ + $nnet_dir/$iter.raw $overlap_dir/${iter}_${output_name}.raw || exit 1 + iter=${iter}_${output_name} + else + if ! 
diff $nnet_dir/$iter.raw $out_dir/$iter.raw; then + cp $nnet_dir/$iter.raw $overlap_dir/ + fi + fi + + steps/nnet3/compute_output.sh --nj $nj --cmd "$cmd" \ + --iter ${iter} \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk $frames_per_chunk --apply-exp true \ + --frame-subsampling-factor $frame_subsampling_factor \ + ${test_data_dir} $overlap_dir $out_dir || exit 1 +fi + +############################################################################### +## Prepare FST we search to make overlap decisions. +############################################################################### + +utils/data/get_utt2dur.sh --nj $nj --cmd "$cmd" $test_data_dir || exit 1 +frame_shift=$(utils/data/get_frame_shift.sh $test_data_dir) || exit 1 + +graph_dir=${overlap_dir}/graph_${output_name} +if [ $stage -le 2 ]; then + mkdir -p $graph_dir + + # 0 for silence, 1 for single speaker, and 2 for overlap + cat < $graph_dir/words.txt + 0 +silence 1 +single 2 +overlap 3 +EOF + + $cmd $graph_dir/log/make_graph.log \ + steps/overlap/prepare_overlap_graph.py $graph_opts \ + --frame-shift=$(perl -e "print $frame_shift * $frame_subsampling_factor") - \| \ + fstcompile --isymbols=$graph_dir/words.txt --osymbols=$graph_dir/words.txt '>' \ + $graph_dir/HCLG.fst +fi + +############################################################################### +## Do Viterbi decoding to create per-frame alignments. +############################################################################### + +transform_opt= +if ! 
[ -z "$output_scale" ]; then + # Transformation matrix for output scaling computed from provided + # `output_scale` values + echo $output_scale | python -c "import sys +sys.path.insert(0, 'steps') +import libs.common as common_lib + +line = sys.stdin.read() +sil_prior, single_prior, ovl_prior = line.strip().split() +transform_mat = [[float(sil_prior),0,0], [0,float(single_prior),0], [0,0,float(ovl_prior)]] +common_lib.write_matrix_ascii(sys.stdout, transform_mat)" > $overlap_dir/transform_probs.mat + transform_opt="--transform $overlap_dir/transform_probs.mat" +fi + +if [ $stage -le 3 ]; then + echo "$0: Decoding output" + steps/segmentation/decode_sad.sh --acwt $acwt --cmd "$cmd" --nj $nj \ + $transform_opt $graph_dir $out_dir $overlap_dir +fi + +############################################################################### +## Post-process output to create RTTM file containing overlaps. +############################################################################### + +if [ $stage -le 4 ]; then + steps/overlap/post_process_output.sh \ + --segment-padding $segment_padding --min-segment-dur $min_segment_dur \ + --merge-consecutive-max-dur $merge_consecutive_max_dur \ + --cmd "$cmd" --frame-shift $(perl -e "print $frame_subsampling_factor * $frame_shift") \ + --region-type $region_type \ + ${test_data_dir} ${overlap_dir} ${out_dir} +fi + +echo "$0: Created output overlap RTTM at ${out_dir}/rttm_${region_type}" +exit 0 diff --git a/egs/ami/s5c_apt2141/local/diarize_ahc.sh b/egs/ami/s5c_apt2141/local/diarize_ahc.sh new file mode 100755 index 00000000000..bc7ab1d7803 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/diarize_ahc.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# Copyright 2019 David Snyder +# 2020 Desh Raj + +# Apache 2.0. +# +# This script takes an input directory that has a segments file (and +# a feats.scp file), and performs diarization on it, using agglomerative +# hierarchical clustering. 
The output directory contains an RTTM file +# which can be used to resegment the input data. + +stage=0 +nj=10 +cmd="run.pl" + + +echo "$0 $@" # Print the command line for logging +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; +if [ $# != 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 exp/xvector_nnet_1a data/dev exp/dev_diarization" + echo "Options: " + echo " --nj # number of parallel jobs." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +model_dir=$1 +data_in=$2 +out_dir=$3 + +name=`basename $data_in` + +for f in $data_in/feats.scp $data_in/segments $model_dir/plda \ + $model_dir/final.raw $model_dir/extract.config; do + [ ! -f $f ] && echo "$0: No such file $f" && exit 1; +done + +if [ $stage -le 1 ]; then + echo "$0: computing features for x-vector extractor" + utils/fix_data_dir.sh data/${name} + rm -rf data/${name}_cmn + local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$cmd" \ + data/$name data/${name}_cmn exp/${name}_cmn + cp data/$name/segments exp/${name}_cmn/ + utils/fix_data_dir.sh data/${name}_cmn +fi + +if [ $stage -le 2 ]; then + echo "$0: extracting x-vectors for all segments" + diarization/nnet3/xvector/extract_xvectors.sh --cmd "$cmd" \ + --nj $nj --window 1.5 --period 0.75 --apply-cmn false \ + --min-segment 0.5 $model_dir \ + data/${name}_cmn $out_dir/xvectors_${name} +fi + +# Perform PLDA scoring +if [ $stage -le 3 ]; then + # Perform PLDA scoring on all xvector pairs in arrays + echo "$0: performing PLDA scoring between x-vectors" + diarization/nnet3/xvector/score_plda.sh --cmd "$cmd" \ + --target-energy 0.1 \ + --nj $nj $model_dir/ $out_dir/xvectors_${name} \ + $out_dir/xvectors_${name}/plda_scores +fi + +if [ $stage -le 4 ]; then + echo "$0: performing clustering using PLDA scores" + diarization/cluster.sh --cmd "$train_cmd" --nj 10 --stage 0\ + --rttm-channel 1 --threshold 0.1 \ + $out_dir/xvectors_${name}/plda_scores $out_dir + echo "$0: wrote RTTM to output directory 
${out_dir}" +fi diff --git a/egs/ami/s5c_apt2141/local/diarize_spectral.sh b/egs/ami/s5c_apt2141/local/diarize_spectral.sh new file mode 100755 index 00000000000..9784d8d47b1 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/diarize_spectral.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Copyright 2019 David Snyder +# 2020 Desh Raj + +# Apache 2.0. +# +# This script takes an input directory that has a segments file (and +# a feats.scp file), and performs diarization on it using spectral +# clustering. + +stage=0 +nj=10 +cmd="run.pl" +rttm_affix= + +echo "$0 $@" # Print the command line for logging +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; +if [ $# != 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 exp/xvector_nnet_1a data/dev exp/dev_diarization" + echo "Options: " + echo " --nj # number of parallel jobs." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +model_dir=$1 +data_in=$2 +out_dir=$3 + +name=`basename $data_in` + +for f in $data_in/feats.scp $data_in/segments \ + $model_dir/final.raw $model_dir/extract.config; do + [ ! -f $f ] && echo "$0: No such file $f" && exit 1; +done + +if [ $stage -le 1 ]; then + echo "$0: computing features for x-vector extractor" + utils/fix_data_dir.sh data/${name} + rm -rf data/${name}_cmn + local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$cmd" \ + data/$name data/${name}_cmn exp/${name}_cmn + cp data/$name/segments exp/${name}_cmn/ + utils/fix_data_dir.sh data/${name}_cmn +fi + +if [ $stage -le 2 ]; then + echo "$0: extracting x-vectors for all segments" + diarization/nnet3/xvector/extract_xvectors.sh --cmd "$cmd" \ + --nj $nj --window 1.5 --period 0.75 --apply-cmn false \ + --min-segment 0.5 $model_dir \ + data/${name}_cmn $out_dir/xvectors_${name} +fi + +# Perform cosine similarity scoring +if [ $stage -le 3 ]; then + # Perform cosine similarity scoring on all pairs of segments for each recording. 
+ echo "$0: performing cosine similarity scoring between all pairs of x-vectors" + diarization/score_cossim.sh --cmd "$cmd" \ + --nj $nj $out_dir/xvectors_${name} \ + $out_dir/xvectors_${name}/cossim_scores +fi + +if [ $stage -le 4 ]; then + echo "$0: performing spectral clustering using cosine similarity scores" + diarization/scluster.sh --cmd "$cmd" --nj $nj \ + --rttm-channel 1 --rttm-affix "$rttm_affix" \ + $out_dir/xvectors_${name}/cossim_scores $out_dir + echo "$0: wrote RTTM to output directory ${out_dir}" +fi diff --git a/egs/ami/s5c_apt2141/local/diarize_vbx.sh b/egs/ami/s5c_apt2141/local/diarize_vbx.sh new file mode 100755 index 00000000000..d5e681a9142 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/diarize_vbx.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# Copyright 2019 David Snyder +# 2020 Desh Raj + +# Apache 2.0. +# +# This script takes an input directory that has a segments file (and +# a feats.scp file), and performs diarization on it, using BUTs +# Bayesian HMM-based diarization model. A first-pass of AHC is performed +# first followed by VB-HMM. + +stage=0 +nj=10 +cmd="run.pl" + +echo "$0 $@" # Print the command line for logging +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; +if [ $# != 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 exp/xvector_nnet_1a data/dev exp/dev_diarization" + echo "Options: " + echo " --nj # number of parallel jobs." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +model_dir=$1 +data_in=$2 +out_dir=$3 + +name=`basename $data_in` + +for f in $data_in/feats.scp $data_in/segments $model_dir/plda \ + $model_dir/final.raw $model_dir/extract.config; do + [ ! 
-f $f ] && echo "$0: No such file $f" && exit 1; +done + +if [ $stage -le 1 ]; then + echo "$0: computing features for x-vector extractor" + utils/fix_data_dir.sh data/${name} + rm -rf data/${name}_cmn + local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$cmd" \ + data/$name data/${name}_cmn exp/${name}_cmn + cp data/$name/segments exp/${name}_cmn/ + utils/fix_data_dir.sh data/${name}_cmn +fi + +if [ $stage -le 2 ]; then + echo "$0: extracting x-vectors for all segments" + diarization/nnet3/xvector/extract_xvectors.sh --cmd "$cmd" \ + --nj $nj --window 1.5 --period 0.75 --apply-cmn false \ + --min-segment 0.5 $model_dir \ + data/${name}_cmn $out_dir/xvectors_${name} +fi + +# Perform PLDA scoring +if [ $stage -le 3 ]; then + # Perform PLDA scoring on all pairs of segments for each recording. + echo "$0: performing PLDA scoring between all pairs of x-vectors" + diarization/nnet3/xvector/score_plda.sh --cmd "$cmd" \ + --target-energy 0.1 \ + --nj $nj $model_dir/ $out_dir/xvectors_${name} \ + $out_dir/xvectors_${name}/plda_scores +fi + +if [ $stage -le 4 ]; then + echo "$0: performing clustering using PLDA scores (threshold tuned on dev)" + diarization/cluster.sh --cmd "$cmd" --nj $nj \ + --rttm-channel 1 --threshold 0.1 \ + $out_dir/xvectors_${name}/plda_scores $out_dir + echo "$0: wrote RTTM to output directory ${out_dir}" +fi + +if [ $stage -le 5 ]; then + echo "$0: performing VB-HMM on top of first-pass AHC" + diarization/vb_hmm_xvector.sh --nj $nj --rttm-channel 1 \ + --loop-prob 0.5 --fa 0.05 \ + $out_dir $out_dir/xvectors_${name} $model_dir/plda +fi + diff --git a/egs/ami/s5c_apt2141/local/generate_forced_aligned_rttm.py b/egs/ami/s5c_apt2141/local/generate_forced_aligned_rttm.py new file mode 100755 index 00000000000..50422e4c979 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/generate_forced_aligned_rttm.py @@ -0,0 +1,105 @@ +#! /usr/bin/env python +# Copyright 2020 Desh Raj (Johns Hopkins University) +# Apache 2.0. 
+ +"""This script uses forced alignments of the AMI training data +to generate an RTTM file for the simulated LibriCSS data. +This "new" RTTM file can be used to then obtain new segments +and utt2spk files. We do this because the original Librispeech +utterances often have long silences (which are still considered +speech). These can be harmful when used for training systems +for diarization or overlap detection, etc. The alignment file +must have the following format: +116-288045-0000 1 0.530 0.160 AS""" + +import argparse +import itertools +from collections import defaultdict + +def get_args(): + parser = argparse.ArgumentParser( + description="""This script uses forced alignments of the Librispeech data + to generate an RTTM file for the simulated LibriCSS data. + This "new" RTTM file can be used to then obtain new segments + and utt2spk files. We do this because the original Librispeech + utterances often have long silences (which are still considered + speech). These can be harmful when used for training systems + for diarization or overlap detection, etc.""") + parser.add_argument("--max-pause", type=float, default=0.5, + help="Maximum pause between words in a segment") + parser.add_argument("--extend-time", type=float, default=0, + help="Extend segments by this duration on each end") + + parser.add_argument("ctm_file", type=str, + help="""Input CTM file. 
+ The format of the CTM file is + """ + """ """) + + args = parser.parse_args() + + return args + +class Word: + def __init__(self, parts): + seg_id = parts[0] + spk_reco_id, seg_start, _ = seg_id.rsplit('-', 2) + self.spk_id, self.reco_id = spk_reco_id.split('_') + self.start_time = float(seg_start)/100 + float(parts[2]) + self.duration = float(parts[3]) + self.end_time = self.start_time + self.duration + self.text = parts[4] + +def groupby(iterable, keyfunc): + """Wrapper around ``itertools.groupby`` which sorts data first.""" + iterable = sorted(iterable, key=keyfunc) + for key, group in itertools.groupby(iterable, keyfunc): + yield key, group + +def main(): + args = get_args() + + # Read the CTM file and store as a list of Word objects + ctm_words=[] + with open(args.ctm_file, 'r') as f: + for line in f: + ctm_words.append(Word(line.strip().split())) + + # Group the list into a dictionary indexed by reco id + reco_and_spk_to_words = defaultdict(list, + {reco_id : list(g) for reco_id, g in groupby(ctm_words, lambda x: (x.reco_id,x.spk_id))}) + + new_segments = [] # [(reco_id, start_time, end_time, spkid)] + + for uid in sorted(reco_and_spk_to_words): + reco_id, spk_id = uid + words = sorted(reco_and_spk_to_words[uid], key=lambda x: x.start_time) + + cur_start = words[0].start_time + cur_end = words[0].end_time + for word in words[1:]: + if (word.start_time > cur_end + args.max_pause): + # flush current segment + segment_start = max(0, cur_start - args.extend_time) + segment_end = cur_end + args.extend_time + new_segments.append((reco_id, segment_start, segment_end, spk_id)) + + # start new segment and text + cur_start = word.start_time + cur_end = word.end_time + + else: + # extend running segment and text + cur_end = word.end_time + + # flush last remaining segment + segment_start = max(0, cur_start - args.extend_time) + segment_end = cur_end + args.extend_time + new_segments.append((reco_id, segment_start, segment_end, spk_id)) + + rttm_str = "SPEAKER {0} 1 
{1:7.3f} {2:7.3f} {3} " + for segment in sorted(new_segments, key=lambda x: (x[0], x[1])): + print(rttm_str.format(segment[0], segment[1], segment[2] - segment[1], segment[3])) + +if __name__ == '__main__': + main() diff --git a/egs/ami/s5c_apt2141/local/nnet3/xvector/prepare_feats.sh b/egs/ami/s5c_apt2141/local/nnet3/xvector/prepare_feats.sh new file mode 100755 index 00000000000..6b5ccd466c3 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/nnet3/xvector/prepare_feats.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# +# Apache 2.0. + +# This script applies sliding window CMVN and writes the features to disk. +# +# Although this kind of script isn't necessary in speaker recognition recipes, +# it can be helpful in the diarization recipes. The script +# diarization/nnet3/xvector/extract_xvectors.sh extracts x-vectors from very +# short (e.g., 1-2 seconds) segments. Therefore, in order to apply the sliding +# window CMVN in a meaningful way, it must be performed prior to performing +# the subsegmentation. + +nj=40 +cmd="run.pl" +stage=0 +norm_vars=false +center=true +compress=true +cmn_window=300 + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; +if [ $# != 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 data/train data/train_no_sil exp/make_xvector_features" + echo "Options: " + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --norm-vars # If true, normalize variances in the sliding window cmvn" + exit 1; +fi + +data_in=$1 +data_out=$2 +dir=$3 + +name=`basename $data_in` + +for f in $data_in/feats.scp ; do + [ ! -f $f ] && echo "$0: No such file $f" && exit 1; +done + +# Set various variables. +mkdir -p $dir/log +mkdir -p $data_out +featdir=$(utils/make_absolute.sh $dir) + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $featdir/storage ]; then + utils/create_split_dir.pl \ + /export/b{14,15,16,17}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_cmvn_feats/storage $featdir/storage +fi + +for n in $(seq $nj); do + # the next command does nothing unless $featdir/storage/ exists, see + # utils/create_data_link.pl for more info. + utils/create_data_link.pl $featdir/xvector_cmvn_feats_${name}.${n}.ark +done + +cp $data_in/utt2spk $data_out/utt2spk +cp $data_in/spk2utt $data_out/spk2utt +cp $data_in/wav.scp $data_out/wav.scp +for f in $data_in/segments $data_in/segments/vad.scp ; do + [ -f $f ] && cp $f $data_out/`basename $f`; +done + +write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB" + +sdata_in=$data_in/split$nj; +utils/split_data.sh $data_in $nj || exit 1; + +$cmd JOB=1:$nj $dir/log/create_xvector_cmvn_feats_${name}.JOB.log \ + apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=$cmn_window \ + scp:${sdata_in}/JOB/feats.scp ark:- \| \ + copy-feats --compress=$compress $write_num_frames_opt ark:- \ + ark,scp:$featdir/xvector_cmvn_feats_${name}.JOB.ark,$featdir/xvector_cmvn_feats_${name}.JOB.scp || exit 1; + +for n in $(seq $nj); do + cat $featdir/xvector_cmvn_feats_${name}.$n.scp || exit 1; +done > ${data_out}/feats.scp || exit 1 + +for n in $(seq $nj); do + cat $featdir/log/utt2num_frames.$n || exit 1; +done > $data_out/utt2num_frames || exit 1 +rm $featdir/log/utt2num_frames.* + +echo "$0: Succeeded creating xvector features for $name" diff --git a/egs/ami/s5c_apt2141/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/ami/s5c_apt2141/local/nnet3/xvector/prepare_feats_for_egs.sh new file mode 100755 index 00000000000..326b6dbb9fa --- /dev/null +++ b/egs/ami/s5c_apt2141/local/nnet3/xvector/prepare_feats_for_egs.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# +# Apache 2.0. + +# This script applies sliding window CMVN and removes silence frames. 
This +# is performed on the raw features prior to generating examples for training +# the x-vector system. Once the training examples are generated, the features +# created by this script can be removed. + +nj=40 +cmd="run.pl" +stage=0 +norm_vars=false +center=true +compress=true +cmn_window=300 + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; +if [ $# != 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 data/train data/train_no_sil exp/make_xvector_features" + echo "Options: " + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --norm-vars # If true, normalize variances in the sliding window cmvn" + exit 1; +fi + +data_in=$1 +data_out=$2 +dir=$3 + +name=`basename $data_in` + +for f in $data_in/feats.scp $data_in/vad.scp ; do + [ ! -f $f ] && echo "$0: No such file $f" && exit 1; +done + +# Set various variables. +mkdir -p $dir/log +mkdir -p $data_out +featdir=$(utils/make_absolute.sh $dir) + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $featdir/storage ]; then + utils/create_split_dir.pl \ + /export/b{14,15,16,17}/$USER/kaldi-data/egs/callhome_diarization/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_feats/storage $featdir/storage +fi + +for n in $(seq $nj); do + # the next command does nothing unless $featdir/storage/ exists, see + # utils/create_data_link.pl for more info. 
+ utils/create_data_link.pl $featdir/xvector_feats_${name}.${n}.ark +done + +cp $data_in/utt2spk $data_out/utt2spk +cp $data_in/spk2utt $data_out/spk2utt +cp $data_in/wav.scp $data_out/wav.scp + +write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB" + +sdata_in=$data_in/split$nj; +utils/split_data.sh $data_in $nj || exit 1; + +$cmd JOB=1:$nj $dir/log/create_xvector_feats_${name}.JOB.log \ + apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=$cmn_window \ + scp:${sdata_in}/JOB/feats.scp ark:- \| \ + select-voiced-frames ark:- scp,s,cs:${sdata_in}/JOB/vad.scp ark:- \| \ + copy-feats --compress=$compress $write_num_frames_opt ark:- \ + ark,scp:$featdir/xvector_feats_${name}.JOB.ark,$featdir/xvector_feats_${name}.JOB.scp || exit 1; + +for n in $(seq $nj); do + cat $featdir/xvector_feats_${name}.$n.scp || exit 1; +done > ${data_out}/feats.scp || exit 1 + +for n in $(seq $nj); do + cat $featdir/log/utt2num_frames.$n || exit 1; +done > $data_out/utt2num_frames || exit 1 +rm $featdir/log/utt2num_frames.* + +echo "$0: Succeeded creating xvector features for $name" diff --git a/egs/ami/s5c_apt2141/local/nnet3/xvector/run_xvector.sh b/egs/ami/s5c_apt2141/local/nnet3/xvector/run_xvector.sh new file mode 120000 index 00000000000..585b63fd2dd --- /dev/null +++ b/egs/ami/s5c_apt2141/local/nnet3/xvector/run_xvector.sh @@ -0,0 +1 @@ +tuning/run_xvector_1a.sh \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/local/nnet3/xvector/score_plda.sh b/egs/ami/s5c_apt2141/local/nnet3/xvector/score_plda.sh new file mode 100755 index 00000000000..f1ecdfe8fd4 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/nnet3/xvector/score_plda.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# Copyright 2016-2018 David Snyder +# 2017-2018 Matthew Maciejewski +# Apache 2.0. + +# This script is a modified version of diarization/score_plda.sh +# that replaces i-vectors with x-vectors. 
+# +# This script computes PLDA scores from pairs of x-vectors extracted +# from segments of a recording. These scores are in the form of +# affinity matrices, one for each recording. Most likely, the x-vectors +# were computed using diarization/nnet3/xvector/extract_xvectors.sh. +# The affinity matrices are most likely going to be clustered using +# diarization/cluster.sh. + +# Note: It computes scores between all pairs of xvectors across all +# arrays. + +# Begin configuration section. +cmd="run.pl" +stage=0 +target_energy=0.1 +nj=10 +cleanup=true +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + + +if [ $# != 3 ]; then + echo "Usage: $0 " + echo " e.g.: $0 exp/xvectors_callhome_heldout exp/xvectors_callhome_test exp/xvectors_callhome_test" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --nj # Number of jobs (also see num-processes and num-threads)" + echo " --stage # To control partial reruns" + echo " --target-energy # Target energy remaining in xvectors after applying" + echo " # a conversation dependent PCA." + echo " --cleanup # If true, remove temporary files" + exit 1; +fi + +pldadir=$1 +xvecdir=$2 +dir=$3 + +mkdir -p $dir/tmp + +for f in $xvecdir/xvector.scp $xvecdir/spk2utt $xvecdir/utt2spk $xvecdir/segments $pldadir/plda $pldadir/mean.vec $pldadir/transform.mat; do + [ ! -f $f ] && echo "No such file $f" && exit 1; +done +cp $xvecdir/xvector.scp $dir/tmp/feats.scp +cp $xvecdir/spk2utt $dir/tmp/ +cp $xvecdir/utt2spk $dir/tmp/ +cp $xvecdir/spk2session $dir/tmp/ +cp $xvecdir/session2spk $dir/tmp/ +cp $xvecdir/segments $dir/tmp/ +cp $xvecdir/spk2utt $dir/ +cp $xvecdir/utt2spk $dir/ +cp $xvecdir/segments $dir/ + +utils/fix_data_dir.sh $dir/tmp > /dev/null + +data=$dir/tmp/; + +# Set various variables. 
+mkdir -p $dir/log + +feats="ark:ivector-subtract-global-mean $pldadir/mean.vec scp:$data/feats.scp ark:- | transform-vec $pldadir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" +if [ $stage -le 0 ]; then + echo "$0: scoring xvectors" + $cmd $dir/log/plda_scoring.log \ + ivector-plda-scoring-dense-multi --target-energy=$target_energy $pldadir/plda \ + ark:$data/spk2utt ark:$data/session2spk "$feats" ark,scp:$dir/scores.ark,$dir/scores.scp || exit 1; +fi + +if $cleanup ; then + rm -rf $dir/tmp || exit 1; +fi diff --git a/egs/ami/s5c_apt2141/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/ami/s5c_apt2141/local/nnet3/xvector/tuning/run_xvector_1a.sh new file mode 100755 index 00000000000..93fedcb33bd --- /dev/null +++ b/egs/ami/s5c_apt2141/local/nnet3/xvector/tuning/run_xvector_1a.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash +# Copyright 2018 David Snyder +# 2018 Johns Hopkins University (Author: Daniel Garcia-Romero) +# 2018 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0. + +# This script trains the x-vector DNN. The recipe is similar to the one +# described in "Diarization is Hard: Some Experiences and Lessons Learned +# for the JHU Team in the Inaugural DIHARD Challenge" by Sell et al. + +. ./cmd.sh +set -e + +stage=1 +train_stage=-1 +use_gpu=true +remove_egs=false + +data=data/train +nnet_dir=exp/xvector_nnet_1a/ +egs_dir=exp/xvector_nnet_1a/egs + +. ./path.sh +. ./cmd.sh +. ./utils/parse_options.sh + +num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l) + +# Now we create the nnet examples using sid/nnet3/xvector/get_egs.sh. +# The argument --num-repeats is related to the number of times a speaker +# repeats per archive. If it seems like you're getting too many archives +# (e.g., more than 200) try increasing the --frames-per-iter option. The +# arguments --min-frames-per-chunk and --max-frames-per-chunk specify the +# minimum and maximum length (in terms of number of frames) of the features +# in the examples. 
+# +# To make sense of the egs script, it may be necessary to put an "exit 1" +# command immediately after stage 3. Then, inspect +# exp//egs/temp/ranges.* . The ranges files specify the examples that +# will be created, and which archives they will be stored in. Each line of +# ranges.* has the following form: +# +# For example: +# 100304-f-sre2006-kacg-A 1 2 4079 881 23 + +# If you're satisfied with the number of archives (e.g., 50-150 archives is +# reasonable) and with the number of examples per speaker (e.g., 1000-5000 +# is reasonable) then you can let the script continue to the later stages. +# Otherwise, try increasing or decreasing the --num-repeats option. You might +# need to fiddle with --frames-per-iter. Increasing this value decreases the +# the number of archives and increases the number of examples per archive. +# Decreasing this value increases the number of archives, while decreasing the +# number of examples per archive. +if [ $stage -le 6 ]; then + echo "$0: Getting neural network training egs"; + # dump egs. + # frame per iter original 1000000000 + # frame per iter diagnostic original 500000 + # num repeat original 1 + sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ + --nj 8 \ + --stage 0 \ + --frames-per-iter 100000 \ + --frames-per-iter-diagnostic 10000 \ + --min-frames-per-chunk 200 \ + --max-frames-per-chunk 400 \ + --num-diagnostic-archives 3 \ + --num-repeats 10 \ + "$data" $egs_dir +fi + +if [ $stage -le 7 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}') + feat_dim=$(cat $egs_dir/info/feat_dim) + + # This chunk-size corresponds to the maximum number of frames the + # stats layer is able to pool over. In this script, it corresponds + # to 4 seconds. If the input recording is greater than 4 seconds, + # we will compute multiple xvectors from the same recording and average + # to produce the final xvector. 
+ max_chunk_size=400 + + # The smallest number of frames we're comfortable computing an xvector from. + # Note that the hard minimum is given by the left and right context of the + # frame-level layers. + min_chunk_size=20 + mkdir -p $nnet_dir/configs + cat < $nnet_dir/configs/network.xconfig + # please note that it is important to have input layer with the name=input + + # The frame-level layers + input dim=${feat_dim} name=input + relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512 + relu-batchnorm-layer name=tdnn2 input=Append(-2,0,2) dim=512 + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=512 + relu-batchnorm-layer name=tdnn4 dim=512 + relu-batchnorm-layer name=tdnn5 dim=1500 + + # The stats pooling layer. Layers after this are segment-level. + # In the config below, the first and last argument (0, and ${max_chunk_size}) + # means that we pool over an input segment starting at frame 0 + # and ending at frame ${max_chunk_size} or earlier. The other arguments (1:1) + # mean that no subsampling is performed. + stats-layer name=stats config=mean+stddev(0:1:1:${max_chunk_size}) + + # This is where we usually extract the embedding (aka xvector) from. 
+ relu-batchnorm-layer name=tdnn6 dim=128 input=stats + output-layer name=output include-log-softmax=true dim=${num_targets} +EOF + + steps/nnet3/xconfig_to_configs.py \ + --xconfig-file $nnet_dir/configs/network.xconfig \ + --config-dir $nnet_dir/configs/ + cp $nnet_dir/configs/final.config $nnet_dir/nnet.config + + # These three files will be used by sid/nnet3/xvector/extract_xvectors.sh + echo "output-node name=output input=tdnn6.affine" > $nnet_dir/extract.config + echo "$max_chunk_size" > $nnet_dir/max_chunk_size + echo "$min_chunk_size" > $nnet_dir/min_chunk_size +fi + +dropout_schedule='0,0@0.20,0.1@0.50,0' +srand=123 +if [ $stage -le 8 ]; then + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$train_cmd" \ + --trainer.optimization.proportional-shrink 10 \ + --trainer.optimization.momentum=0.5 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=8 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.minibatch-size=64 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2 \ + --trainer.num-epochs=3 \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.shuffle-buffer-size=1000 \ + --egs.frames-per-eg=1 \ + --egs.dir="$egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --dir=$nnet_dir || exit 1; +fi + +exit 0; diff --git a/egs/ami/s5c_apt2141/local/overlap/run_tdnn_lstm.sh b/egs/ami/s5c_apt2141/local/overlap/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/overlap/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/local/overlap/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5c_apt2141/local/overlap/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..f8f6d730370 --- /dev/null +++ 
b/egs/ami/s5c_apt2141/local/overlap/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash + +# Copyright 2017 Nagendra Kumar Goel +# 2018 Vimal Manohar +# 2020 Desh Raj (Johns Hopkins University) +# Apache 2.0 + +# This is a script to train a TDNN-LSTM for overlap detections +# using statistics pooling for long-context information. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= + +chunk_width=50 + +# The context is chosen to be around 1 second long. The context at test time +# is expected to be around the same. +extra_left_context=79 +extra_right_context=21 + +relu_dim=512 + +# training options +num_epochs=40 +initial_effective_lrate=0.00001 +final_effective_lrate=0.000001 +num_jobs_initial=8 +num_jobs_final=12 +remove_egs=true +max_param_change=0.2 # Small max-param change for small network + +egs_dir= +nj=40 + +dir= +affix=1a + +data_dir= +targets_dir= + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +set -o pipefail +set -u + +if [ -z "$dir" ]; then + dir=exp/overlap_1a/tdnn_lstm +fi +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/cmvn_opts + +if [ $stage -le 1 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + tdnn_opts="l2-regularize=0.01" + lstm_opts="l2-regularize=0.01 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3" + output_opts="l2-regularize=0.01" + label_delay=5 + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$data_dir/feats.scp -` name=input + + fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $tdnn_opts dim=512 + relu-batchnorm-layer name=tdnn2 $tdnn_opts input=Append(-1,0,1) dim=512 + fast-lstmp-layer name=lstm1 $lstm_opts + + relu-batchnorm-layer name=tdnn3 $tdnn_opts input=Append(-3,0,3) dim=512 + relu-batchnorm-layer name=tdnn4 $tdnn_opts input=Append(-3,0,3) dim=512 + fast-lstmp-layer name=lstm2 $lstm_opts + + relu-batchnorm-layer name=tdnn5 $tdnn_opts input=Append(-3,0,3) dim=512 + relu-batchnorm-layer name=tdnn6 $tdnn_opts input=Append(-3,0,3) dim=512 + fast-lstmp-layer name=lstm3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=true dim=3 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ + + cat <> $dir/configs/vars +num_targets=3 +EOF +fi + +if [ $stage -le 2 ]; then + num_utts=`cat $data_dir/utt2spk | wc -l` + # Set num_utts_subset for diagnostics to a reasonable value + # of max(min(0.005 * num_utts, 300), 12) + num_utts_subset=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 300 ? 300 : ($n < 12 ? 
12 : $n))' $num_utts` + + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="$cmvn_opts" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj $nj \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=wait \ + --use-dense-targets=true \ + --feat-dir=$data_dir \ + --targets-scp="$targets_dir/targets.scp" \ + --egs.opts="--frame-subsampling-factor 1 --num-utts-subset $num_utts_subset" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Use a subset to compute prior over the output targets + $train_cmd $dir/log/get_priors.log \ + matrix-sum-rows scp:$targets_dir/targets.scp \ + ark:- \| vector-sum --binary=false ark:- $dir/post_output.vec || exit 1 + echo 1 > $dir/frame_subsampling_factor +fi diff --git a/egs/ami/s5c_apt2141/local/prepare_data.py b/egs/ami/s5c_apt2141/local/prepare_data.py new file mode 100755 index 00000000000..4872733a4bd --- /dev/null +++ b/egs/ami/s5c_apt2141/local/prepare_data.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +""" + Copyright 2020 Johns Hopkins University (Author: Desh Raj) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + + Prepare AMI mix-headset 
data. We use the RTTMs and SAD labels from the + "only_words" category of BUT's AMI setup: + https://github.com/BUTSpeechFIT/AMI-diarization-setup + + For more details about AMI splits and references used in other literature, + please refer to Section 4 of this paper: https://arxiv.org/abs/2012.14952 +""" + +import sys +import os +import argparse +import subprocess + +import pandas as pd + +def find_audios(wav_path, file_list): + # Get all wav file names from audio directory + command = 'find %s -name "*.wav"' % (wav_path) + wavs = subprocess.check_output(command, shell=True).decode('utf-8').splitlines() + keys = [ os.path.splitext(os.path.basename(wav))[0] for wav in wavs ] + data = {'key': keys, 'file_path': wavs} + df_wav = pd.DataFrame(data) + + # Filter list to keep only those in annotations (for the specific data split) + file_names_str = "|".join(file_list) + print(file_names_str) + print(df_wav) + df_wav = df_wav.loc[df_wav['key'].str.contains(file_names_str)].sort_values('key') + return df_wav + +def write_wav(df_wav, output_path, bin_wav=True): + with open(output_path + '/wav.scp', 'w') as f: + for key,file_path in zip(df_wav['key'], df_wav['file_path']): + key = key.split('.')[0] + if bin_wav: + f.write('%s sox %s -t wav - remix 1 | \n' % (key, file_path)) + else: + f.write('%s %s\n' % (key, file_path)) + +def write_segments(sad_labels_dir, output_path): + with open(output_path + '/segments', 'w') as f: + for sad_file in os.listdir(sad_labels_dir): + lab_path = os.path.join(sad_labels_dir, sad_file) + file_id = sad_file.split('.')[0] + with open(lab_path, 'r') as f_lab: + for line in f_lab: + parts = line.strip().split() + start = float(parts[0]) + end = float(parts[1]) + seg_id = f'{file_id}_{100*start:06.0f}_{100*end:06.0f}' + f.write(f'{seg_id} {file_id} {start} {end}\n') + + +def make_diar_data(meetings, wav_path, output_path, sad_labels_dir=None): + + if not os.path.exists(output_path): + os.makedirs(output_path) + + print('get file list') + 
file_list = [] + with open(meetings, 'r') as f: + for line in f: + file_list.append(line.strip()) + + print('read audios') + df_wav = find_audios(wav_path, file_list) + + print('make wav.scp') + write_wav(df_wav, output_path) + + if sad_labels_dir: + print('make segments') + write_segments(sad_labels_dir, output_path) + + +if __name__ == "__main__": + + parser=argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + fromfile_prefix_chars='@', + description='Prepare AMI dataset for diarization') + + parser.add_argument('meetings', help="Path to file containing list of meetings") + parser.add_argument('wav_path', help="Path to AMI corpus dir") + parser.add_argument('output_path', help="Path to generate data directory") + parser.add_argument('--sad-labels-dir', help="Path to SAD labels", default=None) + args=parser.parse_args() + + make_diar_data(**vars(args)) diff --git a/egs/ami/s5c_apt2141/local/split_dev.orig b/egs/ami/s5c_apt2141/local/split_dev.orig new file mode 100644 index 00000000000..7e3bec8a622 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/split_dev.orig @@ -0,0 +1,8 @@ +ES2011a +ES2011b +ES2011c +ES2011d +IB4001 +IB4002 +IB4003 +IB4004 \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/local/split_eval.orig b/egs/ami/s5c_apt2141/local/split_eval.orig new file mode 100644 index 00000000000..eb80674dbe2 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/split_eval.orig @@ -0,0 +1,8 @@ +ES2004a +ES2004b +ES2004c +ES2004d +EN2002a +EN2002b +EN2002c +EN2002d \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/local/split_manifest.py b/egs/ami/s5c_apt2141/local/split_manifest.py new file mode 100644 index 00000000000..162a15719d8 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/split_manifest.py @@ -0,0 +1,69 @@ +import os +import sys + +def unique(m): + unique_list = [] + + for i in m: + if i not in unique_list: + unique_list.append(i) + + return unique_list + +# Load in the MANIFEST file, save off the audio recoding file 
names +file = sys.argv[1] +prefix = '\thttps://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/' +m = [] + +with open(file) as f: + for line in f: + #splits = line.split('/') + #print(splits) + if line.startswith(prefix): + splits = line.split('/') + #print(splits) + m.append(splits[7]) +m = unique(m) +print("Got the audio files from MANIFEST.TXT") +#print(m) + +# Separate files and save off into train, dev, and eval partitions +N = len(m) + +#train = m[:round(N*.5)] +#dev = m[round(N*.5)+1:round(N*.8)] +#ev = m[round(N*.8)+1:] + +#train = train[:12] +#dev = dev[:10] +#ev = ev[:10] + +train = m[:8] +dev = m[9:15] +ev = m[16:20] + +print("Train set: "+str(train)) +print("Dev set: "+str(dev)) +print("Eval set: "+str(ev)) + +if os.path.exists('local/split_train.orig'): + os.remove('local/split_train.orig') +if os.path.exists('local/split_dev.orig'): + os.remove('local/split_dev.orig') +if os.path.exists('local/split_eval.orig'): + os.remove('local/split_eval.orig') + +with open('local/split_train.orig', 'a') as train_file: + for d in train: + train_file.write(d) + train_file.write("\n") + +with open('local/split_dev.orig', 'a') as dev_file: + for d in dev: + dev_file.write(d) + dev_file.write("\n") + +with open('local/split_eval.orig', 'a') as eval_file: + for d in ev: + eval_file.write(d) + eval_file.write("\n") diff --git a/egs/ami/s5c_apt2141/local/split_train.orig b/egs/ami/s5c_apt2141/local/split_train.orig new file mode 100644 index 00000000000..882e64bd633 --- /dev/null +++ b/egs/ami/s5c_apt2141/local/split_train.orig @@ -0,0 +1,27 @@ +IS1000a +IS1000b +IS1000c +IS1000d +IS1001a +IS1001b +IS1001c +IS1001d +IS1002b +IS1002c +IS1002d +ES2002a +ES2002b +ES2002c +ES2002d +ES2003a +ES2003b +ES2003c +ES2003d +ES2005a +ES2005b +ES2005c +ES2005d +ES2006a +ES2006b +ES2006c +ES2006d \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/local/train_overlap_detector.sh b/egs/ami/s5c_apt2141/local/train_overlap_detector.sh new file mode 100755 index 
00000000000..98759911f6e --- /dev/null +++ b/egs/ami/s5c_apt2141/local/train_overlap_detector.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash + +# Copyright 2020 Desh Raj (Johns Hopkins University) +# Apache 2.0 + +# This script trains an overlap detector. It is based on the Aspire +# speech activity detection system. We train with 3 targets: +# silence, single, and overlap. As such, at decode time, this +# can also be used as an SAD system. + +# We can use the annotated speech time marks or forced alignments +# for training. Here we provide code for both. To use forced alignments +# we need a pretrained acoustic model in order to obtain the +# alignments. + +affix=1a + +train_stage=-10 +stage=0 +nj=50 +test_nj=10 + +test_sets="dev test" + +target_type=annotation # set this to "annotation" or "forced" + +# If target_type is forced, the following must contain path to a tri3 model +src_dir=exp/tri3_cleaned +ali_dir=${src_dir}_ali + +. ./cmd.sh + +if [ -f ./path.sh ]; then . ./path.sh; fi + +set -e -u -o pipefail +. utils/parse_options.sh + +if [ $# != 1 ]; then + echo "Usage: $0 " + echo "e.g.: $0 /export/data/ami" + echo "Options: " + echo " --nj # number of parallel jobs." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ exit 1; +fi + +AMI_DIR=$1 + +train_set=train_ovl +dir=exp/overlap_${affix} + +train_data_dir=data/${train_set} +whole_data_dir=data/${train_set}_whole +whole_data_id=$(basename $train_set) + +mfccdir=mfcc + +mkdir -p $dir + +ref_rttm=$train_data_dir/rttm.annotation +if [ $stage -le 0 ]; then + utils/copy_data_dir.sh data/train $train_data_dir + cp data/train/rttm.annotation $ref_rttm +fi + +if [ $target_type == "forced" ]; then + # Prepare forced alignments for the training data + if [ $stage -le 1 ]; then + steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" --write-utt2num-frames true \ + --mfcc-config conf/mfcc.conf $train_data_dir + steps/compute_cmvn_stats.sh $train_data_dir + utils/fix_data_dir.sh $train_data_dir + fi + + if [ $stage -le 2 ]; then + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + $train_data_dir data/lang $src_dir $ali_dir + fi + + if [ $stage -le 3 ]; then + steps/get_train_ctm.sh --use-segments false --stage 0 \ + $train_data_dir data/lang $ali_dir + fi + + if [ $stage -le 4 ]; then + local/generate_forced_aligned_rttm.py --max-pause 0.1 $ali_dir/ctm > $train_data_dir/rttm.forced + fi + + ref_rttm=$train_data_dir/rttm.forced +fi + +if [ $stage -le 5 ]; then + # The training data may already be segmented, so we first prepare + # a "whole" training data (not segmented) for training the overlap + # detector. + utils/data/convert_data_dir_to_whole.sh $train_data_dir $whole_data_dir + steps/overlap/get_overlap_segments.py $ref_rttm > $whole_data_dir/overlap.rttm +fi + +############################################################################### +# Extract features for the whole data directory. We extract 40-dim MFCCs to +# train the NN-based overlap detector. 
+############################################################################### +if [ $stage -le 6 ]; then + steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" --write-utt2num-frames true \ + --mfcc-config conf/mfcc_hires.conf ${whole_data_dir} + steps/compute_cmvn_stats.sh ${whole_data_dir} + utils/fix_data_dir.sh ${whole_data_dir} +fi + +############################################################################### +# Prepare targets for training the overlap detector +############################################################################### +if [ $stage -le 7 ]; then + steps/overlap/get_overlap_targets.py \ + ${whole_data_dir}/utt2num_frames ${whole_data_dir}/overlap.rttm - |\ + copy-feats ark,t:- ark,scp:$dir/targets.ark,$dir/targets.scp +fi + +############################################################################### +# Train neural network for overlap detector +############################################################################### +if [ $stage -le 8 ]; then + # Train a TDNN-LSTM network for SAD + local/overlap/run_tdnn_lstm.sh \ + --targets-dir $dir --dir exp/overlap_$affix/tdnn_lstm \ + --data-dir ${whole_data_dir} || exit 1 +fi + +exit 0; diff --git a/egs/ami/s5c_apt2141/path.sh b/egs/ami/s5c_apt2141/path.sh new file mode 100755 index 00000000000..ae38e737c1f --- /dev/null +++ b/egs/ami/s5c_apt2141/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. 
$KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/ami/s5c_apt2141/run.sh b/egs/ami/s5c_apt2141/run.sh new file mode 100755 index 00000000000..f44d2e52e77 --- /dev/null +++ b/egs/ami/s5c_apt2141/run.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +# Copyright 2020 Johns Hopkins University (Author: Desh Raj) +# Apache 2.0. +# +# This recipe performs diarization for the mix-headset data in the +# AMI dataset. The x-vector extractor we use is trained on VoxCeleb v2 +# corpus with simulated RIRs. We use oracle SAD in this recipe. +# This recipe demonstrates the following: +# 1. Diarization using x-vector and clustering (AHC, VBx, spectral) +# 2. Training an overlap detector (using annotations) and corresponding +# inference on full recordings. + +# We do not provide training script for an x-vector extractor. You +# can download a pretrained extractor from: +# http://kaldi-asr.org/models/12/0012_diarization_v1.tar.gz +# and extract it. + +. ./cmd.sh +. ./path.sh +set -euo pipefail +mfccdir=`pwd`/mfcc + +stage=7 +overlap_stage=0 +diarizer_stage=0 +nj=10 +decode_nj=15 + +export mic=ihm + +model_dir=exp/xvector_nnet_1a + +train_set=train +test_sets="dev test" + +diarizer_type=spectral # must be one of (ahc, spectral, vbx) + +. utils/parse_options.sh + +# Path where AMI gets downloaded (or where locally available): +AMI_DIR=$PWD/wav_db # Default, + +# Download AMI corpus, You need around 130GB of free space to get whole data +if [ $stage -le 1 ]; then + if [ -d $AMI_DIR ] && ! touch $AMI_DIR/.foo 2>/dev/null; then + echo "$0: directory $AMI_DIR seems to exist and not be owned by you." + echo " ... Assuming the data does not need to be downloaded. Please use --stage 2 or more." + exit 1 + fi + if [ -e data/local/downloads/wget_$mic.sh ]; then + echo "data/local/downloads/wget_$mic.sh already exists, better quit than re-download... (use --stage N)" + exit 1 + fi + local/ami_download.sh $mic $AMI_DIR +fi + +# Prepare data directories. 
+if [ $stage -le 2 ]; then + # Download the data split and references from BUT's AMI setup + if ! [ -d AMI-diarization-setup ]; then + git clone https://github.com/BUTSpeechFIT/AMI-diarization-setup + fi + + for dataset in train $test_sets; do + echo "$0: preparing $dataset set.." + mkdir -p data/$dataset + # Prepare wav.scp and segments file from meeting lists and oracle SAD + # labels, and concatenate all reference RTTMs into one file. + local/prepare_data.py --sad-labels-dir AMI-diarization-setup/only_words/labs/${dataset} \ + AMI-diarization-setup/lists/${dataset}.meetings.txt \ + $AMI_DIR data/$dataset + cat AMI-diarization-setup/only_words/rttms/${dataset}/*.rttm \ + > data/${dataset}/rttm.annotation + + awk '{print $1,$2}' data/$dataset/segments > data/$dataset/utt2spk + utils/utt2spk_to_spk2utt.pl data/$dataset/utt2spk > data/$dataset/spk2utt + utils/fix_data_dir.sh data/$dataset + done +fi + +# Feature extraction +if [ $stage -le 3 ]; then + for dataset in train $test_sets; do + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj --cmd "$train_cmd" data/$dataset + steps/compute_cmvn_stats.sh data/$dataset + utils/fix_data_dir.sh data/$dataset + echo "FEATURES COMPLETE FOR DATASET" + done +fi + +if [ $stage -le 4 ]; then + echo "$0: preparing a AMI training data to train PLDA model" + #local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$train_cmd" \ + # data/train data/plda_train exp/plda_train_cmn + local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \ + --data data/plda_train +fi + +if [ $stage -le 5 ]; then + echo "$0: extracting x-vector for PLDA training data" + utils/fix_data_dir.sh data/plda_train + diarization/nnet3/xvector/extract_xvectors.sh --cmd "$train_cmd --mem 10G" \ + --nj $nj --window 3.0 --period 10.0 --min-segment 1.5 --apply-cmn false \ + --hard-min true $model_dir \ + data/plda_train $model_dir/xvectors_plda_train +fi + +# Train PLDA models +if [ $stage -le 6 ]; then + echo "$0: training PLDA model" + # 
Compute the mean vector for centering the evaluation xvectors. + $train_cmd $model_dir/xvectors_plda_train/log/compute_mean.log \ + ivector-mean scp:$model_dir/xvectors_plda_train/xvector.scp \ + $model_dir/xvectors_plda_train/mean.vec || exit 1; + + # Train the PLDA model. + $train_cmd $model_dir/xvectors_plda_train/log/plda.log \ + ivector-compute-plda ark:$model_dir/xvectors_plda_train/spk2utt \ + "ark:ivector-subtract-global-mean scp:$model_dir/xvectors_plda_train/xvector.scp ark:- |\ + transform-vec $model_dir/xvectors_plda_train/transform.mat ark:- ark:- |\ + ivector-normalize-length ark:- ark:- |" \ + $model_dir/xvectors_plda_train/plda || exit 1; + + cp $model_dir/xvectors_plda_train/plda $model_dir/ + cp $model_dir/xvectors_plda_train/transform.mat $model_dir/ + cp $model_dir/xvectors_plda_train/mean.vec $model_dir/ +fi + +if [ $stage -le 7 ]; then + for datadir in ${test_sets}; do + ref_rttm=data/${datadir}/rttm.annotation + + diarize_nj=$(wc -l < "data/$datadir/wav.scp") + nj=$((decode_nj>diarize_nj ? diarize_nj : decode_nj)) + local/diarize_${diarizer_type}.sh --nj $nj --cmd "$train_cmd" --stage $diarizer_stage \ + $model_dir data/${datadir} exp/${datadir}_diarization_${diarizer_type} + + # Evaluate RTTM using md-eval.pl + rttm_affix= + if [ $diarizer_type == "vbx" ]; then + rttm_affix=".vb" + fi + md-eval.pl -r $ref_rttm -s exp/${datadir}_diarization_${diarizer_type}/rttm${rttm_affix} + done +fi + +# These stages demonstrate how to perform training and inference +# for an overlap detector. 
+if [ $stage -le 8 ]; then + echo "$0: training overlap detector" + local/train_overlap_detector.sh --stage $overlap_stage --test-sets "$test_sets" $AMI_DIR +fi + +overlap_affix=1a +if [ $stage -le 9 ]; then + for dataset in $test_sets; do + echo "$0: performing overlap detection on $dataset" + local/detect_overlaps.sh --convert_data_dir_to_whole true \ + --output-scale "1 2 1" data/${dataset} \ + exp/overlap_$overlap_affix/tdnn_lstm_1a exp/overlap_$overlap_affix/$dataset + + echo "$0: evaluating output.." + steps/overlap/get_overlap_segments.py data/$dataset/rttm.annotation | grep "overlap" |\ + md-eval.pl -r - -s exp/overlap_$overlap_affix/$dataset/rttm_overlap |\ + awk 'or(/MISSED SPEAKER TIME/,/FALARM SPEAKER TIME/)' + done +fi + diff --git a/egs/ami/s5c_apt2141/sid b/egs/ami/s5c_apt2141/sid new file mode 120000 index 00000000000..893a12f30c9 --- /dev/null +++ b/egs/ami/s5c_apt2141/sid @@ -0,0 +1 @@ +../../sre08/v1/sid \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/steps b/egs/ami/s5c_apt2141/steps new file mode 120000 index 00000000000..6e99bf5b5ad --- /dev/null +++ b/egs/ami/s5c_apt2141/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/ami/s5c_apt2141/utils b/egs/ami/s5c_apt2141/utils new file mode 120000 index 00000000000..b240885218f --- /dev/null +++ b/egs/ami/s5c_apt2141/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file