@@ -32,9 +32,9 @@ mkdir -p data/all data/train data/test data/eval data/local/train
32 32
33 33
34 34 # make utt2spk, wav.scp and text
35- find $train_dir -name *.wav -exec sh -c ' x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/all/utt2spk
36- find $train_dir -name *.wav -exec sh -c ' x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/all/wav.scp
37- find $train_dir -name *.txt -exec sh -c ' x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/all/text
35+ find -L $train_dir -name *.wav -exec sh -c ' x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/all/utt2spk
36+ find -L $train_dir -name *.wav -exec sh -c ' x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/all/wav.scp
37+ find -L $train_dir -name *.txt -exec sh -c ' x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/all/text
38 38
39 39 # fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp,
40 40 # duplicate entries and so on). Also, it regenerates the spk2utt from
@@ -51,9 +51,9 @@ echo "cp data/train/text data/local/train/text for language model training"
51 51 cat data/train/text | awk ' {$1=""}1;' | awk ' {$1=$1}1;' > data/local/train/text
52 52
53 53 # preparing EVAL set.
54- find $eval_dir -name *.wav -exec sh -c ' x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/eval/utt2spk
55- find $eval_dir -name *.wav -exec sh -c ' x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/eval/wav.scp
56- find $eval_key_dir -name *.txt -exec sh -c ' x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/eval/text
54+ find -L $eval_dir -name *.wav -exec sh -c ' x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/eval/utt2spk
55+ find -L $eval_dir -name *.wav -exec sh -c ' x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/eval/wav.scp
56+ find -L $eval_key_dir -name *.txt -exec sh -c ' x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/eval/text
57 57 utils/fix_data_dir.sh data/eval
58 58
59 59 echo " Data preparation completed."
0 commit comments