From dfa475f2cd233773e713bb638da4e518bb82a9c5 Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Fri, 28 Aug 2020 08:30:52 +0200
Subject: [PATCH 1/9] Added validation set

---
 preprocess.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/preprocess.py b/preprocess.py
index 3085708..7aa0b68 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -58,10 +58,13 @@ def preprocess_dataset(cfg):
     out_dir.mkdir(parents=True, exist_ok=True)
 
     executor = ProcessPoolExecutor(max_workers=cpu_count())
-    for split in ["train", "test"]:
+    for split in ["train", "test", "val"]:
         print("Extracting features for {} set".format(split))
         futures = []
         split_path = out_dir / cfg.dataset.language / split
+        if not split_path.with_suffix(".json").exists():
+            print("Skipping {} (no json file)".format(split))
+            continue
         with open(split_path.with_suffix(".json")) as file:
             metadata = json.load(file)
         for in_path, start, duration, out_path in metadata:

From cd2a9ebdce8cadcc713d1662fd4f9a81442f831b Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Fri, 28 Aug 2020 08:34:12 +0200
Subject: [PATCH 2/9] Changes for dataset where language isn't specified

---
 dataset.py    | 2 +-
 preprocess.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/dataset.py b/dataset.py
index e762272..6dc63e6 100755
--- a/dataset.py
+++ b/dataset.py
@@ -37,7 +37,7 @@ def __getitem__(self, index):
         mels = list()
         paths = random.sample(paths, self.n_utterances_per_speaker)
         for path in paths:
-            path = self.root.parent / path
+            path = self.root / path
             mel = np.load(path.with_suffix(".mel.npy"))
             pos = random.randint(0, mel.shape[1] - self.n_sample_frames)
             mel = mel[:, pos:pos + self.n_sample_frames]
diff --git a/preprocess.py b/preprocess.py
index 7aa0b68..53f09ae 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -61,7 +61,10 @@ def preprocess_dataset(cfg):
     for split in ["train", "test", "val"]:
         print("Extracting features for {} set".format(split))
         futures = []
-        split_path = out_dir / cfg.dataset.language / split
+        if "language" in cfg.dataset:
+            split_path = out_dir / cfg.dataset.language / split
+        else:
+            split_path = out_dir / split
         if not split_path.with_suffix(".json").exists():
             print("Skipping {} (no json file)".format(split))
             continue

From 1803a2c75489dd193d26a73f3b942e076a044972 Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Fri, 28 Aug 2020 08:39:16 +0200
Subject: [PATCH 3/9] Changes to fix path issues

---
 encode.py     | 19 ++++++++++++++++---
 preprocess.py |  2 ++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/encode.py b/encode.py
index 0f70177..be79734 100644
--- a/encode.py
+++ b/encode.py
@@ -32,6 +32,10 @@ def encode_dataset(cfg):
 
     encoder.eval()
 
+    if cfg.save_embedding:
+        embedding_path = out_dir / "embedding.npy"
+        np.save(embedding_path, encoder.codebook.embedding.cpu().numpy())
+
     if cfg.save_auxiliary:
         auxiliary = []
 
@@ -41,7 +45,7 @@ def hook(module, input, output):
         encoder.encoder[-1].register_forward_hook(hook)
 
     for _, _, _, path in tqdm(metadata):
-        path = root_path.parent / path
+        path = root_path / path
         mel = torch.from_numpy(np.load(path.with_suffix(".mel.npy"))).unsqueeze(0).to(device)
         with torch.no_grad():
             z, c, indices = encoder.encode(mel)
@@ -52,15 +56,24 @@ def hook(module, input, output):
         with open(out_path.with_suffix(".txt"), "w") as file:
             np.savetxt(file, z, fmt="%.16f")
 
+        if cfg.save_indices:
+            indices_path = out_dir / "indices"
+            indices_path.mkdir(exist_ok=True, parents=True)
+            out_path = indices_path / path.stem
+            indices = indices.squeeze().cpu().numpy()
+            if not indices.shape == ():
+                with open(out_path.with_suffix(".txt"), "w") as file:
+                    np.savetxt(file, indices, fmt="%d")
+
         if cfg.save_auxiliary:
-            aux_path = out_dir.parent / "auxiliary_embedding1"
+            aux_path = out_dir / "auxiliary_embedding1"
             aux_path.mkdir(exist_ok=True, parents=True)
             out_path = aux_path / path.stem
             c = c.squeeze().cpu().numpy()
             with open(out_path.with_suffix(".txt"), "w") as file:
                 np.savetxt(file, c, fmt="%.16f")
 
-            aux_path = out_dir.parent / "auxiliary_embedding2"
+            aux_path = out_dir / "auxiliary_embedding2"
             aux_path.mkdir(exist_ok=True, parents=True)
             out_path = aux_path / path.stem
             aux = auxiliary.pop().squeeze().cpu().numpy()
diff --git a/preprocess.py b/preprocess.py
index 53f09ae..80b2881 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -72,6 +72,8 @@ def preprocess_dataset(cfg):
             metadata = json.load(file)
         for in_path, start, duration, out_path in metadata:
             wav_path = in_dir / in_path
+            assert wav_path.with_suffix(".wav").exists(), "'{}' does not exist".format(
+                wav_path.with_suffix(".wav"))
             out_path = out_dir / out_path
             out_path.parent.mkdir(parents=True, exist_ok=True)
             futures.append(executor.submit(

From d3b86dbda4ca450f9f09f0f4b00a38b61854a7d5 Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Fri, 28 Aug 2020 09:02:20 +0200
Subject: [PATCH 4/9] Removed special language flag

---
 preprocess.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/preprocess.py b/preprocess.py
index 80b2881..1929e01 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -61,10 +61,7 @@ def preprocess_dataset(cfg):
     for split in ["train", "test", "val"]:
         print("Extracting features for {} set".format(split))
         futures = []
-        if "language" in cfg.dataset:
-            split_path = out_dir / cfg.dataset.language / split
-        else:
-            split_path = out_dir / split
+        split_path = out_dir / split
         if not split_path.with_suffix(".json").exists():
             print("Skipping {} (no json file)".format(split))
             continue

From 10d0abaff084a9343714ae563668a5b0a005bb76 Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Fri, 28 Aug 2020 09:10:11 +0200
Subject: [PATCH 5/9] Added buckeye config

---
 config/dataset/2019/english.yaml  | 2 --
 config/dataset/2019/surprise.yaml | 2 --
 config/dataset/buckeye.yaml       | 3 +++
 3 files changed, 3 insertions(+), 4 deletions(-)
 create mode 100644 config/dataset/buckeye.yaml

diff --git a/config/dataset/2019/english.yaml b/config/dataset/2019/english.yaml
index 66a2dab..e594cb4 100644
--- a/config/dataset/2019/english.yaml
+++ b/config/dataset/2019/english.yaml
@@ -1,5 +1,3 @@
 dataset:
-  dataset: 2019
-  language: english
   path: 2019/english
   n_speakers: 102
diff --git a/config/dataset/2019/surprise.yaml b/config/dataset/2019/surprise.yaml
index a2bd290..4094a96 100644
--- a/config/dataset/2019/surprise.yaml
+++ b/config/dataset/2019/surprise.yaml
@@ -1,5 +1,3 @@
 dataset:
-  dataset: 2019
-  language: surprise
   path: 2019/surprise
   n_speakers: 113
\ No newline at end of file
diff --git a/config/dataset/buckeye.yaml b/config/dataset/buckeye.yaml
new file mode 100644
index 0000000..a7e2a6f
--- /dev/null
+++ b/config/dataset/buckeye.yaml
@@ -0,0 +1,3 @@
+dataset:
+  path: buckeye
+  n_speakers: 32

From cb165071280dee77d1c6284acec632269246b537 Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Fri, 28 Aug 2020 09:47:57 +0200
Subject: [PATCH 6/9] Added Buckeye dataset and cleaned up paths

---
 README.md          | 23 ++++++++++++++++-------
 config/encode.yaml |  5 ++++-
 encode.py          |  2 +-
 preprocess.py      |  2 +-
 4 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index b47cd5f..8fb942d 100644
--- a/README.md
+++ b/README.md
@@ -33,11 +33,11 @@ Leader-board for the ZeroSpeech 2020 challenge can be found [here](https://zeros
 
 3. Preprocess audio and extract train/test log-Mel spectrograms:
    ```
-   python preprocess.py in_dir=/path/to/dataset dataset=[2019/english or 2019/surprise]
+   python preprocess.py in_dir=/path/to/dataset dataset=[2019/english or 2019/surprise or buckeye]
    ```
-   Note: `in_dir` must be the path to the `2019` folder.
-   For `dataset` choose between `2019/english` or `2019/surprise`.
-   Other datasets will be added in the future.
+   For `dataset` choose between `2019/english`, `2019/surprise` or `buckeye`.
+   Note: `in_dir` must be the path to the `2019` folder or the original
+   Buckeye dataset directory. Other datasets will be added in the future.
 
    Example usage:
    ```
@@ -48,12 +48,16 @@ Leader-board for the ZeroSpeech 2020 challenge can be found [here](https://zeros
 
 1. Train the VQ-CPC model (or download pretrained weights [here](https://github.com/bshall/VectorQuantizedCPC/releases/tag/v0.1)):
    ```
-   python train_cpc.py checkpoint_dir=path/to/checkpoint_dir dataset=[2019/english or 2019/surprise]
+   python train_cpc.py checkpoint_dir=path/to/checkpoint_dir dataset=[2019/english or 2019/surprise or buckeye]
    ```
    Example usage:
    ```
    python train_cpc.py checkpoint_dir=checkpoints/cpc/2019english dataset=2019/english
    ```
+   or
+   ```
+   python train_cpc.py checkpoint_dir=checkpoints/cpc/buckeye dataset=buckeye training.sample_frames=64 model.encoder.n_embeddings=256 training.scheduler.warmup_epochs=250
+   ```
 
 2. Train the vocoder:
    ```
@@ -95,10 +99,15 @@ Voice conversion samples are available [here](https://bshall.github.io/VectorQua
 
 1. Encode test data for evaluation:
    ```
-   python encode.py checkpoint=path/to/checkpoint out_dir=path/to/out_dir dataset=[2019/english or 2019/surprise]
+   python encode.py checkpoint=path/to/checkpoint out_dir=path/to/out_dir dataset=[2019/english or 2019/surprise or buckeye]
+   ```
+   Example usage:
+   ```
+   python encode.py checkpoint=checkpoints/cpc/english2019/model.ckpt-22000.pt out_dir=submission/2019/english/test dataset=2019/english
    ```
+   or
    ```
-   e.g. python encode.py checkpoint=checkpoints/2019english/model.ckpt-500000.pt out_dir=submission/2019/english/test dataset=2019/english
+   python encode.py checkpoint=checkpoints/cpc/english2019/model.ckpt-22000.pt split=val save_indices=True out_dir=outputs/buckeye/val_zs2019/ dataset=buckeye
    ```
 
 2. Run ABX evaluation script (see [bootphon/zerospeech2020](https://github.com/bootphon/zerospeech2020)).
diff --git a/config/encode.yaml b/config/encode.yaml
index 95f9ec9..2ca9cd4 100644
--- a/config/encode.yaml
+++ b/config/encode.yaml
@@ -3,6 +3,9 @@ defaults:
   - preprocessing: default
   - model: default
 
+split: test
 checkpoint: ???
 out_dir: ???
-save_auxiliary: False
\ No newline at end of file
+save_auxiliary: False
+save_indices: False
+save_embedding: False
diff --git a/encode.py b/encode.py
index be79734..ae614a4 100644
--- a/encode.py
+++ b/encode.py
@@ -17,7 +17,7 @@ def encode_dataset(cfg):
     out_dir.mkdir(exist_ok=True, parents=True)
 
     root_path = Path(utils.to_absolute_path("datasets")) / cfg.dataset.path
-    with open(root_path / "test.json") as file:
+    with open((root_path / cfg.split).with_suffix(".json")) as file:
         metadata = json.load(file)
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
diff --git a/preprocess.py b/preprocess.py
index 1929e01..ba05ff1 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -54,7 +54,7 @@ def process_wav(wav_path, out_path, sr=16000, preemph=0.97, n_fft=2048, n_mels=
 @hydra.main(config_path="config/preprocessing.yaml")
 def preprocess_dataset(cfg):
     in_dir = Path(utils.to_absolute_path(cfg.in_dir))
-    out_dir = Path(utils.to_absolute_path("datasets")) / str(cfg.dataset.dataset)
+    out_dir = Path(utils.to_absolute_path("datasets")) / str(cfg.dataset.path)
     out_dir.mkdir(parents=True, exist_ok=True)
 
     executor = ProcessPoolExecutor(max_workers=cpu_count())

From b7f9b5c08faa29c2fad9e4e9d8f5dc91ebe7f071 Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Fri, 28 Aug 2020 11:27:45 +0200
Subject: [PATCH 7/9] Added Switchboard configs

---
 config/dataset/swbd.yaml       |  3 +++
 config/preprocessing/8khz.yaml | 10 ++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 config/dataset/swbd.yaml
 create mode 100644 config/preprocessing/8khz.yaml

diff --git a/config/dataset/swbd.yaml b/config/dataset/swbd.yaml
new file mode 100644
index 0000000..3a68a91
--- /dev/null
+++ b/config/dataset/swbd.yaml
@@ -0,0 +1,3 @@
+dataset:
+  path: swbd
+  n_speakers: 1131
diff --git a/config/preprocessing/8khz.yaml b/config/preprocessing/8khz.yaml
new file mode 100644
index 0000000..182b63e
--- /dev/null
+++ b/config/preprocessing/8khz.yaml
@@ -0,0 +1,10 @@
+preprocessing:
+  sr: 8000
+  n_fft: 1024
+  n_mels: 40
+  fmin: 50
+  preemph: 0.97
+  top_db: 80
+  hop_length: 80
+  win_length: 200
+  bits: 8

From 7120f35123eebb3bc8c254700c2f06b028277fb1 Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Fri, 28 Aug 2020 11:45:29 +0200
Subject: [PATCH 8/9] Updated readme

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 8fb942d..5c8d8a8 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,10 @@ Leader-board for the ZeroSpeech 2020 challenge can be found [here](https://zeros
    ```
    python preprocess.py in_dir=../datasets/2020/2019 dataset=2019/english
    ```
+   or
+   ```
+   python preprocess.py in_dir=/home/kamperh/endgame/projects/stellenbosch/vqseg/datasets/swb300-wavs/ dataset=swbd preprocessing=8khz
+   ```
 
 ## Training
 

From 1fefeadf092e8a1744e8ca1e0879674540181a70 Mon Sep 17 00:00:00 2001
From: Herman Kamper
Date: Tue, 1 Sep 2020 09:26:22 +0200
Subject: [PATCH 9/9] Small changes to readme

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 5c8d8a8..aeb53de 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,7 @@ Leader-board for the ZeroSpeech 2020 challenge can be found [here](https://zeros
    or
    ```
    python train_cpc.py checkpoint_dir=checkpoints/cpc/buckeye dataset=buckeye training.sample_frames=64 model.encoder.n_embeddings=256 training.scheduler.warmup_epochs=250
+   python train_cpc.py checkpoint_dir=checkpoints/cpc/swbd1 dataset=swbd training.sample_frames=64 preprocessing=8khz
    ```
 
 2. Train the vocoder:
@@ -112,6 +113,7 @@ Voice conversion samples are available [here](https://bshall.github.io/VectorQua
    or
    ```
    python encode.py checkpoint=checkpoints/cpc/english2019/model.ckpt-22000.pt split=val save_indices=True out_dir=outputs/buckeye/val_zs2019/ dataset=buckeye
+   python encode.py checkpoint=checkpoints/cpc/swbd1/model.ckpt-22000.pt split=val save_indices=True out_dir=outputs/swbd/val_swbd1/ dataset=swbd preprocessing=8khz
    ```
 
 2. Run ABX evaluation script (see [bootphon/zerospeech2020](https://github.com/bootphon/zerospeech2020)).