Skip to content

Commit 5cb75d8

Browse files
authored
Fix bugs for content extractor's args (#165)
1 parent 1e4a650 commit 5cb75d8

File tree

2 files changed

+33
-11
lines changed

2 files changed

+33
-11
lines changed

config/svc/base.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,34 @@
8686
"spk2id": "singers.json",
8787
"utt2spk": "utt2singer"
8888
},
89+
"model": {
90+
"condition_encoder": {
91+
"merge_mode": "add",
92+
// Prosody Features
93+
"use_f0": true,
94+
"use_uv": true,
95+
"use_energy": true,
96+
// Quantization (0 for no quantization)
97+
"input_melody_dim": 1,
98+
"n_bins_melody": 256,
99+
"output_melody_dim": 384,
100+
"input_loudness_dim": 1,
101+
"n_bins_loudness": 256,
102+
"output_loudness_dim": 384,
103+
// Semantic Features
104+
"use_whisper": false,
105+
"use_contentvec": false,
106+
"use_wenet": false,
107+
"use_mert": false,
108+
"whisper_dim": 1024,
109+
"contentvec_dim": 256,
110+
"mert_dim": 256,
111+
"wenet_dim": 512,
112+
"content_encoder_dim": 384,
113+
// Speaker Features
114+
"output_singer_dim": 384,
115+
"singer_table_size": 512,
116+
"use_spkid": true
117+
}
118+
},
89119
}

processors/content_extractor.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -526,10 +526,7 @@ def extract_utt_content_features_dataloader(cfg, metadata, num_workers):
526526
for batch_idx, items in enumerate(tqdm(data_loader)):
527527
_metadata, wavs, lens = items
528528

529-
batch_content_features = extractor.extract_content_features(
530-
wavs,
531-
lens,
532-
)
529+
batch_content_features = extractor.extract_content_features(wavs)
533530
for index, utt in enumerate(_metadata):
534531
extractor.save_feature(utt, batch_content_features[index])
535532

@@ -561,9 +558,7 @@ def extract_utt_content_features_dataloader(cfg, metadata, num_workers):
561558
for batch_idx, items in enumerate(tqdm(data_loader)):
562559
_metadata, wavs, lens = items
563560

564-
batch_content_features = extractor.extract_content_features(
565-
wavs, lens
566-
)
561+
batch_content_features = extractor.extract_content_features(wavs)
567562
for index, utt in enumerate(_metadata):
568563
extractor.save_feature(utt, batch_content_features[index])
569564

@@ -626,9 +621,6 @@ def extract_utt_content_features_dataloader(cfg, metadata, num_workers):
626621
for batch_idx, items in enumerate(tqdm(data_loader)):
627622
_metadata, wavs, lens = items
628623

629-
batch_content_features = extractor.extract_content_features(
630-
wavs,
631-
lens,
632-
)
624+
batch_content_features = extractor.extract_content_features(wavs)
633625
for index, utt in enumerate(_metadata):
634626
extractor.save_feature(utt, batch_content_features[index])

0 commit comments

Comments
 (0)