Skip to content

Commit db9a70b

Browse files
committed
move to earnings instead of earnings21
Signed-off-by: Nithin Rao Koluguri <nithinrao.koluguri@gmail.com>
1 parent cece201 commit db9a70b

File tree

5 files changed

+17
-17
lines changed

5 files changed

+17
-17
lines changed

dataset_configs/english/earnings21/config.yaml renamed to dataset_configs/english/earnings/config.yaml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ documentation: |
134134
135135
.. code-block:: bash
136136
137-
python main.py --config-path=dataset_configs/english/earnings21 --config-name=config \
137+
python main.py --config-path=dataset_configs/english/earnings --config-name=config \
138138
dataset_type=earnings21 \
139139
dataset_root=/path/to/earnings21 \
140140
output_directory=/path/to/output
@@ -143,7 +143,7 @@ documentation: |
143143
144144
.. code-block:: bash
145145
146-
python main.py --config-path=dataset_configs/english/earnings21 --config-name=config \
146+
python main.py --config-path=dataset_configs/english/earnings --config-name=config \
147147
dataset_type=earnings22 \
148148
forced_alignment_model=nvidia/parakeet-tdt_ctc-1.1b \
149149
dataset_root=/path/to/earnings22 \
@@ -153,7 +153,7 @@ documentation: |
153153
154154
.. code-block:: bash
155155
156-
python main.py --config-path=dataset_configs/english/earnings21 --config-name=config \
156+
python main.py --config-path=dataset_configs/english/earnings --config-name=config \
157157
dataset_type=earnings21 \
158158
subset=eval10 \
159159
dataset_root=/path/to/earnings21 \
@@ -193,7 +193,7 @@ forced_alignment_model: nvidia/parakeet-tdt_ctc-1.1b
193193
device: "cuda"
194194

195195
processors:
196-
- _target_: sdp.processors.datasets.earnings21.CreateInitialAudioAndManifest
196+
- _target_: sdp.processors.datasets.earnings.CreateInitialAudioAndManifest
197197
dataset_root: ${dataset_root}
198198
raw_audio_source_dir: ${raw_audio_input_dir}
199199
output_manifest_file: ${output_directory}/01_initial_audio_manifest.json
@@ -211,7 +211,7 @@ processors:
211211
target_samplerate: 16000
212212
target_nchannels: 1
213213

214-
- _target_: sdp.processors.datasets.earnings21.CreateFullAudioManifestEarnings21
214+
- _target_: sdp.processors.datasets.earnings.CreateFullAudioManifestEarnings21
215215
input_manifest_file: ${output_directory}/02_converted_audio_manifest.json
216216
dataset_root: ${dataset_root}
217217
output_manifest_file: ${output_directory}/03_full_audio_with_text_manifest.json
@@ -227,20 +227,20 @@ processors:
227227
- {"pattern": "<.*?>", "repl": ""}
228228
- {"pattern": "\\[.*?\\]", "repl": ""}
229229

230-
- _target_: sdp.processors.datasets.earnings21.NeMoForcedAligner
230+
- _target_: sdp.processors.datasets.earnings.NeMoForcedAligner
231231
input_manifest_file: ${output_directory}/04_full_audio_with_text_manifest_cleaned.json
232232
output_manifest_file: ${output_directory}/05_aligned_manifest.json
233233
output_dir: ${output_directory}/forced_alignment_output
234234
pretrained_name: ${forced_alignment_model}
235235
device: ${device}
236236
batch_size: 1
237237

238-
- _target_: sdp.processors.datasets.earnings21.CreateSentenceSegmentedManifest
238+
- _target_: sdp.processors.datasets.earnings.CreateSentenceSegmentedManifest
239239
input_manifest_file: ${output_directory}/05_aligned_manifest.json
240240
ctm_dir: ${output_directory}/forced_alignment_output/ctm/words
241241
output_manifest_file: ${output_directory}/06_sentence_segmented_manifest.json
242242

243-
- _target_: sdp.processors.datasets.earnings21.SpeakerSegmentedManifest
243+
- _target_: sdp.processors.datasets.earnings.SpeakerSegmentedManifest
244244
input_manifest_file: ${output_directory}/04_full_audio_with_text_manifest_cleaned.json
245245
dataset_root: ${dataset_root}
246246
output_manifest_file: ${output_directory}/07_speaker_segmented_manifest.json

sdp/processors/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
CreateInitialManifestCORAAL,
2222
TrainDevTestSplitCORAAL,
2323
)
24-
from sdp.processors.datasets.earnings21 import (
24+
from sdp.processors.datasets.earnings import (
2525
CreateInitialAudioAndManifest,
2626
CreateFullAudioManifestEarnings21,
2727
SpeakerSegmentedManifest,

sdp/processors/datasets/earnings21/__init__.py renamed to sdp/processors/datasets/earnings/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from sdp.processors.datasets.earnings21.create_initial_manifest import (
15+
from sdp.processors.datasets.earnings.create_initial_manifest import (
1616
CreateInitialAudioAndManifest,
1717
CreateFullAudioManifestEarnings21,
1818
SpeakerSegmentedManifest,
1919
CreateSentenceSegmentedManifest,
2020
NeMoForcedAligner,
2121
)
22-
from sdp.processors.datasets.earnings21.apply_normalizations import (
22+
from sdp.processors.datasets.earnings.apply_normalizations import (
2323
ApplyEarnings21Normalizations,
2424
)

sdp/processors/datasets/earnings21/apply_normalizations.py renamed to sdp/processors/datasets/earnings/apply_normalizations.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ class ApplyEarnings21Normalizations(BaseProcessor):
3838
Example:
3939
.. code-block:: yaml
4040
41-
- _target_: sdp.processors.datasets.earnings21.ApplyEarnings21Normalizations
41+
- _target_: sdp.processors.datasets.earnings.ApplyEarnings21Normalizations
4242
earnings21_root: /path/to/earnings21
4343
use_top_candidate: true
4444
fallback_to_original: true

sdp/processors/datasets/earnings21/create_initial_manifest.py renamed to sdp/processors/datasets/earnings/create_initial_manifest.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ class CreateInitialAudioAndManifest(BaseParallelProcessor):
5050
Example:
5151
.. code-block:: yaml
5252
53-
- _target_: sdp.processors.datasets.earnings21.CreateInitialAudioAndManifest
53+
- _target_: sdp.processors.datasets.earnings.CreateInitialAudioAndManifest
5454
dataset_root: /path/to/earnings21
5555
raw_audio_source_dir: ${dataset_root}/media
5656
output_manifest_file: ${output_dir}/01_initial_manifest.json
@@ -181,7 +181,7 @@ class CreateFullAudioManifestEarnings21(BaseParallelProcessor):
181181
Example:
182182
.. code-block:: yaml
183183
184-
- _target_: sdp.processors.datasets.earnings21.CreateFullAudioManifestEarnings21
184+
- _target_: sdp.processors.datasets.earnings.CreateFullAudioManifestEarnings21
185185
input_manifest_file: ${output_dir}/01_initial_manifest.json
186186
dataset_root: /path/to/earnings21
187187
output_manifest_file: ${output_dir}/02_manifest_with_text.json
@@ -334,7 +334,7 @@ class SpeakerSegmentedManifest(BaseParallelProcessor):
334334
Example:
335335
.. code-block:: yaml
336336
337-
- _target_: sdp.processors.datasets.earnings21.SpeakerSegmentedManifest
337+
- _target_: sdp.processors.datasets.earnings.SpeakerSegmentedManifest
338338
input_manifest_file: ${output_dir}/02_manifest_with_text.json
339339
dataset_root: /path/to/earnings21
340340
output_manifest_file: ${output_dir}/06_speaker_segments.json
@@ -627,7 +627,7 @@ class CreateSentenceSegmentedManifest(BaseParallelProcessor):
627627
Example:
628628
.. code-block:: yaml
629629
630-
- _target_: sdp.processors.datasets.earnings21.CreateSentenceSegmentedManifest
630+
- _target_: sdp.processors.datasets.earnings.CreateSentenceSegmentedManifest
631631
input_manifest_file: ${output_dir}/04_aligned_manifest.json
632632
ctm_dir: ${output_dir}/forced_alignment_output/ctm/words
633633
output_manifest_file: ${output_dir}/05_sentence_segments.json
@@ -839,7 +839,7 @@ class NeMoForcedAligner(BaseProcessor):
839839
Example:
840840
.. code-block:: yaml
841841
842-
- _target_: sdp.processors.datasets.earnings21.NeMoForcedAligner
842+
- _target_: sdp.processors.datasets.earnings.NeMoForcedAligner
843843
input_manifest_file: ${output_dir}/03_cleaned_manifest.json
844844
output_manifest_file: ${output_dir}/04_aligned_manifest.json
845845
output_dir: ${output_dir}/forced_alignment_output

0 commit comments

Comments
 (0)