Skip to content

Commit 9466a50

Browse files
Remove package dependency (#127)
* Remove package dependency

  Signed-off-by: Sushmitha Deva <[email protected]>

* Update nemo_asr_align.py

  Signed-off-by: Sushmitha Deva <[email protected]>

---------

Signed-off-by: Sushmitha Deva <[email protected]>
1 parent 830c7ec commit 9466a50

File tree

9 files changed

+41
-59
lines changed

9 files changed

+41
-59
lines changed

sdp/processors/datasets/ytc/create_initial_manifest.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import subprocess
1818

1919
from sdp.processors.base_processor import BaseParallelProcessor, DataEntry
20-
20+
from sdp.utils.common import load_manifest
2121

2222
class CreateInitialManifestYTC(BaseParallelProcessor):
2323
"""A processor class for creating initial manifest files for a TTS dataset.
@@ -64,13 +64,11 @@ def prepare(self):
6464
os.makedirs(self.resampled_audio_dir, exist_ok=True)
6565

6666
def read_manifest(self):
67-
""" Reads metadata from NDJSON file in the input manifest
67+
""" Reads metadata from JSONL file in the input manifest
6868
Returns:
69-
list: A list of dataset entries parsed from the NDJSON manifest file
69+
list: A list of dataset entries parsed from the JSONL manifest file
7070
"""
71-
import ndjson
72-
with open(self.input_manifest_file, "rt", encoding="utf8") as fin:
73-
dataset_entries = ndjson.load(fin)
71+
dataset_entries = load_manifest(self.input_manifest_file, encoding="utf8")
7472

7573
return dataset_entries
7674

sdp/processors/tts/merge_alignment_diarization.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import ndjson
1615
from sdp.processors.base_processor import BaseProcessor
17-
16+
from sdp.utils.common import load_manifest, save_manifest
1817

1918
class MergeAlignmentDiarization(BaseProcessor):
2019
"""This processor merges alignment and diarization information from a manifest file.
@@ -41,8 +40,7 @@ def __init__(self,
4140
super().__init__(**kwargs)
4241

4342
def process(self):
44-
with open(self.input_manifest_file) as f:
45-
manifest = ndjson.load(f)
43+
manifest = load_manifest(self.input_manifest_file)
4644

4745
# Manifest here needs to contain both paths to alignment files and 'segments'
4846
# from pyannote. We identify all the words that belong in each pyannote segment
@@ -97,6 +95,5 @@ def process(self):
9795
segment['text'] = ' '.join([x['word'] for x in words_in_segment])
9896
segment['words'] = words_in_segment
9997

100-
with open(self.output_manifest_file, 'w') as f:
101-
ndjson.dump(manifest, f)
98+
save_manifest(manifest, self.output_manifest_file)
10299

sdp/processors/tts/metrics.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414

1515
import librosa
1616
import math
17-
import ndjson
1817
import numpy as np
1918
from tqdm import tqdm
2019

2120
from sdp.logging import logger
2221
from sdp.processors.base_processor import BaseProcessor
22+
from sdp.utils.common import load_manifest, save_manifest
2323

2424
import torch
2525
import torchaudio
@@ -71,8 +71,7 @@ def __init__(self, device: str = "cuda", **kwargs):
7171
self.model = SQUIM_OBJECTIVE.get_model()
7272

7373
def process(self):
74-
with open(self.input_manifest_file) as f:
75-
manifest = ndjson.load(f)
74+
manifest = load_manifest(self.input_manifest_file)
7675

7776
results = []
7877

@@ -130,8 +129,7 @@ def process(self):
130129
continue
131130
results.append(metadata)
132131

133-
with open(self.output_manifest_file, 'w') as f:
134-
ndjson.dump(results, f)
132+
save_manifest(results, self.output_manifest_file)
135133

136134

137135
class BandwidthEstimationProcessor(BaseProcessor):
@@ -204,8 +202,7 @@ def _estimate_bandwidth(self, audio, sample_rate):
204202
return bandwidth
205203

206204
def process(self):
207-
with open(self.input_manifest_file) as f:
208-
manifest = ndjson.load(f)
205+
manifest = load_manifest(self.input_manifest_file)
209206

210207
results = []
211208

@@ -237,6 +234,5 @@ def process(self):
237234

238235
results.append(metadata)
239236

240-
with open(self.output_manifest_file, 'w') as f:
241-
ndjson.dump(results, f)
237+
save_manifest(results, self.output_manifest_file)
242238

sdp/processors/tts/nemo_asr_align.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import ndjson
1615
import omegaconf
1716
import torch
1817
import torchaudio
1918
import nemo.collections.asr as nemo_asr
2019
from sdp.logging import logger
2120
from sdp.processors.base_processor import BaseProcessor
21+
from sdp.utils.common import load_manifest, save_manifest
2222

2323
class NeMoASRAligner(BaseProcessor):
2424
"""This processor aligns text and audio using NeMo ASR models.
@@ -188,10 +188,9 @@ def process(self):
188188
1. Full audio processing (infer_segment_only=False)
189189
2. Segment-only processing (infer_segment_only=True)
190190
191-
Results are saved in NDJSON format with alignments and transcriptions added to the original metadata.
191+
Results are saved in JSONL format with alignments and transcriptions added to the original metadata.
192192
"""
193-
with open(self.input_manifest_file) as f:
194-
manifest = ndjson.load(f)
193+
manifest = load_manifest(self.input_manifest_file)
195194

196195
results = []
197196
if not self.infer_segment_only:
@@ -250,5 +249,4 @@ def process(self):
250249

251250
results.extend(metadata_batch)
252251

253-
with open(self.output_manifest_file, "w") as f:
254-
ndjson.dump(results, f)
252+
save_manifest(results, self.output_manifest_file)

sdp/processors/tts/prepare_tts_segments.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,10 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import ndjson
16-
from typing import List, Union
1715

1816
from sdp.logging import logger
1917
from sdp.processors.base_processor import BaseParallelProcessor, DataEntry
18+
from sdp.utils.common import load_manifest
2019

2120
class PrepareTTSSegmentsProcessor(BaseParallelProcessor):
2221
"""This processor merges adjacent segments from the same speaker and splits segments to have a complete utterance.
@@ -60,11 +59,10 @@ def __init__(self,
6059
self.punctuation_split_only = punctuation_split_only
6160

6261
def read_manifest(self):
63-
''' Reads metadata from NDJSON file in the input manifest
62+
''' Reads metadata from JSONL file in the input manifest
6463
and converts it to data entries '''
6564

66-
with open(self.input_manifest_file, "r", encoding="utf8") as fin:
67-
dataset_entries = ndjson.load(fin)
65+
dataset_entries = load_manifest(self.input_manifest_file, encoding="utf8")
6866

6967
return dataset_entries
7068

sdp/processors/tts/pyannote.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import os
1717
import logging
1818
from time import time
19-
import ndjson
2019
from pyannote.audio import Pipeline
2120
from pyannote.audio.pipelines.utils.hook import ProgressHook
2221
from whisperx.audio import SAMPLE_RATE
@@ -26,6 +25,7 @@
2625

2726
from sdp.logging import logger
2827
from sdp.processors.base_processor import BaseProcessor
28+
from sdp.utils.common import load_manifest, save_manifest
2929

3030
def has_overlap(turn, overlaps):
3131
"""Check if a given turn overlaps with any segment in the overlaps list.
@@ -208,8 +208,7 @@ def process(self):
208208
- Overlap segments
209209
- Non-speaker segments
210210
"""
211-
with open(self.input_manifest_file) as f:
212-
manifest = ndjson.load(f)
211+
manifest = load_manifest(self.input_manifest_file)
213212

214213
results = []
215214
start_time = time()
@@ -292,6 +291,5 @@ def process(self):
292291
results.append(metadata)
293292

294293
logger.info(f'Completed diarization in {(time()-start_time)/3600} hrs')
295-
with open(self.output_manifest_file, 'w') as f:
296-
ndjson.dump(results, f)
294+
save_manifest(results, self.output_manifest_file)
297295

sdp/processors/tts/split.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
# limitations under the License.
1414

1515
from sdp.processors.base_processor import BaseProcessor, DataEntry
16-
import ndjson
1716
import json
1817
import os
1918
import torchaudio
2019
import math
2120
from copy import deepcopy
21+
from sdp.utils.common import load_manifest, save_manifest
2222

2323
class SplitLongAudio(BaseProcessor):
2424
"""This processor splits long audio files into smaller segments.
@@ -70,8 +70,7 @@ def process(self):
7070
- Split entries with updated paths and durations
7171
- Meta-entries containing split information for later joining
7272
"""
73-
with open(self.input_manifest_file) as f:
74-
manifest = ndjson.load(f)
73+
manifest = load_manifest(self.input_manifest_file)
7574

7675
results = []
7776
for metadata in manifest:
@@ -141,8 +140,7 @@ def process(self):
141140
metadata['split_offsets'] = actual_splits
142141
results.append(metadata)
143142

144-
with open(self.output_manifest_file, 'w') as f:
145-
ndjson.dump(results, f)
143+
save_manifest(results, self.output_manifest_file)
146144

147145

148146
class JoinSplitAudioMetadata(BaseProcessor):
@@ -179,8 +177,7 @@ def process(self):
179177
- Original entries for unsplit audio files
180178
- Combined entries for previously split audio files
181179
"""
182-
with open(self.input_manifest_file) as f:
183-
manifest = ndjson.load(f)
180+
manifest = load_manifest(self.input_manifest_file)
184181

185182
fp_w = open(self.output_manifest_file, 'w')
186183

sdp/processors/tts/text.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
16-
from sdp.processors.base_processor import BaseProcessor, BaseParallelProcessor, DataEntry
1715
import json
18-
import ndjson
16+
from sdp.processors.base_processor import BaseProcessor, BaseParallelProcessor, DataEntry
17+
from sdp.utils.common import load_manifest, save_manifest
1918
from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
2019
from nemo.collections.nlp.models import PunctuationCapitalizationModel
2120

@@ -47,11 +46,10 @@ def __init__(self,
4746
self.normalizer = InverseNormalizer(lang=language)
4847

4948
def read_manifest(self):
50-
''' Reads metadata from NDJSON file in the input manifest
49+
''' Reads metadata from JSONL file in the input manifest
5150
and converts it to data entries '''
5251

53-
with open(self.input_manifest_file, "r", encoding="utf8") as fin:
54-
dataset_entries = ndjson.load(fin)
52+
dataset_entries = load_manifest(self.input_manifest_file, encoding="utf8")
5553

5654
return dataset_entries
5755

@@ -102,8 +100,7 @@ def __init__(self,
102100
self.pnc_model.cuda()
103101

104102
def process(self):
105-
with open(self.input_manifest_file) as f:
106-
manifest = ndjson.load(f)
103+
manifest = load_manifest(self.input_manifest_file)
107104

108105
results = []
109106
all_text = []
@@ -123,8 +120,7 @@ def process(self):
123120
i+=1
124121
results.append(metadata)
125122

126-
with open(self.output_manifest_file, 'w') as f:
127-
ndjson.dump(results, f)
123+
save_manifest(results, self.output_manifest_file)
128124

129125
class PunctuationAndCapitalizationProcessor(BaseProcessor):
130126
"""This processor performs punctuation and capitalization on text data.
@@ -163,8 +159,7 @@ def __init__(self,
163159
self.pnc_model.cuda()
164160

165161
def process(self):
166-
with open(self.input_manifest_file) as f:
167-
manifest = ndjson.load(f)
162+
manifest = load_manifest(self.input_manifest_file)
168163

169164
all_text = []
170165

sdp/utils/common.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,27 @@
1919
import urllib
2020
import zipfile
2121
from pathlib import Path
22-
from typing import Dict, List, Union
22+
from typing import Dict, List, Union, Any, Optional
2323

2424
import wget
2525

2626
from sdp.logging import logger
2727

2828

29-
def load_manifest(manifest: Path) -> List[Dict[str, Union[str, float]]]:
29+
def load_manifest(manifest: Union[Path, str], encoding: Optional[str] = None) -> List[Dict[str, Union[str, float]]]:
    """Read a NeMo manifest (one JSON object per line) as a list of dicts.

    Args:
        manifest: path to the manifest file.
        encoding: text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        The parsed entries, one per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    result = []
    with open(manifest, encoding=encoding) as f:
        for line in f:
            # Blank or whitespace-only lines (e.g. a stray trailing newline)
            # hold no entry and would make json.loads raise, so skip them.
            if not line.strip():
                continue
            result.append(json.loads(line))
    return result
3737

38+
def save_manifest(manifest: List[Dict[str, Any]], manifest_file: Union[Path, str], encoding: Optional[str] = None) -> None:
    """Write manifest entries to a file, one JSON object per line.

    Any existing file at ``manifest_file`` is overwritten.

    Args:
        manifest: entries to serialize; each must be JSON-serializable.
        manifest_file: destination path.
        encoding: text encoding passed to ``open``; ``None`` keeps the
            platform default. Mirrors the ``encoding`` argument of
            ``load_manifest`` so a file can be written the way it is read.

    Raises:
        TypeError: if an entry is not JSON-serializable.
    """
    with open(manifest_file, 'w', encoding=encoding) as f:
        # Every entry is terminated by a newline, including the last one.
        f.writelines(json.dumps(item) + '\n' for item in manifest)
42+
3843

3944
def download_file(source_url: str, target_directory: str, verbose=True):
4045
# make sure target_directory is an absolute path to avoid bugs when we change directories to download data later

0 commit comments

Comments
 (0)