Remove package dependency (#127)

sushmitha-deva-09 · web-flow · commit 9466a503623b · 2025-06-12T15:14:11.000+04:00
* Remove package dependency

Signed-off-by: Sushmitha Deva <sdeva@nvidia.com>

* Update nemo_asr_align.py

Signed-off-by: Sushmitha Deva <sdeva@nvidia.com>

---------

Signed-off-by: Sushmitha Deva <sdeva@nvidia.com>
diff --git a/sdp/processors/datasets/ytc/create_initial_manifest.py b/sdp/processors/datasets/ytc/create_initial_manifest.py
@@ -17,7 +17,7 @@
 import subprocess
 
 from sdp.processors.base_processor import BaseParallelProcessor, DataEntry
-
+from sdp.utils.common import load_manifest
 
 class CreateInitialManifestYTC(BaseParallelProcessor):
     """A processor class for creating initial manifest files for a TTS dataset.
@@ -64,13 +64,11 @@ def prepare(self):
         os.makedirs(self.resampled_audio_dir, exist_ok=True)
 
     def read_manifest(self):
-        """ Reads metadata from NDJSON file in the input manifest
+        """ Reads metadata from JSONL file in the input manifest
           Returns:
-            list: A list of dataset entries parsed from the NDJSON manifest file
+            list: A list of dataset entries parsed from the JSONL manifest file
         """
-        import ndjson
-        with open(self.input_manifest_file, "rt", encoding="utf8") as fin:
-            dataset_entries = ndjson.load(fin)
+        dataset_entries = load_manifest(self.input_manifest_file, encoding="utf8")
 
         return dataset_entries
 
diff --git a/sdp/processors/tts/merge_alignment_diarization.py b/sdp/processors/tts/merge_alignment_diarization.py
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ndjson
 from sdp.processors.base_processor import BaseProcessor
-
+from sdp.utils.common import load_manifest, save_manifest
 
 class MergeAlignmentDiarization(BaseProcessor):
     """This processor merges alignment and diarization information from a manifest file.
@@ -41,8 +40,7 @@ def __init__(self,
         super().__init__(**kwargs)
 
     def process(self):
-        with open(self.input_manifest_file) as f:
-            manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
 
         # Manifest here needs to contain both paths to alignment files and 'segments'
         # from pyannote. We identify all the words that belong in each pyannote segment
@@ -97,6 +95,5 @@ def process(self):
                     segment['text'] = ' '.join([x['word'] for x in words_in_segment])
                     segment['words'] = words_in_segment
 
-        with open(self.output_manifest_file, 'w') as f:
-            ndjson.dump(manifest, f)
+        save_manifest(manifest, self.output_manifest_file)
 
diff --git a/sdp/processors/tts/metrics.py b/sdp/processors/tts/metrics.py
@@ -14,12 +14,12 @@
 
 import librosa
 import math
-import ndjson
 import numpy as np
 from tqdm import tqdm
 
 from sdp.logging import logger
 from sdp.processors.base_processor import BaseProcessor
+from sdp.utils.common import load_manifest, save_manifest
 
 import torch
 import torchaudio
@@ -71,8 +71,7 @@ def __init__(self, device: str = "cuda", **kwargs):
             self.model = SQUIM_OBJECTIVE.get_model()
 
     def process(self):
-        with open(self.input_manifest_file) as f:
-            manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
 
         results = []
 
@@ -130,8 +129,7 @@ def process(self):
                     continue
             results.append(metadata)
 
-        with open(self.output_manifest_file, 'w') as f:
-            ndjson.dump(results, f)
+        save_manifest(results, self.output_manifest_file)
 
 
 class BandwidthEstimationProcessor(BaseProcessor):
@@ -204,8 +202,7 @@ def _estimate_bandwidth(self, audio, sample_rate):
         return bandwidth
 
     def process(self):
-        with open(self.input_manifest_file) as f:
-            manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
 
         results = []
 
@@ -237,6 +234,5 @@ def process(self):
 
             results.append(metadata)
 
-        with open(self.output_manifest_file, 'w') as f:
-            ndjson.dump(results, f)
+        save_manifest(results, self.output_manifest_file)
 
diff --git a/sdp/processors/tts/nemo_asr_align.py b/sdp/processors/tts/nemo_asr_align.py
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ndjson
 import omegaconf
 import torch
 import torchaudio
 import nemo.collections.asr as nemo_asr
 from sdp.logging import logger
 from sdp.processors.base_processor import BaseProcessor
+from sdp.utils.common import load_manifest, save_manifest
 
 class NeMoASRAligner(BaseProcessor):
     """This processor aligns text and audio using NeMo ASR models.
@@ -188,10 +188,9 @@ def process(self):
         1. Full audio processing (infer_segment_only=False)
         2. Segment-only processing (infer_segment_only=True)
 
-        Results are saved in NDJSON format with alignments and transcriptions added to the original metadata.
+        Results are saved in JSONL format with alignments and transcriptions added to the original metadata.
         """
-        with open(self.input_manifest_file) as f:
-           manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
         
         results = []
         if not self.infer_segment_only:
@@ -250,5 +249,4 @@ def process(self):
 
                 results.extend(metadata_batch)
 
-        with open(self.output_manifest_file, "w") as f:
-            ndjson.dump(results, f)
+        save_manifest(results, self.output_manifest_file)
diff --git a/sdp/processors/tts/prepare_tts_segments.py b/sdp/processors/tts/prepare_tts_segments.py
@@ -12,11 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ndjson
-from typing import List, Union
 
 from sdp.logging import logger
 from sdp.processors.base_processor import BaseParallelProcessor, DataEntry
+from sdp.utils.common import load_manifest
 
 class PrepareTTSSegmentsProcessor(BaseParallelProcessor):
     """This processor merges adjacent segments from the same speaker and splits segments to have a complete utterance.
@@ -60,11 +59,10 @@ def __init__(self,
         self.punctuation_split_only = punctuation_split_only
     
     def read_manifest(self):
-        ''' Reads metadata from NDJSON file in the input manifest
+        ''' Reads metadata from JSONL file in the input manifest
         and converts it to data entries '''
 
-        with open(self.input_manifest_file, "r", encoding="utf8") as fin:
-            dataset_entries = ndjson.load(fin)
+        dataset_entries = load_manifest(self.input_manifest_file, encoding="utf8")
 
         return dataset_entries
 
diff --git a/sdp/processors/tts/pyannote.py b/sdp/processors/tts/pyannote.py
@@ -16,7 +16,6 @@
 import os
 import logging
 from time import time
-import ndjson
 from pyannote.audio import Pipeline
 from pyannote.audio.pipelines.utils.hook import ProgressHook
 from whisperx.audio import SAMPLE_RATE
@@ -26,6 +25,7 @@
 
 from sdp.logging import logger
 from sdp.processors.base_processor import BaseProcessor
+from sdp.utils.common import load_manifest, save_manifest
 
 def has_overlap(turn, overlaps):
     """Check if a given turn overlaps with any segment in the overlaps list.
@@ -208,8 +208,7 @@ def process(self):
         - Overlap segments
         - Non-speaker segments
         """
-        with open(self.input_manifest_file) as f:
-            manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
 
         results = []
         start_time = time()
@@ -292,6 +291,5 @@ def process(self):
             results.append(metadata)
 
         logger.info(f'Completed diarization in {(time()-start_time)/3600} hrs')
-        with open(self.output_manifest_file, 'w') as f:
-            ndjson.dump(results, f)
+        save_manifest(results, self.output_manifest_file)
 
diff --git a/sdp/processors/tts/split.py b/sdp/processors/tts/split.py
@@ -13,12 +13,12 @@
 # limitations under the License.
 
 from sdp.processors.base_processor import BaseProcessor, DataEntry
-import ndjson
 import json
 import os
 import torchaudio
 import math
 from copy import deepcopy
+from sdp.utils.common import load_manifest, save_manifest
 
 class SplitLongAudio(BaseProcessor):
     """This processor splits long audio files into smaller segments.
@@ -70,8 +70,7 @@ def process(self):
         - Split entries with updated paths and durations
         - Meta-entries containing split information for later joining
         """
-        with open(self.input_manifest_file) as f:
-            manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
 
         results = []
         for metadata in manifest:
@@ -141,8 +140,7 @@ def process(self):
             metadata['split_offsets'] = actual_splits
             results.append(metadata)
 
-        with open(self.output_manifest_file, 'w') as f:
-            ndjson.dump(results, f)
+        save_manifest(results, self.output_manifest_file)
 
 
 class JoinSplitAudioMetadata(BaseProcessor):
@@ -179,8 +177,7 @@ def process(self):
         - Original entries for unsplit audio files
         - Combined entries for previously split audio files
         """
-        with open(self.input_manifest_file) as f:
-            manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
 
         fp_w = open(self.output_manifest_file, 'w')
 
diff --git a/sdp/processors/tts/text.py b/sdp/processors/tts/text.py
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-from sdp.processors.base_processor import BaseProcessor, BaseParallelProcessor, DataEntry
 import json
-import ndjson
+from sdp.processors.base_processor import BaseProcessor, BaseParallelProcessor, DataEntry
+from sdp.utils.common import load_manifest, save_manifest
 from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
 from nemo.collections.nlp.models import PunctuationCapitalizationModel
 
@@ -47,11 +46,10 @@ def __init__(self,
         self.normalizer = InverseNormalizer(lang=language)
     
     def read_manifest(self):
-        ''' Reads metadata from NDJSON file in the input manifest
+        ''' Reads metadata from JSONL file in the input manifest
         and converts it to data entries '''
 
-        with open(self.input_manifest_file, "r", encoding="utf8") as fin:
-            dataset_entries = ndjson.load(fin)
+        dataset_entries = load_manifest(self.input_manifest_file, encoding="utf8")
 
         return dataset_entries
 
@@ -102,8 +100,7 @@ def __init__(self,
         self.pnc_model.cuda()
     
     def process(self):
-        with open(self.input_manifest_file) as f:
-            manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
 
         results = []
         all_text = []
@@ -123,8 +120,7 @@ def process(self):
                     i+=1
             results.append(metadata)
 
-        with open(self.output_manifest_file, 'w') as f:
-            ndjson.dump(results, f)
+        save_manifest(results, self.output_manifest_file)
 
 class PunctuationAndCapitalizationProcessor(BaseProcessor):
     """This processor performs punctuation and capitalization on text data.
@@ -163,8 +159,7 @@ def __init__(self,
         self.pnc_model.cuda()
     
     def process(self):
-        with open(self.input_manifest_file) as f:
-            manifest = ndjson.load(f)
+        manifest = load_manifest(self.input_manifest_file)
 
         all_text = []
         
diff --git a/sdp/utils/common.py b/sdp/utils/common.py
@@ -19,22 +19,27 @@
 import urllib
 import zipfile
 from pathlib import Path
-from typing import Dict, List, Union
+from typing import Dict, List, Union, Any, Optional
 
 import wget
 
 from sdp.logging import logger
 
 
-def load_manifest(manifest: Path) -> List[Dict[str, Union[str, float]]]:
+def load_manifest(manifest: Union[Path, str], encoding: Optional[str] = None) -> List[Dict[str, Union[str, float]]]:
     # read NeMo manifest as a list of dicts
     result = []
-    with manifest.open() as f:
+    with open(manifest, encoding=encoding) as f:  # builtin open() takes str or Path; encoding=None means the platform default
         for line in f:
             data = json.loads(line)
             result.append(data)
     return result
 
+def save_manifest(manifest: List[Dict[str, Any]], manifest_file: Union[Path, str]):
+    with open(manifest_file, 'w') as f:  # 'w' truncates any existing file at this path
+        for item in manifest:
+            f.write(json.dumps(item) + '\n')  # JSONL: one JSON object per line, newline-terminated
+
 
 def download_file(source_url: str, target_directory: str, verbose=True):
     # make sure target_directory is an absolute path to avoid bugs when we change directories to download data later