Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/gen_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,6 @@ def gen_docs():
with open(destination_path, "wt", encoding="utf-8") as fout:
fout.write(docs + link)


if __name__ == '__main__':
gen_docs()
10 changes: 5 additions & 5 deletions docs/src/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
"webvtt_py",
"python_docx",
"webvtt",
"docx",
"pyannote"
"docx",
"pyannote",
]

_skipped_autodoc_mock_imports = []
Expand Down Expand Up @@ -189,9 +189,9 @@ def setup(app):
]
# nitpick_ignore_regex = [('py:class', '*')]

#adding this especially for coraal, temporary
# adding this especially for coraal, temporary
linkcheck_ignore = [
r'https://lingtools\.uoregon\.edu/coraal/coraal_download_list\.txt',
r'https://ieeexplore\.ieee\.org/document/1326009'
r'https://ieeexplore\.ieee\.org/document/1326009',
]
# https://lingtools.uoregon.edu/coraal/coraal_download_list.txt
# https://lingtools.uoregon.edu/coraal/coraal_download_list.txt
7 changes: 4 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import sys

import hydra
from omegaconf import DictConfig, open_dict

Expand All @@ -23,20 +24,20 @@
def main(cfg: DictConfig):
"""
Main entry point for the Speech Data Processor (SDP).

Args:
cfg: Hydra configuration object containing processing settings
"""
# Check if running in import manager mode
if hasattr(cfg, 'mode') and cfg.mode == 'update_imports':
update_processor_imports(cfg.config_path)

# Check arg for using Dask
if not hasattr(cfg, 'use_dask'):
with open_dict(cfg):
# Default to using Dask
cfg.use_dask = True

# Run the processors
run_processors(cfg)

Expand Down
2 changes: 1 addition & 1 deletion requirements/huggingface.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
accelerate
transformers>=0.2.1
huggingface_hub>=0.20.3,<0.24.0 # https://github.com/NVIDIA/NeMo/issues/9793
transformers>=0.2.1
16 changes: 8 additions & 8 deletions requirements/main.txt
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
dask
datasets>=2.14.0,<3.0.0
diff_match_patch
distributed
editdistance
ffmpeg
gdown
hydra-core
jiwer>=3.1.0,<4.0.0
joblib
librosa>=0.10.0 # specify >=0.10.0 so that librosa.get_duration(path=...) will work
numpy>=1.26, <2.0 # the code was written against numpy 1.x and may crash on 2.x
omegaconf
pandas
pyarrow>=8.0.0,<14.0.0
pydub
python-docx
rarfile
regex
sox
tqdm
gdown
webvtt-py
wget
python-docx
pydub
dask
distributed
jiwer>=3.1.0,<4.0.0
pyarrow>=8.0.0,<14.0.0
datasets>=2.14.0,<3.0.0
# toloka-kit # Temporarily disabled due to Toloka's technical pause; keep as reference for past and future API support
# for some processors, additionally https://github.com/NVIDIA/NeMo is required
# for some processors, additionally nemo_text_processing is required
Expand Down
6 changes: 3 additions & 3 deletions requirements/tests.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
boto3
# lhotse requires torch and torchaudio to be present
lhotse
# additional packages required to run tests
pytest
pytest-cov
# lhotse requires torch and torchaudio to be present
lhotse
torch
torchaudio
torchaudio
6 changes: 3 additions & 3 deletions requirements/tts.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
transformers
accelerate
torchaudio
pyannote-audio
ffmpeg-python
pyannote-audio
torchaudio
transformers
whisperx==3.3.1
26 changes: 9 additions & 17 deletions sdp/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
TrainDevTestSplitCORAAL,
)
from sdp.processors.datasets.earnings import (
CreateInitialAudioAndManifest,
ApplyEarnings21Normalizations,
CreateFullAudioManifestEarnings21,
SpeakerSegmentedManifest,
CreateInitialAudioAndManifest,
CreateSentenceSegmentedManifest,
ApplyEarnings21Normalizations,
SpeakerSegmentedManifest,
)
from sdp.processors.datasets.fleurs.create_initial_manifest import (
CreateInitialManifestFleurs,
Expand Down Expand Up @@ -82,19 +82,21 @@
CreateInitialManifestHuggingFace,
)
from sdp.processors.huggingface.speech_recognition import ASRTransformers
from sdp.processors.manage_files.convert_audio import FfmpegConvert, SoxConvert
from sdp.processors.manage_files.extract import ExtractTar
from sdp.processors.manage_files.remove import RemoveFiles
from sdp.processors.modify_manifest.common import (
AddConstantFields,
ApplyInnerJoin,
ChangeToRelativePath,
CombineSources,
DropSpecifiedFields,
DuplicateFields,
KeepOnlySpecifiedFields,
RenameFields,
SortManifest,
SplitOnFixedDuration,
Subprocess,
DropSpecifiedFields,

)
from sdp.processors.modify_manifest.create_manifest import (
CreateCombinedManifests,
Expand All @@ -109,6 +111,8 @@
GetWER,
InsIfASRInsertion,
InverseNormalizeText,
LambdaExpression,
ListToEntries,
MakeSentence,
NormalizeText,
ReadDocxLines,
Expand All @@ -117,8 +121,6 @@
SubIfASRSubstitution,
SubMakeLowercase,
SubRegex,
ListToEntries,
LambdaExpression,
)
from sdp.processors.modify_manifest.data_to_dropbool import (
DropASRError,
Expand All @@ -141,16 +143,6 @@
from sdp.processors.modify_manifest.make_letters_uppercase_after_period import (
MakeLettersUppercaseAfterPeriod,
)
from sdp.processors.manage_files.convert_audio import (
FfmpegConvert,
SoxConvert,
)
from sdp.processors.manage_files.extract import (
ExtractTar,
)
from sdp.processors.manage_files.remove import (
RemoveFiles,
)
from sdp.processors.nemo.asr_inference import ASRInference
from sdp.processors.nemo.estimate_bandwidth import EstimateBandwidth
from sdp.processors.nemo.lid_inference import AudioLid
Expand Down
Loading
Loading