Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions requirements/curator.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
cd ray-api

# pip install cosmos-xenna[gpu]
git clone https://github.com/NVIDIA-NeMo/Curator.git
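# NOTE(review): `git switch` and the `pip install .` below act on the current
# directory, not on the clone created above — confirm whether a `cd Curator`
# (or `cd Curator/ray-curator`) is intended before these two commands.
git switch ray-api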
pip install .

# install NeMo
pip install "nemo_toolkit[all]"

# install nemo_text_processing
pip install nemo_text_processing

pip install -r requirements/main.txt
pip install -r requirements/tests.txt

RAY_ADDRESS=10.110.41.40:8265 python -m pytest tests/test_curator.py

# pip install loguru
# pip install -U "ray[default]"

# cd ~/workspace/Curator/ray-curator && pip install .
# ray start --include-dashboard=True --head # ray status # ray stop
# import ray
# ray.init()
# RAY_ADDRESS='http://127.0.0.1:8265' ray job submit --working-dir . -- python my_script.py

RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES="0" RAY_MAX_LIMIT_FROM_API_SERVER=40000 RAY_MAX_LIMIT_FROM_DATA_SOURCE=40000 ray start --include-dashboard=True --dashboard-host=0.0.0.0 --port=8265 --dashboard-port=8266 --head --temp-dir=/tmp
27 changes: 10 additions & 17 deletions sdp/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
TrainDevTestSplitCORAAL,
)
from sdp.processors.datasets.earnings import (
CreateInitialAudioAndManifest,
ApplyEarnings21Normalizations,
CreateFullAudioManifestEarnings21,
SpeakerSegmentedManifest,
CreateInitialAudioAndManifest,
CreateSentenceSegmentedManifest,
ApplyEarnings21Normalizations,
SpeakerSegmentedManifest,
)
from sdp.processors.datasets.fleurs.create_initial_manifest import (
CreateInitialManifestFleurs,
Expand Down Expand Up @@ -82,23 +82,26 @@
CreateInitialManifestHuggingFace,
)
from sdp.processors.huggingface.speech_recognition import ASRTransformers
from sdp.processors.manage_files.convert_audio import FfmpegConvert, SoxConvert
from sdp.processors.manage_files.extract import ExtractTar
from sdp.processors.manage_files.remove import RemoveFiles
from sdp.processors.modify_manifest.common import (
AddConstantFields,
ApplyInnerJoin,
ChangeToRelativePath,
CombineSources,
DropSpecifiedFields,
DuplicateFields,
KeepOnlySpecifiedFields,
RenameFields,
SortManifest,
SplitOnFixedDuration,
Subprocess,
DropSpecifiedFields,

)
from sdp.processors.modify_manifest.create_manifest import (
CreateCombinedManifests,
CreateInitialManifestByExt,
SaveJsonl,
)
from sdp.processors.modify_manifest.data_to_data import (
ASRFileCheck,
Expand All @@ -109,6 +112,8 @@
GetWER,
InsIfASRInsertion,
InverseNormalizeText,
LambdaExpression,
ListToEntries,
MakeSentence,
NormalizeText,
ReadDocxLines,
Expand All @@ -117,8 +122,6 @@
SubIfASRSubstitution,
SubMakeLowercase,
SubRegex,
ListToEntries,
LambdaExpression,
)
from sdp.processors.modify_manifest.data_to_dropbool import (
DropASRError,
Expand All @@ -141,16 +144,6 @@
from sdp.processors.modify_manifest.make_letters_uppercase_after_period import (
MakeLettersUppercaseAfterPeriod,
)
from sdp.processors.manage_files.convert_audio import (
FfmpegConvert,
SoxConvert,
)
from sdp.processors.manage_files.extract import (
ExtractTar,
)
from sdp.processors.manage_files.remove import (
RemoveFiles,
)
from sdp.processors.nemo.asr_inference import ASRInference
from sdp.processors.nemo.estimate_bandwidth import EstimateBandwidth
from sdp.processors.nemo.lid_inference import AudioLid
Expand Down
Loading