
Commit fe0927d

Fixed pymarian device ids

Signed-off-by: Sasha Meister <ameister@nvidia.com>
1 parent ddd9ba7 commit fe0927d

File tree

2 files changed (+18, -18 lines changed)


dataset_configs/multilingual/yodas2/config.yaml

Lines changed: 16 additions & 16 deletions
@@ -3,16 +3,16 @@ documentation: |
   ############
   Documentation is in progress.
 
-processors_to_run: "0:"
+processors_to_run: "27:"
 workspace_dir: /data3/sdp_test #/home/ameister/SDP_YODAS2/test_config
 
 filters:
   source_lang: en
   min_language_probability: 0.7
 
-translation:
-  source_lang: English
-  target_lang: Italian
+translation:
+  source_lang: English
+  target_lang: Italian
 
 processors:
   - _target_: sdp.processors.datasets.yodas2.ListYodas2Data
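The switch from "0:" to "27:" narrows which pipeline steps run. A minimal sketch of the slice-style semantics such a "start:stop" string usually encodes (the exact parsing inside SDP may differ; the processor names below are made up for illustration):

# Hypothetical pipeline used only to illustrate "start:stop" selection.
processors = [f"processor_{i:02d}" for i in range(30)]

spec = "27:"                                   # run processors 27 through the end
start_s, _, stop_s = spec.partition(":")
start = int(start_s) if start_s else None
stop = int(stop_s) if stop_s else None

selected = processors[start:stop]
print(selected)                                # ['processor_27', 'processor_28', 'processor_29']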
@@ -31,7 +31,7 @@ processors:
 
   - _target_: sdp.processors.datasets.yodas2.DownloadYodas2Data
     output_manifest_file: ${workspace_dir}/${filters.source_lang}/manifest_03.json
-    local_dir: ${workspace_dir}/${filters.source_lang}
+    local_dir: ${workspace_dir}/${filters.source_lang}/
     max_workers: 8
 
   - _target_: sdp.processors.ExtractTar
@@ -180,7 +180,7 @@ processors:
 
   - _target_: sdp.processors.vLLMInference
     output_manifest_file: ${workspace_dir}/${filters.source_lang}/manifest_24.json
-    prompt_file: /ameister/YODAS_PR/dataset_configs/multilingual/yodas2/prompts/pnc_restoration/en.yaml
+    prompt_file: /ameister/YODAS_PR/NeMo-speech-data-processor/sdp/processors/inference/llm/prompts/vllm/qwen/pc_recovery/datasets/yodas2/${filters.source_lang}.yaml
     model:
       model: "Qwen/Qwen2.5-7B-Instruct-1M"
       tensor_parallel_size: 2
@@ -207,7 +207,7 @@ processors:
 
   - _target_: sdp.processors.SubRegex
     text_key: generation
-    regex_params_file: /home/ameister/SDP_YODAS2/NeMo-speech-data-processor/dataset_configs/multilingual/yodas2/regex.yaml
+    regex_params_yaml: /ameister/YODAS_PR/NeMo-speech-data-processor/sdp/processors/langs/regex/datasets/multilingual/yodas2/yodas2.yaml
     output_manifest_file: ${workspace_dir}/${filters.source_lang}/manifest_26.json
 
   # AST
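For the SubRegex step above, the renamed parameter points at a YAML file of substitution rules applied to the generation field. A rough sketch of what rule-driven regex cleanup amounts to, with illustrative rules rather than the real yodas2.yaml contents (the actual params schema may differ):

import re

# Illustrative rules only; the real YAML file ships its own pattern/replacement pairs.
regex_params = [
    {"pattern": r"\s+", "repl": " "},            # collapse runs of whitespace
    {"pattern": r"\s+([.,!?])", "repl": r"\1"},  # drop space before punctuation
]

def apply_regexes(text: str) -> str:
    for rule in regex_params:
        text = re.sub(rule["pattern"], rule["repl"], text)
    return text.strip()

print(apply_regexes("hello   world ,  again ."))  # -> "hello world, again."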
@@ -221,26 +221,26 @@ processors:
     ${translation.target_lang}:
       model:
         model: "utter-project/EuroLLM-9B-Instruct"
-        dtype: torch.float16
+        dtype: float16 #torch.float16
       inference:
         best_of: 1
         temperature: 0.0
         top_p: 1.0
         max_tokens: 1280
-        use_beam_search: False
+        #use_beam_search: False
       apply_chat_template:
         max_length: 512
         tokenize: False
         add_generation_prompt: True
 
   - _target_: sdp.processors.CometoidWMTQualityEstimation
-    input_manifest_file: /home/ameister/SDP_YODAS2/test_config/test_qe_m.json
-    output_manifest_file: /home/ameister/SDP_YODAS2/test_config/test_qe_m_out.json
-    source_text_field: source
-    target_text_field: target
-    model_name_or_path: /home/ameister/SDP_YODAS2/models/checkpoints/marian.model.bin
-    vocab_path: /home/ameister/SDP_YODAS2/models/vocab.spm
-
+    output_manifest_file: ${workspace_dir}/${filters.source_lang}/manifest_28.json
+    source_text_field: pred_text #source
+    target_text_field: generation #target
+    model_name_or_path: cometoid-wmt23
+    device_type: gpu
+    num_devices: 4
+    chunksize: 10
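Two of the edits above (dtype as a plain string and use_beam_search commented out) track how these values are consumed by vLLM. A hedged sketch of how the YAML block maps onto vLLM objects, assuming a recent vLLM release in which SamplingParams no longer accepts use_beam_search and dtype is given as a string; running it needs a GPU and downloads the model:

from vllm import LLM, SamplingParams

# Sampling settings mirroring the inference block above.
sampling = SamplingParams(
    best_of=1,
    temperature=0.0,
    top_p=1.0,
    max_tokens=1280,
    # use_beam_search=False  # not a SamplingParams argument in recent vLLM releases
)

# When loaded from YAML, dtype arrives as the string "float16", not torch.float16.
llm = LLM(model="utter-project/EuroLLM-9B-Instruct", dtype="float16")

outputs = llm.generate(["Translate to Italian: good morning."], sampling)
print(outputs[0].outputs[0].text)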

sdp/processors/inference/qe/nmt/pymarian.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def load_model(self):
         if not os.path.exists(self.model_name_or_path):
             raise ValueError(f'`model_name_or_path`: model name is not valid or model path does not exist ({self.model_name_or_path}).')
 
-        if not self.vocab_path and self.repo_id:
+        if not self.vocab_path and repo_id is not None:
             self.vocab_path = hf_hub_download(repo_id=repo_id, filename="vocab.spm", local_dir = self.save_model_to)
 
         if not os.path.exists(self.vocab_path):
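The first change makes the vocab fallback key off the local repo_id that is actually passed to hf_hub_download, rather than a self.repo_id attribute. A small sketch of that fallback in isolation, with a made-up repo id and cache directory (placeholders, not the processor's real defaults):

from huggingface_hub import hf_hub_download

repo_id = "some-org/some-marian-qe-model"   # hypothetical repo id, for illustration only
save_model_to = "./models"                  # hypothetical local cache directory

vocab_path = None
if not vocab_path and repo_id is not None:
    # hf_hub_download returns the local filesystem path of the fetched vocab.spm.
    vocab_path = hf_hub_download(repo_id=repo_id, filename="vocab.spm", local_dir=save_model_to)
print(vocab_path)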
@@ -92,7 +92,7 @@ def load_model(self):
         except Exception:
             pass
 
-        device_indicies = ' '.join([i for i in range(self.max_workers)])
+        device_indicies = ' '.join([str(i) for i in range(self.max_workers)])
         gpu_args = self.MARIAN_GPU_ARGS.format(device_indicies = device_indicies)
         marian_args += f' {gpu_args}'
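The second change is the device-id fix from the commit title: str.join only accepts strings, so joining raw ints raises a TypeError before any GPU arguments are built. A minimal standalone reproduction (the max_workers value is arbitrary):

max_workers = 4

# Old form fails: ' '.join([i for i in range(max_workers)])
# -> TypeError: sequence item 0: expected str instance, int found

device_indicies = ' '.join([str(i) for i in range(max_workers)])
print(device_indicies)   # "0 1 2 3", the value interpolated into Marian's GPU args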
