@@ -3,16 +3,16 @@ documentation: |
33 ############
44 Documentation is in progress.
55
6- processors_to_run : " 0 :"
6+ processors_to_run : " 27 :"
77workspace_dir : /data3/sdp_test # /home/ameister/SDP_YODAS2/test_config
88
99filters :
1010 source_lang : en
1111 min_language_probability : 0.7
1212
13- translation :
14- source_lang : English
15- target_lang : Italian
13+ translation :
14+ source_lang : English
15+ target_lang : Italian
1616
1717processors :
1818 - _target_ : sdp.processors.datasets.yodas2.ListYodas2Data
@@ -31,7 +31,7 @@ processors:
3131
3232 - _target_ : sdp.processors.datasets.yodas2.DownloadYodas2Data
3333 output_manifest_file : ${workspace_dir}/${filters.source_lang}/manifest_03.json
34- local_dir : ${workspace_dir}/${filters.source_lang}
34+ local_dir : ${workspace_dir}/${filters.source_lang}/
3535 max_workers : 8
3636
3737 - _target_ : sdp.processors.ExtractTar
@@ -180,7 +180,7 @@ processors:
180180
181181 - _target_ : sdp.processors.vLLMInference
182182 output_manifest_file : ${workspace_dir}/${filters.source_lang}/manifest_24.json
183- prompt_file : /ameister/YODAS_PR/dataset_configs/multilingual/yodas2/ prompts/pnc_restoration/en .yaml
183+ prompt_file : /ameister/YODAS_PR/NeMo-speech-data-processor/sdp/processors/inference/llm/ prompts/vllm/qwen/pc_recovery/datasets/yodas2/${filters.source_lang} .yaml
184184 model :
185185 model : " Qwen/Qwen2.5-7B-Instruct-1M"
186186 tensor_parallel_size : 2
@@ -207,7 +207,7 @@ processors:
207207
208208 - _target_ : sdp.processors.SubRegex
209209 text_key : generation
210- regex_params_file : /home/ ameister/SDP_YODAS2 /NeMo-speech-data-processor/dataset_configs/ multilingual/yodas2/regex .yaml
210+ regex_params_yaml : /ameister/YODAS_PR /NeMo-speech-data-processor/sdp/processors/langs/regex/datasets/ multilingual/yodas2/yodas2 .yaml
211211 output_manifest_file : ${workspace_dir}/${filters.source_lang}/manifest_26.json
212212
213213 # AST
@@ -221,26 +221,26 @@ processors:
221221 ${translation.target_lang}:
222222 model :
223223 model : " utter-project/EuroLLM-9B-Instruct"
224- dtype : torch.float16
224+ dtype : float16 # torch.float16
225225 inference :
226226 best_of : 1
227227 temperature : 0.0
228228 top_p : 1.0
229229 max_tokens : 1280
230- use_beam_search : False
230+ # use_beam_search: False
231231 apply_chat_template :
232232 max_length : 512
233233 tokenize : False
234234 add_generation_prompt : True
235235
236236 - _target_ : sdp.processors.CometoidWMTQualityEstimation
237- input_manifest_file : /home/ameister/SDP_YODAS2/test_config/test_qe_m .json
238- output_manifest_file : /home/ameister/SDP_YODAS2/test_config/test_qe_m_out.json
239- source_text_field : source
240- target_text_field : target
241- model_name_or_path : /home/ameister/SDP_YODAS2/models/checkpoints/marian.model.bin
242- vocab_path : /home/ameister/SDP_YODAS2/models/vocab.spm
243-
237+ output_manifest_file : ${workspace_dir}/${filters.source_lang}/manifest_28 .json
238+ source_text_field : pred_text # source
239+ target_text_field : generation # target
240+ model_name_or_path : cometoid-wmt23
241+ device_type : gpu
242+ num_devices : 4
243+ chunksize : 10
244244
245245
246246
0 commit comments