Skip to content

Commit 1404a02

Browse files
author
root
committed
Changes addressing the reviewer’s comments
Signed-off-by: root <root@cpu-00064.cm.cluster>
1 parent 1682a17 commit 1404a02

File tree

3 files changed

+15
-12
lines changed

3 files changed

+15
-12
lines changed

requirements/docs.txt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,5 +3,4 @@ pyyaml
33
Sphinx
44
sphinx-book-theme
55
sphinx-copybutton
6-
sphinxext-opengraph
7-
tabulate
6+
sphinxext-opengraph

sdp/processors/inference/quality_estimation/pymarian.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,10 @@
1919
import numpy as np
2020

2121
from sdp.logging import logger
22-
from sdp.processors.base_processor import BaseParallelProcessor
22+
from sdp.processors.base_processor import BaseProcessor
2323

24-
class CometoidWMTQualityEstimation(BaseParallelProcessor):
24+
25+
class CometoidWMTQualityEstimation(BaseProcessor):
2526
"""
2627
A processor for estimating translation quality using pretrained COMET-like models
2728
based on MarianNMT and the pymarian Evaluator.
@@ -78,20 +79,26 @@ def __init__(self,
7879
chunksize = 5000,
7980
**kwargs,
8081
):
81-
super().__init__(max_workers = num_devices, chunksize = chunksize, in_memory_chunksize = chunksize, **kwargs)
82+
super().__init__(**kwargs)
8283
self.source_text_field = source_text_field
8384
self.target_text_field = target_text_field
8485
self.model_name_or_path = model_name_or_path
8586
self.vocab_path = vocab_path
8687
self.save_model_to = save_model_to
8788
self.device_type = device_type
89+
self.max_workers = num_devices
8890
self.mini_batch = mini_batch
8991
self.maxi_batch = maxi_batch
9092
self.output_field = output_field
9193
self.model = None
94+
self.chunksize = chunksize
9295

9396
def load_model(self):
94-
from pymarian import Evaluator
97+
try:
98+
from pymarian import Evaluator
99+
except ImportError:
100+
raise ImportError("`pymarian` is not installed. Please install it using `pip install pymarian`.")
101+
95102
from huggingface_hub import hf_hub_download
96103

97104
"""
@@ -140,9 +147,6 @@ def load_model(self):
140147

141148
self.model = Evaluator(marian_args)
142149

143-
def process_dataset_entry(self):
144-
pass
145-
146150
def process(self):
147151
"""
148152
Process the entire manifest in chunks.

tests/test_cometoid_qe.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,8 @@ def mock_processor():
2626
output_field="cometoid_score",
2727
device_type="cpu",
2828
num_devices=1,
29-
chunksize=1
29+
chunksize=1,
30+
output_manifest_file="/tmp/test_output.jsonl",
3031
)
3132
return processor
3233

@@ -51,12 +52,11 @@ def test_process_dataset_entry(mock_processor):
5152
"tgt": "Dies ist ein Testsatz."
5253
}
5354

54-
mock_processor.output_manifest_file = "/tmp/test_output.jsonl"
5555
mock_processor._chunk_manifest = lambda: [[entry]]
5656
mock_processor.finalize = MagicMock()
5757
mock_processor.number_of_entries = 0
5858

59-
# 👇 Patch load_model to avoid real downloading
59+
# Patch load_model to avoid real downloading
6060
with patch.object(mock_processor, "load_model"), \
6161
patch("builtins.open"), \
6262
patch("json.dump"), \

0 commit comments

Comments
 (0)