Changes addressing the reviewer’s comments

root · root · commit 1404a0283f82 · 2025-08-05T03:04:15.000-07:00
Signed-off-by: root <root@cpu-00064.cm.cluster>
diff --git a/requirements/docs.txt b/requirements/docs.txt
@@ -3,5 +3,4 @@ pyyaml
 Sphinx
 sphinx-book-theme
 sphinx-copybutton
-sphinxext-opengraph
-tabulate
+sphinxext-opengraph
diff --git a/sdp/processors/inference/quality_estimation/pymarian.py b/sdp/processors/inference/quality_estimation/pymarian.py
@@ -19,9 +19,10 @@
 import numpy as np
 
 from sdp.logging import logger
-from sdp.processors.base_processor import BaseParallelProcessor
+from sdp.processors.base_processor import BaseProcessor
 
-class CometoidWMTQualityEstimation(BaseParallelProcessor):
+
+class CometoidWMTQualityEstimation(BaseProcessor):
     """
     A processor for estimating translation quality using pretrained COMET-like models 
     based on MarianNMT and the pymarian Evaluator.
@@ -78,20 +79,26 @@ def __init__(self,
                  chunksize = 5000,
                  **kwargs,
     ):
-        super().__init__(max_workers = num_devices, chunksize = chunksize, in_memory_chunksize = chunksize, **kwargs)
+        super().__init__(**kwargs)
         self.source_text_field = source_text_field
         self.target_text_field = target_text_field
         self.model_name_or_path = model_name_or_path
         self.vocab_path = vocab_path
         self.save_model_to = save_model_to
         self.device_type = device_type
+        self.max_workers = num_devices
         self.mini_batch = mini_batch
         self.maxi_batch = maxi_batch
         self.output_field = output_field
         self.model = None
+        self.chunksize = chunksize
 
     def load_model(self):
-        from pymarian import Evaluator
+        try:
+            from pymarian import Evaluator
+        except ImportError:
+            raise ImportError("`pymarian` is not installed. Please install it using `pip install pymarian`.")
+        
         from huggingface_hub import hf_hub_download
 
         """
@@ -140,9 +147,6 @@ def load_model(self):
         
         self.model = Evaluator(marian_args)
 
-    def process_dataset_entry(self):
-        pass
-
     def process(self):
         """
         Process the entire manifest in chunks.
diff --git a/tests/test_cometoid_qe.py b/tests/test_cometoid_qe.py
@@ -26,7 +26,8 @@ def mock_processor():
         output_field="cometoid_score",
         device_type="cpu",
         num_devices=1,
-        chunksize=1
+        chunksize=1,
+        output_manifest_file="/tmp/test_output.jsonl",
     )
     return processor
 
@@ -51,12 +52,11 @@ def test_process_dataset_entry(mock_processor):
         "tgt": "Dies ist ein Testsatz."
     }
 
-    mock_processor.output_manifest_file = "/tmp/test_output.jsonl"
     mock_processor._chunk_manifest = lambda: [[entry]]
     mock_processor.finalize = MagicMock()
     mock_processor.number_of_entries = 0
 
-    # 👇 Patch load_model to avoid real downloading
+    # Patch load_model to avoid real downloading
     with patch.object(mock_processor, "load_model"), \
          patch("builtins.open"), \
          patch("json.dump"), \