Commit 72bbe83

fix: Use MedCAT's multiprocessing_batch_char_size (#8)
Replace the call to MedCAT's 'multiprocessing' method with 'multiprocessing_batch_char_size' in the 'MedCATModel' service. The former has been deprecated since version 1.10.0 and was removed in 1.12.0, which caused our bulk processing API to fail.

Closes #7

Signed-off-by: Phoevos Kalemkeris <[email protected]>
1 parent: 5d94c50

File tree

1 file changed (+6, -4 lines)
app/model_services/medcat_model.py

Lines changed: 6 additions & 4 deletions
@@ -101,10 +101,12 @@ def annotate(self, text: str) -> Dict:
     def batch_annotate(self, texts: List[str]) -> List[Dict]:
         batch_size_chars = 500000
 
-        docs = self.model.multiprocessing(self._data_iterator(texts),
-                                          batch_size_chars=batch_size_chars,
-                                          nproc=max(int(cpu_count() / 2), 1),
-                                          addl_info=["cui2icd10", "cui2ontologies", "cui2snomed", "cui2athena_ids"])
+        docs = self.model.multiprocessing_batch_char_size(
+            self._data_iterator(texts),
+            batch_size_chars=batch_size_chars,
+            nproc=max(int(cpu_count() / 2), 1),
+            addl_info=["cui2icd10", "cui2ontologies", "cui2snomed", "cui2athena_ids"]
+        )
         annotations_list = []
         for _, doc in docs.items():
             annotations_list.append(self.get_records_from_doc(doc))
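For reference, a minimal sketch of calling the renamed method directly against a MedCAT model pack, outside the MedCATModel service. The model-pack path, sample texts, and the (id, text) iterator below are illustrative assumptions; the keyword arguments mirror the diff above.

from multiprocessing import cpu_count

from medcat.cat import CAT

# Hypothetical model pack path; any MedCAT >= 1.10 model pack would do.
cat = CAT.load_model_pack("path/to/medcat_model_pack.zip")

texts = ["Patient has type 2 diabetes.", "No history of hypertension."]
# The multiprocessing API consumes an iterable of (id, text) pairs,
# which is what the service's _data_iterator is assumed to produce.
in_data = ((idx, text) for idx, text in enumerate(texts))

# Replacement for the deprecated cat.multiprocessing(...), removed in MedCAT 1.12.0.
docs = cat.multiprocessing_batch_char_size(
    in_data,
    batch_size_chars=500000,
    nproc=max(int(cpu_count() / 2), 1),
    addl_info=["cui2icd10", "cui2ontologies", "cui2snomed", "cui2athena_ids"],
)

# Returns a mapping of document id -> annotated doc (a dict of extracted entities).
for doc_id, doc in docs.items():
    print(doc_id, doc)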
