Skip to content

Commit a5b2bfa

Browse files
authored
build(medcat): CU-869aujr7h Add nightly workflow to check library stability (#171)
* CU-869aujr7h: Add nightly workflow to check library stability * CU-869aujr7h: Update working directory in new workflow * CU-869aujr7h: Update comment in new workflow * CU-869aujr7h: Disallow incompatible transformers version * CU-869aujr7h: Fix workflow install / sync * CU-869aujr7h: Make workflow only have read permissions * CU-869aujr7h: Install without lock * CU-869aujr7h: Use non-uv pip for lock-free install * CU-869aujr7h: Force usage of correct python version in workflow * CU-869aujr7h: Fix versions in workflow (3.10 instead of 3.1) * Typing fix for regression utils * Typing fix for modern bert RelCAT * CU-869aujr7h: Change the way tests timeout is set up * CU-869aujr7h: Attempt to fix builds on Windows by ignoring Windows + python 3.9 * CU-869aujr7h: Remove python 3.9 from matrix * CU-869aujr7h: Attempt fix mock for Windows * CU-869aujr7h: Use CPU-only torch for MacOS in workflow to avoid MPS issues * CU-869aujr7h: Force installation to happen through bash so IF works on Windows * CU-869aujr7h: Add 3.13 for lib stability workflow * CU-869aujr7h: [NEEDS TO BE REVERTED] Only run on MacOS and Windows on 3.10. This commit TEMPORARILY (while the workflows are failing) makes them only run on Windows and MacOS (which are the workflows that are failing) and on python 3.10 so as to lower the overall number of workflow runners. * CU-869aujr7h: Allow 45 minutes for tests so tests on MacOS don't time out * CU-869aujr7h: Use temporary directory instead of named temp file for Windows compatibility * CU-869aujr7h: Avoid heavy RAM tests (DeID) on MacOS during CI * CU-869aujr7h: Ignore further tests for MacOS runner * CU-869aujr7h: Make component tests more flexible * CU-869aujr7h: Fix test skip method call * Revert "CU-869aujr7h: [NEEDS TO BE REVERTED] Only run on MacOS and Windows on 3.10." This reverts commit f201270. 
* CU-869aujr7h: Remove push-specific workflow triggers * CU-869avau57: Require numpy 2.1 or above for python 3.13 * CU-869aujr7h: Rename helper method to avoid heavy RAM tests on MacOS in CI
1 parent 8960255 commit a5b2bfa

File tree

7 files changed

+112
-13
lines changed

7 files changed

+112
-13
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
name: MedCAT-nightly-stability-check
2+
permissions:
3+
contents: read
4+
on:
5+
schedule:
6+
- cron: "0 3 * * *" # every day at 3am UTC
7+
workflow_dispatch: # allow manual runs
8+
pull_request:
9+
paths:
10+
- ".github/workflows/medcat-v2-lib-stabiliy.yml"
11+
12+
defaults:
13+
run:
14+
working-directory: ./medcat-v2
15+
16+
17+
jobs:
18+
test:
19+
runs-on: ${{ matrix.os }}
20+
strategy:
21+
fail-fast: false
22+
matrix:
23+
os: [ubuntu-latest, macos-latest, windows-latest]
24+
python-version: [ "3.10", "3.11", "3.12", "3.13"]
25+
26+
steps:
27+
- uses: actions/checkout@v4
28+
- uses: astral-sh/setup-uv@v3
29+
with:
30+
python-version: ${{ matrix.python-version }}
31+
32+
- name: Install with latest deps
33+
shell: bash
34+
run: |
35+
uv run --python ${{ matrix.python-version }} python -m ensurepip
36+
uv run --python ${{ matrix.python-version }} python -m pip install --upgrade pip
37+
# install cpu-only torch for MacOS
38+
if [[ "$RUNNER_OS" == "macOS" ]]; then
39+
uv run --python ${{ matrix.python-version }} python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
40+
fi
41+
uv run --python ${{ matrix.python-version }} python -m pip install ".[spacy,deid,meta-cat,rel-cat,dict-ner,dev]"
42+
43+
- name: Check types
44+
run: |
45+
uv run --python ${{ matrix.python-version }} python -m mypy --follow-imports=normal medcat
46+
47+
- name: Ruff linting
48+
run: |
49+
uv run --python ${{ matrix.python-version }} python -m ruff check medcat --preview
50+
51+
- name: Test
52+
run: |
53+
uv run --python ${{ matrix.python-version }} python -m unittest discover
54+
timeout-minutes: 45
55+
56+
- name: Model regression
57+
run: |
58+
uv run --python ${{ matrix.python-version }} bash tests/backwards_compatibility/run_current.sh

medcat-v2/medcat/utils/cdb_state.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import TypedDict, cast
44
import tempfile
55
import dill
6+
import os
67

78
from copy import deepcopy
89

@@ -216,7 +217,10 @@ def on_disk_memory_capture(cdb):
216217
Yields:
217218
None
218219
"""
219-
with tempfile.NamedTemporaryFile() as tf:
220-
save_cdb_state(cdb, tf.name)
220+
# NOTE: using temporary directory so that it also works on Windows
221+
# otherwise you can't reopen a temporary file in Windows (apparently)
222+
with tempfile.TemporaryDirectory() as temp_dir:
223+
temp_file_name = os.path.join(temp_dir, "cdb_state.dat")
224+
save_cdb_state(cdb, temp_file_name)
221225
yield
222-
load_and_apply_cdb_state(cdb, tf.name)
226+
load_and_apply_cdb_state(cdb, temp_file_name)

medcat-v2/pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ classifiers = [
5757
# For an analysis of this field vs pip's requirements files see:
5858
# https://packaging.python.org/discussions/install-requires-vs-requirements/
5959
dependencies = [ # Optional
60-
"numpy>2.0",
60+
"numpy>=2.1; python_version >= '3.13'",
61+
"numpy>=2.0; python_version < '3.13'",
6162
"dill",
6263
"pandas>=2.2,<3.0",
6364
"tqdm>=4.64,<5.0",
@@ -102,6 +103,8 @@ dict_ner = [
102103
]
103104
deid = [
104105
"datasets>=2.2.2,<3.0.0",
106+
# Transformers 4.57 doesn't support 3.9
107+
"transformers!=4.57.0; python_version == '3.9'",
105108
"transformers>=4.41.0,<5.0", # avoid major bump
106109
# Transformers 4.57 doesn't support 3.9
107110
"transformers!=4.57.0; python_version == '3.9'",
@@ -112,6 +115,8 @@ deid = [
112115
"scipy>=1.14; python_version >= '3.13'",
113116
]
114117
rel_cat = [
118+
# Transformers 4.57 doesn't support 3.9
119+
"transformers!=4.57.0; python_version == '3.9'",
115120
"transformers>=4.41.0,<5.0", # avoid major bump
116121
# Transformers 4.57 doesn't support 3.9
117122
"transformers!=4.57.0; python_version == '3.9'",

medcat-v2/tests/components/helper.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,32 @@ def setUpClass(cls):
4040
cls.vtokenizer = FTokenizer()
4141
cls.comp_cnf: ComponentConfig = getattr(
4242
cls.cnf.components, cls.comp_type.name)
43+
if isinstance(cls.default_creator, Type):
44+
cls._def_creator_name_opts = (cls.default_creator.__name__,)
45+
else:
46+
# classmethod
47+
cls._def_creator_name_opts = (".".join((
48+
# either class.method_name
49+
cls.default_creator.__self__.__name__,
50+
cls.default_creator.__name__)),
51+
# or just method_name
52+
cls.default_creator.__name__
53+
)
4354

4455
def test_has_default(self):
4556
avail_components = types.get_registered_components(self.comp_type)
4657
self.assertEqual(len(avail_components), self.expected_def_components)
4758
name, cls_name = avail_components[0]
48-
self.assertEqual(name, self.default)
49-
self.assertIs(cls_name, self.default_creator.__name__)
59+
# 1 name / cls name
60+
eq_name = [name == self.default for name, _ in avail_components]
61+
eq_cls = [cls_name in self._def_creator_name_opts
62+
for _, cls_name in avail_components]
63+
self.assertEqual(sum(eq_name), 1)
64+
# NOTE: for NER both the default as well as the Dict based NER
65+
# have the same class name, so may be more than 1
66+
self.assertGreaterEqual(sum(eq_cls), 1)
67+
# needs to have the same class where name is equal
68+
self.assertTrue(eq_cls[eq_name.index(True)])
5069

5170
def test_can_create_def_component(self):
5271
component = types.create_core_component(

medcat-v2/tests/components/ner/trf/test_transformers_ner.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@
1414
from medcat.model_creation.cdb_maker import CDBMaker
1515
from transformers import TrainerCallback
1616

17-
from unittest import TestCase
17+
from unittest import TestCase, skipIf
1818
import unittest.mock
1919

2020
from ...addons.meta_cat.test_meta_cat import FakeTokenizer
2121
from ....pipeline.test_pipeline import FakeCDB, Config
2222
from .... import RESOURCES_PATH
23+
from ....utils.ner.test_deid import is_macos_on_ci
2324

2425

2526
class TransformersNERTests(TestCase):
@@ -280,6 +281,8 @@ def test_ignore_extra_labels(self):
280281
)
281282

282283

284+
@skipIf(not is_macos_on_ci(),
285+
"MacOS on workflow doesn't have enough memory")
283286
class AdditionalTransfromersNERTests(TestCase):
284287
TOKENIZER = FakeTokenizer()
285288
CNF = ConfigTransformersNER()

medcat-v2/tests/utils/ner/test_deid.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636
cnf.general.nlp.provider = 'spacy'
3737

3838

39+
def is_macos_on_ci() -> bool:
    """Report whether the ``RUNNER_OS`` environment variable is NOT macOS.

    NOTE(review): despite its name, this returns ``True`` when the
    lower-cased ``RUNNER_OS`` value is anything other than ``"macos"``
    (including when the variable is unset, in which case the ``"None"``
    default is compared) and ``False`` only when it equals ``"macos"``.
    Any rename or inversion has to be done together with every call site.

    Returns:
        bool: ``False`` if ``RUNNER_OS`` case-insensitively equals
            ``"macos"``, ``True`` otherwise.
    """
    return os.getenv("RUNNER_OS", "None").lower() != "macos"
41+
42+
3943
def _get_def_cdb():
    """Return ``CDB(config=cnf)``, a CDB built from the module-level ``cnf``."""
    return CDB(config=cnf)
4145

@@ -112,13 +116,16 @@ def _train_model_once() -> tuple[tuple[Any, Any, Any], deid.DeIdModel]:
112116
return retval, model
113117

114118

115-
_TRAINED_MODEL_AND_INFO = _train_model_once()
119+
if is_macos_on_ci():
120+
_TRAINED_MODEL_AND_INFO = _train_model_once()
116121

117122

118123
def train_model_once() -> tuple[tuple[Any, Any, Any], deid.DeIdModel]:
    """Return the module-level ``_TRAINED_MODEL_AND_INFO`` value.

    No training is performed by this call; it only hands back the global
    that is populated while the module is being imported.

    NOTE(review): ``_TRAINED_MODEL_AND_INFO`` is assigned only inside an
    ``if is_macos_on_ci():`` guard at module level, so when that guard is
    falsy the name is never bound and this call raises ``NameError``.

    Returns:
        tuple[tuple[Any, Any, Any], deid.DeIdModel]: whatever
            ``_train_model_once()`` produced at import time.
    """
    return _TRAINED_MODEL_AND_INFO
120125

121126

127+
@unittest.skipIf(not is_macos_on_ci(),
128+
"MacOS on workflow doesn't have enough memory")
122129
class DeIDModelTests(unittest.TestCase):
123130
save_folder = os.path.join("results", "final_model")
124131

@@ -171,6 +178,8 @@ def test_add_new_concepts(self):
171178
''' # noqa
172179

173180

181+
@unittest.skipIf(not is_macos_on_ci(),
182+
"MacOS on workflow doesn't have enough memory")
174183
class DeIDModelWorks(unittest.TestCase):
175184
save_folder = os.path.join("results", "final_model")
176185

medcat-v2/tests/utils/test_cdb_state.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,18 +113,19 @@ def test_state_restored(self):
113113

114114
class StateSavedOnDiskTests(StateSavedTests):
115115
on_disk = True
116-
_named_tempory_file = tempfile.NamedTemporaryFile
116+
_named_tempory_directory = tempfile.TemporaryDirectory
117117

118118
@classmethod
119119
def saved_name_temp_file(cls):
120-
tf = cls._named_tempory_file()
121-
cls.temp_file_name = tf.name
120+
tf = cls._named_tempory_directory()
121+
cls.temp_file_name = os.path.join(tf.name, "cdb_state.dat")
122122
return tf
123123

124124
@classmethod
125125
def setUpClass(cls) -> None:
126-
with mock.patch("builtins.open", side_effect=open) as cls.popen:
127-
with mock.patch("tempfile.NamedTemporaryFile",
126+
with mock.patch("medcat.utils.cdb_state.open", side_effect=open
127+
) as cls.popen:
128+
with mock.patch("tempfile.TemporaryDirectory",
128129
side_effect=cls.saved_name_temp_file) as cls.pntf:
129130
return super().setUpClass()
130131

0 commit comments

Comments
 (0)