Commit 3ee15ef

Feat/extras matrix for tests (#269)
* add dependencies matrix
* add pipeline tests for each scoring module
* bug fix
* upd extras installation in gh actions
* upd extras in mypy ci
* fix issues with dependencies for bert scorer test
* fix gcn scorer tests
* fix typing errors
* upd assertions about catboost predictions
* upd ci with presets tests
* try to fix unit tests
1 parent 95e506f commit 3ee15ef

27 files changed: +515 −19 lines

.github/workflows/reusable-test.yaml

Lines changed: 6 additions & 1 deletion

@@ -7,6 +7,11 @@ on:
         required: true
         type: string
         description: 'Command to run tests'
+      extras:
+        required: false
+        type: string
+        default: ''
+        description: 'Space-separated --extra flags (e.g., "--extra transformers --extra peft")'
 
 jobs:
   test:
@@ -39,7 +44,7 @@ jobs:
       - name: Install dependencies for Python ${{ matrix.python-version }}
        run: |
          uv python pin ${{ matrix.python-version }}
-         uv sync --group test
+         uv sync --group test ${{ inputs.extras }}
 
      - name: Run tests
        run: |
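
Note: the extras string a caller passes is interpolated verbatim into the install command, so extras: "--extra transformers --extra peft" makes the install step run uv sync --group test --extra transformers --extra peft, while callers that omit the input keep the previous behaviour thanks to the empty-string default.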

New workflow file: test embedder

Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@
+name: test embedder
+
+on:
+  push:
+    branches:
+      - dev
+  pull_request:
+
+jobs:
+  test:
+    uses: ./.github/workflows/reusable-test.yaml
+    with:
+      test_command: pytest -n auto tests/embedder/
+      extras: --extra sentence-transformers
+

.github/workflows/test-inference.yaml

Lines changed: 1 addition & 0 deletions

@@ -11,3 +11,4 @@ jobs:
     uses: ./.github/workflows/reusable-test.yaml
     with:
       test_command: pytest -n auto tests/pipeline/test_inference.py
+      extras: --extra catboost --extra peft --extra transformers --extra sentence-transformers

.github/workflows/test-optimization.yaml

Lines changed: 1 addition & 0 deletions

@@ -11,3 +11,4 @@ jobs:
     uses: ./.github/workflows/reusable-test.yaml
     with:
       test_command: pytest -n auto tests/pipeline/test_optimization.py
+      extras: --extra catboost --extra peft --extra transformers --extra sentence-transformers

.github/workflows/test-presets.yaml

Lines changed: 1 addition & 0 deletions

@@ -11,3 +11,4 @@ jobs:
     uses: ./.github/workflows/reusable-test.yaml
     with:
       test_command: pytest -n auto tests/pipeline/test_presets.py
+      extras: --extra catboost --extra peft --extra transformers --extra sentence-transformers

New workflow file: test scorers

Lines changed: 47 additions & 0 deletions

@@ -0,0 +1,47 @@
+name: test scorers
+
+on:
+  push:
+    branches:
+      - dev
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ ubuntu-latest ]
+        python-version: [ "3.10", "3.11", "3.12" ]
+        dependency-group: [ "base", "transformers", "peft", "catboost" ]
+        include:
+          - os: windows-latest
+            python-version: "3.10"
+            dependency-group: "base"
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Cache Hugging Face
+        id: cache-hf
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/huggingface
+          key: ${{ runner.os }}-hf
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          version: "0.8.8"
+
+      - name: Install dependencies for Python ${{ matrix.python-version }}
+        run: |
+          uv python pin ${{ matrix.python-version }}
+          uv sync --group test ${{ matrix.dependency-group != 'base' && format('--extra {0}', matrix.dependency-group) || '' }}
+
+      - name: Run scorer tests
+        run: |
+          uv run pytest -n auto tests/modules/scoring/
+
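Note: the expression in the install step acts as a ternary. For the transformers, peft, and catboost groups it expands to the matching flag (e.g. --extra transformers), while for the base group it expands to an empty string, so that job installs only the test dependency group. Combined with three Python versions on ubuntu-latest and the single windows-latest include, the matrix runs the scoring tests against 13 combinations. The transformers job also gets a PyTorch build, because the pyproject.toml change further down declares that extra as transformers[torch].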

.github/workflows/typing.yml

Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@ jobs:
       - name: Install dependencies
        run: |
          uv lock
-         uv sync --group typing
+         uv sync --group typing --extra peft --extra sentence-transformers
 
      - name: Run mypy
        run: uv run mypy src/autointent
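
Note: installing the peft and sentence-transformers extras in the typing job makes those packages importable during the mypy run, presumably so the modules that depend on them can be type-checked instead of erroring on missing imports (the "fix typing errors" item in the commit message).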

.github/workflows/unit-tests.yaml

Lines changed: 1 addition & 1 deletion

@@ -10,4 +10,4 @@ jobs:
   test:
     uses: ./.github/workflows/reusable-test.yaml
     with:
-      test_command: pytest -n auto --ignore=tests/nodes --ignore=tests/pipeline
+      test_command: pytest -n auto --ignore=tests/modules/scoring/ --ignore=tests/pipeline --ignore=tests/embedder

pyproject.toml

Lines changed: 3 additions & 1 deletion

@@ -51,7 +51,9 @@ dependencies = [
 [project.optional-dependencies]
 catboost = ["catboost (>=1.2.8,<2.0.0)"]
 peft = ["peft (>= 0.10.0, !=0.15.0, !=0.15.1, <1.0.0)"]
-transformers = ["transformers (>=4.49.0,<5.0.0)"]
+transformers = [
+    "transformers[torch] (>=4.49.0,<5.0.0)",
+]
 sentence-transformers = ["sentence-transformers (>=3,<4)"]
 dspy = [
     "dspy (>=2.6.5,<3.0.0)",

src/autointent/_wrappers/embedder/hashing_vectorizer.py

Lines changed: 5 additions & 4 deletions

@@ -67,7 +67,7 @@ def get_hash(self) -> int:
         hasher.update(self.config.norm if self.config.norm is not None else "None")
         hasher.update(self.config.binary)
         hasher.update(self.config.dtype)
-        return hasher.hexdigest()
+        return int(hasher.hexdigest(), 16)
 
     @overload
     def embed(
@@ -97,7 +97,7 @@ def embed(
         """
         # Transform texts to sparse matrix, then convert to dense
         embeddings_sparse = self._vectorizer.transform(utterances)
-        embeddings = embeddings_sparse.toarray().astype(np.float32)
+        embeddings: npt.NDArray[np.float32] = embeddings_sparse.toarray().astype(np.float32)
 
         if return_tensors:
             return torch.from_numpy(embeddings)
@@ -115,7 +115,8 @@ def similarity(
         Returns:
             Similarity matrix with shape (n_samples, m_samples).
         """
-        return cosine_similarity(embeddings1, embeddings2).astype(np.float32)
+        similarity_matrix: npt.NDArray[np.float32] = cosine_similarity(embeddings1, embeddings2).astype(np.float32)
+        return similarity_matrix
 
     def dump(self, path: Path) -> None:
         """Save the backend state to disk.
@@ -157,7 +158,7 @@ def load(cls, path: Path) -> "HashingVectorizerEmbeddingBackend":
         logger.debug("Loaded HashingVectorizer backend from %s", path)
         return instance
 
-    def train(self, utterances: list[str], labels: list[int], config) -> None:  # noqa: ANN001
+    def train(self, utterances: list[str], labels: list[int], config) -> None:  # noqa: ANN001  # type: ignore[no-untyped-def]
         """Train the backend.
 
         HashingVectorizer is stateless and doesn't support training.
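
Note: the first hunk fixes get_hash, whose signature promises an int while the old code returned the hex-digest string. A minimal sketch of the conversion, using hashlib directly rather than the project's own hasher object (whose API is not shown in this diff); the field values are hypothetical:

import hashlib

# Sketch only: hash a few config-like fields and return the digest as an int,
# mirroring the fix to get_hash.
hasher = hashlib.sha256()
for value in ("l2", "False", "float32"):  # hypothetical norm / binary / dtype values
    hasher.update(value.encode("utf-8"))

digest = hasher.hexdigest()      # hex string, e.g. '9f86d0...'
config_hash = int(digest, 16)    # parse base-16 into a plain Python int, as get_hash now returns

print(type(digest).__name__, type(config_hash).__name__)  # prints: str int

The remaining hunks add explicit npt.NDArray[np.float32] annotations and a type-ignore comment for the untyped config argument; they do not change runtime behaviour.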
