Skip to content

Commit ff2e810

Browse files
authored
Merge pull request #37 from sciknoworg/dev
#25 dropping dependency on setfit, ontospy and requirements update
2 parents a42d072 + 305aabd commit ff2e810

File tree

11 files changed

+54
-161
lines changed

11 files changed

+54
-161
lines changed

.github/workflows/test-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212

1313
strategy:
1414
matrix:
15-
python-version: [3.10.x, 3.11.x]
15+
python-version: [3.10.x, 3.11.x, 3.12.x, 3.13.x]
1616

1717
steps:
1818
- name: Checkout repository

CHANGELOG.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
## Changelog
22

3-
### V1.4.0 Changelog (May 2025)
3+
### V1.4.1 Changelog (May 26, 2025)
4+
- Fixed an issue related to #25 and #36.
5+
- `sentence-transformers` v4.1.0 is now supported.
6+
- Added Python 3.12 and 3.13 to the automated test matrix.
7+
- Removed the dependency on ontospy, since it is no longer maintained. It was originally used for the `MaterialInformationOntoOntology` class.
8+
9+
### V1.4.0 Changelog (May 22, 2025)
410
- Fixed a security vulnerability by updating the Torch and Transformers dependency version.
511
- Integrated pytest into the pyproject.toml to enable testing support.
612
- Resolved Python version compatibility issues in the continuous integration (CI) pipeline for stable test runs.

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ keywords:
1717
- "Alignment"
1818
- "Python Library"
1919
license: "Apache-2.0"
20-
version: "1.4.0"
21-
date-released: "2025-05-22"
20+
version: "1.4.1"
21+
date-released: "2025-05-26"

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ cd OntoAligner
4646
```
4747
3. Create a virtual environment with python=3.10, activate it, install the required dependencies and install the pre-commit configuration:
4848
```bash
49-
conda create -n my_env python=3.9
49+
conda create -n my_env python=3.10
5050
conda activate my_env
5151
pip install -r requirements.txt
5252
pre-commit install

ontoaligner/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
__version__ = "1.4.0"
2+
__version__ = "1.4.1"
33

44
from .pipeline import OntoAlignerPipeline
55
from ontoaligner import ontology, base, encoder, aligner, utils, postprocess

ontoaligner/ontology/oaei/mse.py

Lines changed: 3 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@
99
import re
1010
from typing import Any, List
1111

12-
import ontospy
13-
1412
from ...base import BaseOntologyParser, OMDataset
13+
from ..generic import GenericOntology
1514

1615
track = "mse"
1716

@@ -120,137 +119,14 @@ def get_synonyms(self, owl_class: Any) -> List:
120119
return []
121120

122121

123-
class MaterialInformationOntoOntology(BaseOntologyParser):
122+
class MaterialInformationOntoOntology(GenericOntology):
124123
"""
125124
A parser for the Material Information Ontology.
126125
127126
This class provides methods for handling ontology items such as labels, names, IRIs,
128127
parents, children, and more. It also provides functionality to load the ontology from a file.
129128
"""
130-
def is_contain_label(self, owl_class: Any) -> bool:
131-
"""
132-
Checks if the ontology class has a label.
133-
134-
Parameters:
135-
owl_class (Any): The ontology class whose label presence is to be checked.
136-
137-
Returns:
138-
bool: Always returns True as all classes are assumed to have labels.
139-
"""
140-
return True
141-
142-
def get_name(self, owl_class: Any) -> str:
143-
"""
144-
Retrieves the name of the ontology class.
145-
146-
Parameters:
147-
owl_class (Any): The ontology class whose name is to be retrieved.
148-
149-
Returns:
150-
str: The name of the ontology class.
151-
"""
152-
return str(owl_class.uri).split("#")[1]
153-
154-
def get_label(self, owl_class: Any) -> str:
155-
"""
156-
Retrieves and formats the label of the ontology class.
157-
158-
Parameters:
159-
owl_class (Any): The ontology class whose label is to be retrieved.
160-
161-
Returns:
162-
str: The formatted label of the ontology class.
163-
"""
164-
preprocessed_str = (
165-
self.get_iri(owl_class).split("#")[1].replace("_", " ").replace("-", "")
166-
)
167-
return split_string(preprocessed_str)
168-
169-
def get_iri(self, owl_class: Any) -> str:
170-
"""
171-
Retrieves the IRI of the ontology class.
172-
173-
Parameters:
174-
owl_class (Any): The ontology class whose IRI is to be retrieved.
175-
176-
Returns:
177-
str: The IRI of the ontology class.
178-
"""
179-
return str(owl_class.uri)
180-
181-
def get_childrens(self, owl_class: Any) -> List:
182-
"""
183-
Retrieves the children of the ontology class.
184-
185-
Parameters:
186-
owl_class (Any): The ontology class whose children are to be retrieved.
187-
188-
Returns:
189-
List: A list of child classes for the given ontology class.
190-
"""
191-
return self.get_owl_items(owl_class.children())
192-
193-
def get_parents(self, owl_class: Any) -> List:
194-
"""
195-
Retrieves the parents of the ontology class.
196-
197-
Parameters:
198-
owl_class (Any): The ontology class whose parents are to be retrieved.
199-
200-
Returns:
201-
List: A list of parent classes for the given ontology class.
202-
"""
203-
return self.get_owl_items(owl_class.parents())
204-
205-
def get_synonyms(self, owl_class: Any) -> List:
206-
"""
207-
Retrieves synonyms for the ontology class.
208-
209-
Parameters:
210-
owl_class (Any): The ontology class whose synonyms are to be retrieved.
211-
212-
Returns:
213-
List: An empty list as no synonyms are implemented for this ontology class.
214-
"""
215-
return []
216-
217-
def get_comments(self, owl_class: Any) -> List:
218-
"""
219-
Retrieves comments for the ontology class.
220-
221-
Parameters:
222-
owl_class (Any): The ontology class whose comments are to be retrieved.
223-
224-
Returns:
225-
List: An empty list as no comments are implemented for this ontology class.
226-
"""
227-
return []
228-
229-
def get_owl_classes(self, ontology: Any) -> Any:
230-
"""
231-
Retrieves all classes from the ontology.
232-
233-
Parameters:
234-
ontology (Any): The ontology whose classes are to be retrieved.
235-
236-
Returns:
237-
Any: The classes of the ontology.
238-
"""
239-
return ontology.all_classes
240-
241-
def load_ontology(self, input_file_path: str) -> Any:
242-
"""
243-
Loads an ontology from the specified file.
244-
245-
Parameters:
246-
input_file_path (str): The path to the ontology file to be loaded.
247-
248-
Returns:
249-
Any: The loaded ontology.
250-
"""
251-
ontology = ontospy.Ontospy(input_file_path, verbose=False)
252-
return ontology
253-
129+
pass
254130

255131
class MatOntoOntology(BaseOntologyParser):
256132
"""

ontoaligner/postprocess/label_mapper.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
- `TFIDFLabelMapper`: Uses a TfidfVectorizer and a classifier for label prediction.
55
- `SBERTLabelMapper`: Uses SentenceTransformer embeddings and a classifier for label prediction.
66
"""
7-
87
from typing import Dict, List, Tuple, Any
98
from sklearn.feature_extraction.text import TfidfVectorizer
109
from sklearn.pipeline import Pipeline
11-
from setfit import SetFitModel
10+
from sentence_transformers import SentenceTransformer
11+
from sklearn.linear_model import LogisticRegression
12+
from sklearn.preprocessing import LabelEncoder
1213

1314

1415
class LabelMapper:
@@ -110,34 +111,50 @@ def _predict(self, X: List[str]) -> List[str]:
110111
return self.model.predict(X)
111112

112113

113-
class SetFitShallowLabelMapper(LabelMapper):
114+
class SBERTLabelMapper(LabelMapper):
114115
"""
115-
LabelMapper subclass using a pretrained SetFit model for label prediction.
116+
LabelMapper subclass using SentenceTransformer embeddings and a classifier for label prediction.
117+
118+
Example usage:
119+
>>> label_dict = {
120+
"yes":["yes", "correct", "true"],
121+
"no":["no", "incorrect", "false"]
122+
}
123+
>>> mapper = SBERTLabelMapper("all-MiniLM-L12-v2", label_dict)
124+
>>> mapper.fit()
125+
>>> mapper.predict(["yes", "correct", "false", "nice", "too bad", "very good"])
126+
['yes', 'yes', 'no', 'yes', 'no', 'yes']
116127
"""
117-
def __init__(self, model_id: str, label_dict: Dict[str, List[str]], iterator_no: int = 10):
128+
def __init__(self, model_id: str, label_dict: Dict[str, List[str]], classifier=None, iterator_no: int = 10):
118129
"""
119-
Initializes the SetFitShallowLabelMapper with a specified SetFit model.
130+
Initializes the SBERTLabelMapper.
120131
121132
Parameters:
122-
model_id (str): Identifier for the pretrained SetFit model.
133+
model_id (str): Name of the pretrained SentenceTransformer model.
123134
label_dict (Dict[str, List[str]]): Dictionary mapping each label to a list of candidate phrases.
124135
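classifier: Optional classifier fitted on the sentence embeddings; a `LogisticRegression()` is used when none is given.
iterator_no (int): Number of iterations to replicate training data.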
125136
"""
126137
super().__init__(label_dict, iterator_no)
127-
self.model = SetFitModel.from_pretrained(model_id)
138+
self.embedder = SentenceTransformer(model_id)
139+
self.classifier = classifier or LogisticRegression()
140+
self.label_encoder = LabelEncoder()
128141

129142
def fit(self):
130-
"""Fits the SetFit model on the training data."""
131-
self.model.fit(self.x_train, self.y_train, num_epochs=10)
143+
"""Fits the classifier on the sentence embeddings."""
144+
embeddings = self.embedder.encode(self.x_train, convert_to_numpy=True)
145+
y_encoded = self.label_encoder.fit_transform(self.y_train)
146+
self.classifier.fit(embeddings, y_encoded)
132147

133148
def _predict(self, X: List[str]) -> List[str]:
134149
"""
135-
Predicts labels for the given input using the SetFit model.
150+
Predicts labels by encoding the inputs with the sentence transformer and classifying the resulting embeddings.
136151
137152
Parameters:
138153
X (List[str]): List of input texts to classify.
139154
140155
Returns:
141156
List[str]: Predicted labels.
142157
"""
143-
return self.model.predict(X)
158+
embeddings = self.embedder.encode(X, convert_to_numpy=True)
159+
y_pred_encoded = self.classifier.predict(embeddings)
160+
return [str(pred) for pred in self.label_encoder.inverse_transform(y_pred_encoded)]

pyproject.toml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "OntoAligner"
3-
version = "1.4.0"
3+
version = "1.4.1"
44
description = "OntoAligner: A Comprehensive Modular and Robust Python Toolkit for Ontology Alignment."
55
authors = ["Hamed Babaei Giglou <[email protected]>"]
66
license="Apache-2.0"
@@ -17,18 +17,16 @@ numpy = "*"
1717
pandas = "*"
1818
datasets = "*"
1919
scikit-learn = "*"
20-
tqdm = "4.66.3"
20+
tqdm = "*"
2121
owlready2 = "0.44"
2222
rdflib = "7.1.1"
23-
ontospy = "2.1.1"
2423
torch = "2.7.0"
2524
transformers = "4.50.0"
2625
rapidfuzz = "3.5.2"
2726
openai = "1.56.0"
2827
rank_bm25 = "0.2.2"
2928
huggingface_hub="0.28.1"
30-
sentence-transformers = "3.4.1"
31-
setfit = "1.1.1"
29+
sentence-transformers = "4.1.0"
3230
bitsandbytes="0.45.1"
3331

3432
[tool.poetry.dev-dependencies]

requirements.txt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,16 @@ numpy
44
pandas
55
datasets
66
scikit_learn
7-
ontospy==2.1.1
87
openai==1.56.0
98
owlready2==0.44
109
rank_bm25==0.2.2
1110
rapidfuzz==3.5.2
1211
rdflib==7.1.1
13-
sentence_transformers==3.4.1
12+
sentence_transformers==4.1.0
1413
torch==2.7.0
15-
tqdm==4.66.3
14+
tqdm
1615
transformers==4.50.0
1716
huggingface_hub==0.28.1
18-
setfit==1.1.1
1917
bitsandbytes==0.45.1
2018
pre-commit
2119
setuptools

setup.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="OntoAligner",
8-
version="1.4.0",
8+
version="1.4.1",
99
author="Hamed Babaei Giglou",
1010
author_email="[email protected]",
1111
description="OntoAligner: A Comprehensive Modular and Robust Python Toolkit for Ontology Alignment",
@@ -20,18 +20,16 @@
2020
"numpy",
2121
"pandas",
2222
"scikit-learn",
23-
"ontospy==2.1.1",
23+
"tqdm",
2424
"openai==1.56.0",
2525
"owlready2==0.44",
2626
"rank_bm25==0.2.2",
2727
"rapidfuzz==3.5.2",
2828
"rdflib==7.1.1",
29-
"sentence-transformers==3.4.1",
29+
"sentence-transformers==4.1.0",
3030
"torch==2.7.0",
31-
"tqdm==4.66.3",
3231
"transformers==4.50.0",
3332
"huggingface_hub==0.28.1",
34-
"setfit==1.1.1",
3533
"bitsandbytes==0.45.1",
3634
],
3735
classifiers=[

0 commit comments

Comments
 (0)