Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ dmypy.json
# Cython debug symbols
cython_debug/

# Pytest cache
test_output*/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = "1.4.1"

from .pipeline import OntoAlignerPipeline
Expand Down
13 changes: 13 additions & 0 deletions ontoaligner/aligner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .fewshot.models import * # NOQA
from .fewshot.dataset import * # NOQA
from .icv.models import * # NOQA
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/fewshot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .fewshot import * # NOQA
14 changes: 13 additions & 1 deletion ontoaligner/aligner/fewshot/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines dataset classes for few-shot learning tasks, particularly for concept comparison tasks.
These classes inherit from the RAGDataset class and extend its functionality to handle few-shot learning
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/fewshot/fewshot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines the FewShotRAG class, an extension of the RAG model, designed for few-shot learning tasks.
The FewShotRAG class uses retrieval-augmented generation techniques, combining information retrieval and
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/fewshot/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines a collection of classes that extend the FewShotRAG model, each combining a specific
retrieval model and language model (LLM) configuration. These specialized configurations are tailored
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/icv/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .icv import * # NOQA
14 changes: 13 additions & 1 deletion ontoaligner/aligner/icv/icv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Script for implementing ICV-based ontology matching using RAG and LLM architectures.

Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/icv/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Script for integrating ICV-based language models with various retrieval mechanisms.

Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/lightweight/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .lightweight import * # NOQA
16 changes: 14 additions & 2 deletions ontoaligner/aligner/lightweight/lightweight.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines models for ontology matching, specifically a lightweight model
and an extension that uses fuzzy string matching via the RapidFuzz library.
Expand Down Expand Up @@ -108,7 +120,7 @@ def calculate_similarity(self, source: str, candidates: List) -> [int, float]:
Returns:
List: A list containing the index of the most similar candidate and the normalized similarity score.
"""
- selected_candid = rapidfuzz.process_cpp.extractOne(
+ selected_candid = rapidfuzz.process.extractOne(
source,
candidates,
scorer=self.ratio_estimate(),
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/lightweight/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines different variants of the `FuzzySMLightweight` class, each implementing
a different string similarity ratio estimation method using the RapidFuzz library.
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/llm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .llm import * # NOQA
15 changes: 13 additions & 2 deletions ontoaligner/aligner/llm/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# -*- coding: utf-8 -*-

# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Dict

from torch.utils.data import Dataset
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/llm/llm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines several classes for interacting with large language models (LLMs) through
various architectures, such as a generic LLM class, OpenAI-based LLMs, and encoder-decoder
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/llm/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines various subclasses for different types of language models (LMs), including encoder-decoder
models, decoder-only models, and models interfacing with OpenAI's GPT. These classes inherit from
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/rag/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .rag import * # NOQA
14 changes: 13 additions & 1 deletion ontoaligner/aligner/rag/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines a set of custom dataset classes for handling various types of data used in a real-world entity classification task.
These datasets preprocess and format the input data to create structured prompts for a classification model, with variations
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/rag/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines a series of Retrieval-Augmented Generation (RAG) classes that combine different retrieval models
and language models (LLMs). Each class specializes in pairing a specific retrieval model (e.g., AdaRetrieval, BERTRetrieval)
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/rag/rag.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script defines several classes that implement retrieval-augmented generation (RAG) architectures for natural language generation tasks.
The architecture integrates retrieval models (such as AdaRetrieval and BERTRetrieval) and language models (such as AutoModelForCausalLM and OpenAI)
Expand Down
14 changes: 13 additions & 1 deletion ontoaligner/aligner/retrieval/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .retrieval import * # NOQA
Loading