Skip to content

Commit 6459fbf

Browse files
authored
Merge pull request #51 from sciknoworg/dev
cosmetic fix and encoders refactoring
2 parents 6376ad5 + 8399dad commit 6459fbf

File tree

10 files changed

+276
-244
lines changed

10 files changed

+276
-244
lines changed

docs/source/conf.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,7 @@
7070
"external_links": [
7171
("Github", "https://github.com/sciknoworg/OntoAligner"),
7272
("Pypi", "https://pypi.org/project/OntoAligner/")
73-
],
74-
"navigation_depth": 4,
75-
"collapse_navigation": True,
76-
"logo_only": True,
73+
]
7774
}
7875

7976
html_static_path = ["_static"]

ontoaligner/base/dataset.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
1+
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -19,7 +19,6 @@
1919
- OMDataset: A base class for handling ontology matching datasets, including parsing ontologies and alignments
2020
and collecting dataset-related information.
2121
"""
22-
import os
2322
import json
2423
from abc import ABC
2524
from typing import Any, Dict
@@ -95,17 +94,6 @@ def load_from_json(self, json_file_path: str) -> Dict:
9594
json_data = json.load(f)
9695
return json_data
9796

98-
def __dir__(self):
99-
"""
100-
Returns the directory structure for the dataset.
101-
102-
This method constructs the directory path based on the track and ontology name.
103-
104-
Returns:
105-
str: The constructed directory path for the dataset.
106-
"""
107-
return os.path.join(self.track, self.ontology_name)
108-
10997
def __str__(self):
11098
"""
11199
Returns a string representation of the dataset's ontology name.
@@ -115,4 +103,4 @@ def __str__(self):
115103
Returns:
116104
str: The ontology name as a string.
117105
"""
118-
return f"{self.ontology_name}"
106+
return f"Track: {self.track}, Source-Target sets: {self.ontology_name}"

ontoaligner/base/ontology.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
1+
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
2222
- BaseAlignmentsParser: A base class for parsing alignment data, extracting relationships between
2323
entities and their corresponding RDF data.
2424
"""
25-
from abc import ABC, abstractmethod
25+
from abc import ABC
2626
from typing import Any, Dict, List
2727

2828

@@ -126,7 +126,6 @@ def get_synonyms(self, owl_class: Any) -> List:
126126
"""
127127
return self.get_owl_items(owl_class.hasRelatedSynonym)
128128

129-
@abstractmethod
130129
def get_comments(self, owl_class: Any) -> List:
131130
"""
132131
Abstract method to retrieve comments for the given ontology class.

ontoaligner/encoder/encoders.py

Lines changed: 0 additions & 214 deletions
This file was deleted.

ontoaligner/encoder/lightweight.py

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
1+
# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -23,7 +23,74 @@
2323
"""
2424
from typing import Any, Dict
2525

26-
from .encoders import LightweightEncoder
26+
from ..base import BaseEncoder
27+
28+
class LightweightEncoder(BaseEncoder):
    """
    A lightweight encoder for parsing ontology data and preprocessing it.

    This class walks the source and target ontologies, delegates the extraction
    of each item to ``get_owl_items`` (which concrete subclasses must provide),
    and runs the extracted ``"text"`` field through ``self.preprocess``.

    NOTE(review): ``preprocess`` and ``items_in_owl`` are not defined in this
    class; they are presumably supplied by ``BaseEncoder`` or by subclasses.
    """

    def parse(self, **kwargs) -> Any:
        """
        Parses the source and target ontologies, applying preprocessing.

        Parameters:
            **kwargs: Must contain the keys ``"source"`` and ``"target"``, each an
                iterable of ontology items accepted by ``get_owl_items``.

        Returns:
            list: A two-element list ``[source_items, target_items]`` holding the
                encoded items of each ontology, in their original order.

        Raises:
            KeyError: If ``"source"`` or ``"target"`` is missing from ``kwargs``.
        """
        # Both keys are looked up before any encoding starts, so a missing key
        # fails fast without doing partial work.
        source_onto, target_onto = kwargs["source"], kwargs["target"]
        return [self._encode_items(source_onto), self._encode_items(target_onto)]

    def _encode_items(self, items) -> list:
        """
        Encodes every item of one ontology and preprocesses its ``"text"`` field.

        Parameters:
            items: An iterable of ontology items accepted by ``get_owl_items``.

        Returns:
            list: The encoded items, in input order.
        """
        encoded_items = []
        for item in items:
            encoded = self.get_owl_items(owl=item)
            # The text is replaced in place on the mapping returned by get_owl_items.
            encoded["text"] = self.preprocess(encoded["text"])
            encoded_items.append(encoded)
        return encoded_items

    def __str__(self) -> str:
        """
        Returns a string representation of the encoder.

        Returns:
            str: The string form of a one-entry mapping from the class name
                to ``self.items_in_owl``.
        """
        # __str__ must return a str; returning the dict itself makes str()/print()
        # raise "TypeError: __str__ returned non-string".
        return str({"LightweightEncoder": self.items_in_owl})

    def get_owl_items(self, owl: Dict) -> Any:
        """
        Extracts the encoder-specific data from a single ontology item.

        This is a hook for subclasses: each concrete encoder decides which fields
        of the item it extracts. ``parse`` expects the result to be a mapping
        with a ``"text"`` entry.

        Parameters:
            owl (Dict): A dictionary representing an ontology item.

        Returns:
            Any: The extracted ontology data.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        # Fail loudly here instead of returning None, which would only surface
        # later in parse() as an opaque "'NoneType' object is not subscriptable".
        raise NotImplementedError(
            f"{type(self).__name__} must implement get_owl_items()"
        )

    def get_encoder_info(self):
        """
        Provides information about the encoder.

        Returns:
            str: A description of the encoder's function in the overall pipeline.
        """
        return "INPUT CONSIST OF COMBINED INFORMATION TO FUZZY STRING MATCHING"
93+
2794

2895
class ConceptLightweightEncoder(LightweightEncoder):
2996
"""

0 commit comments

Comments
 (0)