Skip to content

Commit 068bb40

Browse files
committed
🐛 Fix bugs
1 parent e56faa6 commit 068bb40

File tree

3 files changed

+10
-7
lines changed

3 files changed

+10
-7
lines changed

ontoaligner/ontology_matchers/rag/rag.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ def generate_for_llm(self, tokenized_input_data: Any) -> Any:
122122
**tokenized_input_data,
123123
pad_token_id=self.tokenizer.eos_token_id,
124124
max_new_tokens=self.kwargs["max_new_tokens"],
125-
do_sample=False,
126125
output_scores=True,
127126
return_dict_in_generate=True
128127
)

ontoaligner/ontology_matchers/retrieval/retrieval.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,10 @@ def get_top_k(self, query_embed: Any, candidate_embeds: Any) -> [List, List]:
114114
values = [(score, index) for index, score in enumerate(results)]
115115
dtype = [("score", float), ("index", int)]
116116
results = np.array(values, dtype=dtype)
117-
top_k_items = np.sort(results, order="score")[-self.kwargs["top_k"] :][::-1]
117+
try:
118+
top_k_items = np.sort(results, order="score")[-self.kwargs["top_k"]:][::-1]
119+
except IndexError:
120+
top_k_items = np.sort(results, order="score")[::-1]
118121
top_k_indexes, top_k_scores = [], []
119122
for top_k in top_k_items:
120123
top_k_scores.append(top_k[0])
@@ -170,7 +173,7 @@ def load(self, path: str):
170173
Returns:
171174
None
172175
"""
173-
self.model = SentenceTransformer(path, device=self.kwargs["device"])
176+
self.model = SentenceTransformer(path, device=self.kwargs["device"], trust_remote_code=True)
174177

175178
def fit(self, inputs: Any) -> Any:
176179
"""

ontoaligner/pipeline.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,9 @@ def _collect_dataset(self):
6767
def __call__(self, method: str, encoder_model: BaseEncoder = None, model_class: BaseOMModel = None, dataset_class: Dataset = None, postprocessor: Any = None,
6868
llm_path: str = None, retriever_path: str = None, device: str = "cuda", batch_size: int = 2048, max_length: int = 300, max_new_tokens: int = 10,
6969
top_k: int = 10, fuzzy_sm_threshold: float = 0.2, evaluate: bool = False, return_matching: bool = True, output_file_name: str = "matchings",
70-
save_matchings: bool = False, ir_threshold: float = 0.5, llm_threshold: float = 0.5, llm_mapper: LabelMapper = None, llm_mapper_interested_class: str = 'yes',
71-
answer_set: Dict = {"yes": ["yes", "true"], "no": ["no", "false"]}, huggingface_access_token: str = "", openai_key: str = "", device_map: str = "auto",
72-
positive_ratio: float = 0.7, n_shots: int = 5) -> [Any, Any]:
70+
save_matchings: bool = False, ir_threshold: float = 0.5, ir_rag_threshold: float = 0.7, llm_threshold: float = 0.5, llm_mapper: LabelMapper = None,
71+
llm_mapper_interested_class: str = 'yes', answer_set: Dict = {"yes": ["yes", "true"], "no": ["no", "false"]}, huggingface_access_token: str = "",
72+
openai_key: str = "", device_map: str = "auto", positive_ratio: float = 0.7, n_shots: int = 5) -> [Any, Any]:
7373
"""
7474
Executes the ontology alignment process using the specified method.
7575
@@ -92,6 +92,7 @@ def __call__(self, method: str, encoder_model: BaseEncoder = None, model_class:
9292
output_file_name (str, optional): Output file name without file type. Defaults to "matchings".
9393
save_matchings (bool, optional): Whether to save the matching results. Defaults to False.
9494
ir_threshold (float, optional): Retrieval postprocessor threshold.
95+
ir_rag_threshold (float, optional): Retrieval postprocessor threshold in RAG module.
9596
llm_threshold (float, optional): LLM postprocessor threshold.
9697
llm_mapper (LabelMapper, optional): Mapper for LLM outputs.
9798
llm_mapper_interested_class (str, optional): Class to filter output pairs in LLM postprocessing.
@@ -127,7 +128,7 @@ def __call__(self, method: str, encoder_model: BaseEncoder = None, model_class:
127128
else:
128129
encoder_model = encoder_model or ConceptRAGEncoder()
129130
matchings = self._run_rag(method, encoder_model, model_class, postprocessor or rag_hybrid_postprocessor,
130-
llm_threshold, ir_threshold, retriever_path, llm_path, rag_config)
131+
llm_threshold, ir_rag_threshold, retriever_path, llm_path, rag_config)
131132
else:
132133
raise ValueError(f"Unknown method: {method}")
133134
return self._process_results(matchings, method, evaluate, return_matching, output_file_name, save_matchings)

0 commit comments

Comments (0)