Skip to content

Commit f5a02fc

Browse files
author
Marcin Kardas
committed
Fix transformed metric format
1 parent 1378aa2 commit f5a02fc

File tree

4 files changed

+10
-3
lines changed

4 files changed

+10
-3
lines changed

sota_extractor2/models/linking/bm25_naive.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,11 +231,12 @@ def linked_proposals(proposals):
231231
df = taxonomy_linking(prop.dataset, datasets, desc, topk=topk, debug_info=prop)
232232
for _, row in df.iterrows():
233233
raw_value = prop.raw_value
234-
parsed = float(extract_value(raw_value, format))
234+
parsed = extract_value(raw_value, format)
235235
metric = row['metric']
236236
if metric != row['true_metric']:
237237
metric = row['true_metric']
238238
parsed = 1 - parsed if parsed < 1 else 100 - parsed
239+
parsed = float(parsed)
239240

240241
linked = {
241242
'dataset': row['dataset'],

sota_extractor2/models/linking/context_search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def __call__(self, query, datasets, caption, topk=1, debug_info=None):
211211
cellstr = debug_info.cell.cell_ext_id
212212
pipeline_logger("linking::taxonomy_linking::call", ext_id=cellstr, query=query, datasets=datasets, caption=caption)
213213
datasets = " ".join(datasets)
214-
key = (datasets, caption, query)
214+
key = (datasets, caption, query, topk)
215215
###print(f"[DEBUG] {cellstr}")
216216
###print("[DEBUG]", debug_info)
217217
###print("query:", query, caption)
@@ -229,7 +229,7 @@ def __call__(self, query, datasets, caption, topk=1, debug_info=None):
229229
###print("Taking result from cache")
230230
p = self.queries[key]
231231
else:
232-
dist = self.match(key)
232+
dist = self.match((datasets, caption, query))
233233
top_results = sorted(dist, key=lambda x: x[1], reverse=True)[:max(topk, 5)]
234234

235235
entries = []

sota_extractor2/models/linking/manual_dicts.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@
156156

157157
# Semantic Textual Similarity
158158
'STS Benchmark': ['sts b'],
159+
160+
# Paraphrase Identification
161+
'Microsoft Research Paraphrase Corpus': ['MRPC'],
159162
}
160163

161164
tasks = {}

sota_extractor2/models/structure/ulmfit.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ def __init__(self, path, file, sp_path=None, sp_model="spm.model", sp_vocab="spm
66
path = Path(path)
77
sp_path = path if sp_path is None else Path(sp_path)
88
self.learner = load_learner(path=path, file=file)
9+
import sys, os
10+
print(f"[PID {os.getpid()}] Load model {file}", file=sys.stderr)
11+
sys.stderr.flush()
912
self._fix_sp_processor(sp_path, sp_model, sp_vocab)
1013

1114
# disable multiprocessing to avoid celery daemon issues

0 commit comments

Comments
 (0)