|
9 | 9 |
|
10 | 10 | import evaluation |
11 | 11 | import pandas as pd |
12 | | -from asreview.data import ASReviewData |
13 | 12 |
|
14 | 13 | from bib_dedupe.bib_dedupe import block |
15 | 14 | from bib_dedupe.bib_dedupe import cluster |
@@ -336,14 +335,40 @@ def append_to_output(result: dict, *, package_name: str) -> None: |
336 | 335 | ) |
337 | 336 |
|
338 | 337 | # ASReview |
| 338 | + |
| 339 | + tmp_in = Path("notebooks/asreview_input.csv") |
| 340 | + tmp_out = Path("notebooks/asreview_dedup.csv") |
| 341 | + records_df.to_csv(tmp_in, index=False) |
| 342 | + |
339 | 343 | timestamp = datetime.now() |
340 | | - asdata = ASReviewData(records_df) |
341 | | - merged_df = asdata.drop_duplicates() |
| 344 | + subprocess.run( |
| 345 | + [ |
| 346 | + "asreview", |
| 347 | + "data", |
| 348 | + "dedup", |
| 349 | + str(tmp_in), |
| 350 | + "-o", |
| 351 | + str(tmp_out), |
| 352 | + "--pid", |
| 353 | + "DOI", |
| 354 | + ], |
| 355 | + check=True, |
| 356 | + ) |
| 357 | + merged_df = pd.read_csv(tmp_out) |
| 358 | + |
342 | 359 | result = dedupe_benchmark.compare_dedupe_id( |
343 | 360 | records_df=records_df, merged_df=merged_df, timestamp=timestamp |
344 | 361 | ) |
345 | 362 | evaluation.append_to_output(result, package_name="asreview") |
346 | 363 |
|
| 364 | + # timestamp = datetime.now() |
| 365 | + # asdata = ASReviewData(records_df) |
| 366 | + # merged_df = asdata.drop_duplicates() |
| 367 | + # result = dedupe_benchmark.compare_dedupe_id( |
| 368 | + # records_df=records_df, merged_df=merged_df, timestamp=timestamp |
| 369 | + # ) |
| 370 | + # evaluation.append_to_output(result, package_name="asreview") |
| 371 | + |
347 | 372 | # ASySD (R) |
348 | 373 | # temporarily skip (need to combine part1/2) |
349 | 374 | if benchmark_path == "depression": |
|
0 commit comments