update evaluation: run asreview dedup via the asreview-datatools CLI

Gerit Wagner · Gerit Wagner · commit ff98c5740403 · 2025-09-30T12:13:17.000+02:00
diff --git a/notebooks/evaluation.py b/notebooks/evaluation.py
@@ -9,7 +9,6 @@
 
 import evaluation
 import pandas as pd
-from asreview.data import ASReviewData
 
 from bib_dedupe.bib_dedupe import block
 from bib_dedupe.bib_dedupe import cluster
@@ -336,14 +335,40 @@ def append_to_output(result: dict, *, package_name: str) -> None:
         )
 
         # ASReview
+
+        tmp_in = Path("notebooks/asreview_input.csv")
+        tmp_out = Path("notebooks/asreview_dedup.csv")
+        records_df.to_csv(tmp_in, index=False)
+
         timestamp = datetime.now()
-        asdata = ASReviewData(records_df)
-        merged_df = asdata.drop_duplicates()
+        subprocess.run(
+            [
+                "asreview",
+                "data",
+                "dedup",
+                str(tmp_in),
+                "-o",
+                str(tmp_out),
+                "--pid",
+                "DOI",
+            ],
+            check=True,
+        )
+        merged_df = pd.read_csv(tmp_out)
+
         result = dedupe_benchmark.compare_dedupe_id(
             records_df=records_df, merged_df=merged_df, timestamp=timestamp
         )
         evaluation.append_to_output(result, package_name="asreview")
 
+        # timestamp = datetime.now()
+        # asdata = ASReviewData(records_df)
+        # merged_df = asdata.drop_duplicates()
+        # result = dedupe_benchmark.compare_dedupe_id(
+        #     records_df=records_df, merged_df=merged_df, timestamp=timestamp
+        # )
+        # evaluation.append_to_output(result, package_name="asreview")
+
         # ASySD (R)
         # temporarily skip (need to combine part1/2)
         if benchmark_path == "depression":
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
 [project.optional-dependencies]
 with-data = [
     "asreview>=1.5",
+    "asreview-datatools",
 ]
 dev = [
     "pylint==3.0.1",

Original file line number	Diff line number	Diff line change
`@@ -36,6 +36,7 @@ dependencies = [`
`36`	`36`	`[project.optional-dependencies]`
`37`	`37`	`with-data = [`
`38`	`38`	`"asreview>=1.5",`
	`39`	`+ "asreview-datatools",`
`39`	`40`	`]`
`40`	`41`	`dev = [`
`41`	`42`	`"pylint==3.0.1",`