Skip to content

Commit afec6a8

Browse files
committed
fix(conflation): ref column in agg_results
1 parent 10b03c2 commit afec6a8

File tree

3 files changed

+49
-39
lines changed

3 files changed

+49
-39
lines changed

mapswipe_workers/mapswipe_workers/firebase_to_postgres/transfer_results.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ def results_to_file(
271271
if type(result_data["results"]) is dict:
272272
for taskId, result in result_data["results"].items():
273273

274-
ref_data = result_data.get("ref", {}).get(taskId, {})
274+
ref_data = result_data.get("reference", {}).get(taskId, {})
275275
ref_json = json.dumps(ref_data) if ref_data else r"\N"
276276

277277
if result_type == "geometry":
@@ -299,7 +299,7 @@ def results_to_file(
299299
# list indices 0-4 will have value None
300300
for taskId, result in enumerate(result_data["results"]):
301301

302-
ref_data = result_data.get("ref", {}).get(taskId, {})
302+
ref_data = result_data.get("reference", {}).get(taskId, {})
303303
ref_json = json.dumps(ref_data) if ref_data else r"\N"
304304

305305
if result is None:

mapswipe_workers/mapswipe_workers/generate_stats/project_stats.py

Lines changed: 21 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import tempfile
77
import typing
8+
import csv
89

910
import pandas as pd
1011
from pandas.api.types import is_numeric_dtype
@@ -109,10 +110,7 @@ def get_results(
109110
if result_table == "mapping_sessions_results_geometry":
110111
result_sql = "ST_AsGeoJSON(msr.result) as result"
111112
else:
112-
result_sql = """
113-
(msr.result->>'result')::int as result,
114-
msr.result->'ref' as ref
115-
"""
113+
result_sql = "msr.result as result, msr.ref as ref"
116114

117115
sql_query = sql.SQL(
118116
f"""
@@ -431,6 +429,9 @@ def get_agg_results_by_task_id(
431429
:, ~agg_results_df.columns.str.contains("Unnamed")
432430
]
433431

432+
# Add ref column
433+
agg_results_df = add_ref_to_agg_results(results_df, agg_results_df)
434+
434435
return agg_results_df
435436

436437

@@ -508,41 +509,26 @@ def get_statistics_for_geometry_result_project(project_id: str):
508509
return project_stats_dict
509510

510511

511-
def unify_refs(ref_list):
512-
if not ref_list:
513-
return None
514-
first_ref = json.dumps(ref_list[0], sort_keys=True)
515-
for r in ref_list[1:]:
516-
if json.dumps(r, sort_keys=True) != first_ref:
517-
return "multiple"
518-
return ref_list[0]
519-
520-
521-
def add_ref_to_agg_results(
522-
results_df: pd.DataFrame, agg_results_df: pd.DataFrame
523-
) -> pd.DataFrame:
512+
def add_ref_to_agg_results(results_df: pd.DataFrame, agg_results_df: pd.DataFrame) -> pd.DataFrame:
524513
"""
525-
Add a 'ref' column to agg_results_df.
526-
If all user refs for a task are identical, use that ref.
527-
If refs differ, set ref to 'multiple'.
514+
Adds a 'ref' column to agg_results_df for writing to CSV
528515
"""
529516

530-
# collect refs per task
531-
refs_per_task = (
532-
results_df.groupby(["project_id", "group_id", "task_id"])["ref"]
533-
.apply(list)
534-
.reset_index()
535-
)
536-
537-
refs_per_task["ref"] = refs_per_task["ref"].apply(unify_refs)
517+
refs_per_task = results_df.groupby("task_id")["ref"].apply(list)
538518

539-
# merge into agg_results_df
540-
agg_results_df = agg_results_df.merge(
541-
refs_per_task[["project_id", "group_id", "task_id", "ref"]],
542-
on=["project_id", "group_id", "task_id"],
543-
how="left",
544-
)
519+
ref_values = {}
520+
for task_id, refs in refs_per_task.items():
521+
# Filter out missing refs: None, NaN, empty dicts and empty strings
522+
refs = [r for r in refs if r not in (None, {}, "") and not pd.isna(r)]
523+
if not refs:
524+
continue
525+
elif all(r == refs[0] for r in refs):
526+
ref_values[task_id] = refs[0]
527+
else:
528+
ref_values[task_id] = refs
545529

530+
if ref_values:
531+
agg_results_df["ref"] = agg_results_df["task_id"].map(ref_values).fillna("")
546532
return agg_results_df
547533

548534

@@ -593,9 +579,7 @@ def get_statistics_for_integer_result_project(
593579
project_info["custom_options"],
594580
)
595581

596-
agg_results_df = add_ref_to_agg_results(results_df, agg_results_df)
597-
598-
agg_results_df.to_csv(agg_results_filename, index_label="idx")
582+
agg_results_df.to_csv(agg_results_filename, index_label="idx", quotechar='"', quoting=csv.QUOTE_MINIMAL)
599583

600584
geojson_functions.gzipped_csv_to_gzipped_geojson(
601585
filename=agg_results_filename,

mapswipe_workers/tests/unittests/test_project_stats.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pandas as pd
44

55
from mapswipe_workers.generate_stats.project_stats import (
6+
add_ref_to_agg_results,
67
add_missing_result_columns,
78
calc_agreement,
89
calc_count,
@@ -172,6 +173,31 @@ def test_calc_parent_option_count(self):
172173
assert list(compared["other"].index) == updated_index
173174
assert list(compared["other"]) == updated_value
174175

176+
def test_add_ref_single_ref(self):
177+
# All results have the same ref
178+
results_df = pd.DataFrame({
179+
"task_id": ["t1", "t1"],
180+
"ref": [{"osmId": 123, "osmType": "ways_poly"}, {"osmId": 123, "osmType": "ways_poly"}]
181+
})
182+
agg_results_df = pd.DataFrame({"task_id": ["t1"]})
183+
updated_df = add_ref_to_agg_results(results_df, agg_results_df.copy())
184+
self.assertIn("ref", updated_df.columns)
185+
self.assertEqual(updated_df["ref"].iloc[0], {"osmId": 123, "osmType": "ways_poly"})
186+
187+
def test_add_ref_multiple_refs(self):
188+
# Different refs for same task
189+
results_df = pd.DataFrame({
190+
"task_id": ["t1", "t1"],
191+
"ref": [{"osmId": 123}, {"osmId": 456}]
192+
})
193+
agg_results_df = pd.DataFrame({"task_id": ["t1"]})
194+
updated_df = add_ref_to_agg_results(results_df, agg_results_df.copy())
195+
self.assertIn("ref", updated_df.columns)
196+
self.assertEqual(
197+
updated_df["ref"].iloc[0],
198+
[{"osmId": 123}, {"osmId": 456}]
199+
)
200+
175201

176202
if __name__ == "__main__":
177203
unittest.main()

0 commit comments

Comments
 (0)