|
5 | 5 | import os |
6 | 6 | import tempfile |
7 | 7 | import typing |
| 8 | +import csv |
8 | 9 |
|
9 | 10 | import pandas as pd |
10 | 11 | from pandas.api.types import is_numeric_dtype |
@@ -109,10 +110,7 @@ def get_results( |
109 | 110 | if result_table == "mapping_sessions_results_geometry": |
110 | 111 | result_sql = "ST_AsGeoJSON(msr.result) as result" |
111 | 112 | else: |
112 | | - result_sql = """ |
113 | | - (msr.result->>'result')::int as result, |
114 | | - msr.result->'ref' as ref |
115 | | - """ |
| 113 | + result_sql = "msr.result as result, msr.ref as ref" |
116 | 114 |
|
117 | 115 | sql_query = sql.SQL( |
118 | 116 | f""" |
@@ -431,6 +429,9 @@ def get_agg_results_by_task_id( |
431 | 429 | :, ~agg_results_df.columns.str.contains("Unnamed") |
432 | 430 | ] |
433 | 431 |
|
| 432 | + # Add ref column |
| 433 | + agg_results_df = add_ref_to_agg_results(results_df, agg_results_df) |
| 434 | + |
434 | 435 | return agg_results_df |
435 | 436 |
|
436 | 437 |
|
@@ -508,41 +509,26 @@ def get_statistics_for_geometry_result_project(project_id: str): |
508 | 509 | return project_stats_dict |
509 | 510 |
|
510 | 511 |
|
511 | | -def unify_refs(ref_list): |
512 | | - if not ref_list: |
513 | | - return None |
514 | | - first_ref = json.dumps(ref_list[0], sort_keys=True) |
515 | | - for r in ref_list[1:]: |
516 | | - if json.dumps(r, sort_keys=True) != first_ref: |
517 | | - return "multiple" |
518 | | - return ref_list[0] |
519 | | - |
520 | | - |
521 | | -def add_ref_to_agg_results( |
522 | | - results_df: pd.DataFrame, agg_results_df: pd.DataFrame |
523 | | -) -> pd.DataFrame: |
def _is_missing_ref(ref) -> bool:
    """Return True when *ref* carries no information (None, NaN, empty)."""
    # Containers and strings are checked by emptiness. They must never reach
    # pd.isna(), which returns an element-wise array for list-likes and would
    # make the truth test ambiguous.
    if isinstance(ref, (str, dict, list, tuple)):
        return not ref
    return bool(pd.api.types.is_scalar(ref) and pd.isna(ref))


def add_ref_to_agg_results(
    results_df: pd.DataFrame, agg_results_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Add a 'ref' column to agg_results_df for writing to CSV.

    The refs of all results belonging to one task are collected, ignoring
    missing refs (None, NaN, empty string, empty container). Then, per task:

    - if every remaining ref is equal, that single ref is used;
    - if the refs differ, the list of all refs is used;
    - if no ref remains, the cell is set to "".

    Tasks are matched on "task_id", plus "project_id" and "group_id" whenever
    both data frames provide these columns, so refs of same-named tasks in
    different groups are not mixed.

    The column is only added when at least one task has a ref. When it is
    added, agg_results_df is modified in place and also returned. If
    results_df has no 'ref' column, agg_results_df is returned unchanged.
    """

    if "ref" not in results_df.columns:
        return agg_results_df

    # "task_id" is mandatory (a KeyError surfaces if it is absent); the other
    # key columns only take part when both frames can supply them.
    key_columns = [
        column
        for column in ("project_id", "group_id")
        if column in results_df.columns and column in agg_results_df.columns
    ]
    key_columns.append("task_id")

    # Collect the usable refs of every task, keyed by a tuple of key values.
    refs_per_task: dict = {}
    result_keys = zip(*(results_df[column] for column in key_columns))
    for task_key, ref in zip(result_keys, results_df["ref"]):
        if _is_missing_ref(ref):
            continue
        refs_per_task.setdefault(task_key, []).append(ref)

    # Collapse identical refs to a single value, keep differing refs as a list.
    ref_values = {
        task_key: refs[0] if all(r == refs[0] for r in refs[1:]) else refs
        for task_key, refs in refs_per_task.items()
    }

    if ref_values:
        agg_keys = zip(*(agg_results_df[column] for column in key_columns))
        agg_results_df["ref"] = pd.Series(
            [ref_values.get(task_key, "") for task_key in agg_keys],
            index=agg_results_df.index,
            dtype=object,
        )
    return agg_results_df
547 | 533 |
|
548 | 534 |
|
@@ -593,9 +579,7 @@ def get_statistics_for_integer_result_project( |
593 | 579 | project_info["custom_options"], |
594 | 580 | ) |
595 | 581 |
|
596 | | - agg_results_df = add_ref_to_agg_results(results_df, agg_results_df) |
597 | | - |
598 | | - agg_results_df.to_csv(agg_results_filename, index_label="idx") |
| 582 | + agg_results_df.to_csv(agg_results_filename, index_label="idx", quotechar='"', quoting=csv.QUOTE_MINIMAL) |
599 | 583 |
|
600 | 584 | geojson_functions.gzipped_csv_to_gzipped_geojson( |
601 | 585 | filename=agg_results_filename, |
|
0 commit comments