Skip to content

Commit 963bb25

Browse files
authored
Merge pull request #1051 from mapswipe/conflation-project-type-osm-ref
Conflation project type osm ref
2 parents 087a17b + 004ac56 commit 963bb25

File tree

9 files changed

+184
-9
lines changed

9 files changed

+184
-9
lines changed

mapswipe_workers/mapswipe_workers/firebase_to_postgres/transfer_results.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import csv
22
import io
3+
import json
34
from typing import List, Tuple
45

56
import dateutil.parser
@@ -269,6 +270,10 @@ def results_to_file(
269270

270271
if type(result_data["results"]) is dict:
271272
for taskId, result in result_data["results"].items():
273+
274+
ref_data = result_data.get("reference", {}).get(taskId, {})
275+
ref_json = json.dumps(ref_data) if ref_data else r"\N"
276+
272277
if result_type == "geometry":
273278
result = geojson.dumps(geojson.GeometryCollection(result))
274279
w.writerow(
@@ -283,6 +288,7 @@ def results_to_file(
283288
result,
284289
app_version,
285290
client_type,
291+
ref_json,
286292
]
287293
)
288294
elif type(result_data["results"]) is list:
@@ -292,6 +298,10 @@ def results_to_file(
292298
# if first key (list index) is 5
293299
# list indices 0-4 will have value None
294300
for taskId, result in enumerate(result_data["results"]):
301+
302+
ref_data = result_data.get("reference", {}).get(taskId, {})
303+
ref_json = json.dumps(ref_data) if ref_data else r"\N"
304+
295305
if result is None:
296306
continue
297307
else:
@@ -309,6 +319,7 @@ def results_to_file(
309319
result,
310320
app_version,
311321
client_type,
322+
ref_json,
312323
]
313324
)
314325
else:
@@ -369,6 +380,7 @@ def save_results_to_postgres(
369380
"result",
370381
"app_version",
371382
"client_type",
383+
"ref",
372384
]
373385
p_con.copy_from(results_file, result_temp_table, columns)
374386
results_file.close()
@@ -420,6 +432,7 @@ def save_results_to_postgres(
420432

421433
query_insert_mapping_sessions = f"""
422434
BEGIN;
435+
423436
INSERT INTO mapping_sessions
424437
SELECT
425438
project_id,
@@ -433,9 +446,10 @@ def save_results_to_postgres(
433446
client_type
434447
FROM {result_temp_table}
435448
GROUP BY project_id, group_id, user_id, app_version, client_type
436-
ON CONFLICT (project_id,group_id,user_id)
449+
ON CONFLICT (project_id, group_id, user_id)
437450
DO NOTHING;
438-
INSERT INTO {result_table}
451+
452+
INSERT INTO {result_table} (mapping_session_id, task_id, result)
439453
SELECT
440454
ms.mapping_session_id,
441455
r.task_id,
@@ -447,6 +461,21 @@ def save_results_to_postgres(
447461
AND ms.user_id = r.user_id
448462
ON CONFLICT (mapping_session_id, task_id)
449463
DO NOTHING;
464+
465+
INSERT INTO mapping_sessions_refs (mapping_session_id, task_id, ref)
466+
SELECT
467+
ms.mapping_session_id,
468+
r.task_id,
469+
r.ref
470+
FROM {result_temp_table} r
471+
JOIN mapping_sessions ms ON
472+
ms.project_id = r.project_id
473+
AND ms.group_id = r.group_id
474+
AND ms.user_id = r.user_id
475+
WHERE r.ref IS NOT NULL
476+
ON CONFLICT (mapping_session_id, task_id)
477+
DO NOTHING;
478+
450479
COMMIT;
451480
"""
452481
p_con.query(query_insert_mapping_sessions)

mapswipe_workers/mapswipe_workers/generate_stats/project_stats.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import ast
2+
import csv
23
import datetime
34
import gzip
45
import json
@@ -98,6 +99,7 @@ def get_results(
9899
Parse timestamp as datetime object and add attribute "day" for each result.
99100
Return None if there are no results for this project.
100101
Otherwise, return dataframe.
102+
Include the 'ref' JSON field in integer results if it exists.
101103
102104
Parameters
103105
----------
@@ -108,7 +110,7 @@ def get_results(
108110
if result_table == "mapping_sessions_results_geometry":
109111
result_sql = "ST_AsGeoJSON(msr.result) as result"
110112
else:
111-
result_sql = "msr.result"
113+
result_sql = "msr.result as result"
112114

113115
sql_query = sql.SQL(
114116
f"""
@@ -124,6 +126,7 @@ def get_results(
124126
ms.app_version,
125127
ms.client_type,
126128
{result_sql},
129+
refs.ref as ref,
127130
-- the username for users which login to MapSwipe with their
128131
-- OSM account is not defined or ''.
129132
-- We capture this here as it will cause problems
@@ -136,7 +139,10 @@ def get_results(
136139
LEFT JOIN mapping_sessions ms ON
137140
ms.mapping_session_id = msr.mapping_session_id
138141
LEFT JOIN users U USING (user_id)
139-
WHERE project_id = {"{}"}
142+
LEFT JOIN mapping_sessions_refs refs
143+
ON msr.mapping_session_id = refs.mapping_session_id
144+
AND msr.task_id = refs.task_id
145+
WHERE ms.project_id = {"{}"}
140146
) TO STDOUT WITH CSV HEADER
141147
"""
142148
).format(sql.Literal(project_id))
@@ -427,6 +433,8 @@ def get_agg_results_by_task_id(
427433
:, ~agg_results_df.columns.str.contains("Unnamed")
428434
]
429435

436+
agg_results_df = add_ref_to_agg_results(results_df, agg_results_df)
437+
430438
return agg_results_df
431439

432440

@@ -504,6 +512,30 @@ def get_statistics_for_geometry_result_project(project_id: str):
504512
return project_stats_dict
505513

506514

515+
def add_ref_to_agg_results(
    results_df: pd.DataFrame, agg_results_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Add a 'ref' column to agg_results_df based on the refs in results_df.

    For each task_id, all unique non-empty refs are collected, in the order
    in which they first appear in results_df, and serialized as a JSON list.
    Tasks without any ref get an empty string.

    agg_results_df is returned unchanged (no 'ref' column is added) if
    results_df has no 'ref' column or if no task has a non-empty ref.

    Parameters
    ----------
    results_df: pd.DataFrame
        Individual results with a 'task_id' column and, optionally, a 'ref'
        column holding JSON-encoded strings (missing values are skipped).
    agg_results_df: pd.DataFrame
        Aggregated results with a 'task_id' column. The 'ref' column is
        assigned on this frame in place; the same object is returned.
    """
    if "ref" not in results_df.columns:
        return agg_results_df

    # A dict is used as an insertion-ordered set so that the serialized list
    # is identical between runs. A plain set of strings would be iterated in
    # an order that depends on string hash randomization.
    refs_by_task: dict = {}
    for task_id, ref in zip(results_df["task_id"], results_df["ref"]):
        if pd.isna(task_id) or pd.isna(ref) or ref in ({}, ""):
            continue
        refs_by_task.setdefault(task_id, {})[ref] = None

    if not refs_by_task:
        return agg_results_df

    serialized_refs = {
        task_id: json.dumps([json.loads(ref) for ref in refs])
        for task_id, refs in refs_by_task.items()
    }
    agg_results_df["ref"] = (
        agg_results_df["task_id"].map(serialized_refs).fillna("")
    )

    return agg_results_df
537+
538+
507539
def get_statistics_for_integer_result_project(
508540
project_id: str, project_info: pd.Series, generate_hot_tm_geometries: bool
509541
) -> dict:
@@ -550,7 +582,13 @@ def get_statistics_for_integer_result_project(
550582
tasks_df,
551583
project_info["custom_options"],
552584
)
553-
agg_results_df.to_csv(agg_results_filename, index_label="idx")
585+
586+
agg_results_df.to_csv(
587+
agg_results_filename,
588+
index_label="idx",
589+
quotechar='"',
590+
quoting=csv.QUOTE_MINIMAL,
591+
)
554592

555593
geojson_functions.gzipped_csv_to_gzipped_geojson(
556594
filename=agg_results_filename,
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"createdBy": "atCSosZACaN0qhcVjtMO1tq9d1G3",
3+
"geometry": "https://firebasestorage.googleapis.com/v0/b/dev-mapswipe.appspot.com/o/all_predictions_192.geojson?alt=media&token=b7a85e56-6ab1-4e0d-a734-a772025a88b8",
4+
"filter": "way['building']",
5+
"groupSize": 25,
6+
"image": "https://firebasestorage.googleapis.com/v0/b/dev-mapswipe.appspot.com/o/projectImages%2F1742895229710-project-image-1x1.png?alt=media&token=26cf1956-9ab7-4348-b529-9952f2f8424e",
7+
"lookFor": "Buildings",
8+
"manualUrl": "https://fair-dev.hotosm.org/start-mapping/358",
9+
"name": "Conflate fAIr buildings - Kathmandu (1)\nHOT",
10+
"projectDetails": "This is a test.",
11+
"projectNumber": 1,
12+
"projectRegion": "Kathmandu",
13+
"projectTopic": "Conflate fAIr buildings",
14+
"projectTopicKey": "conflate fair buildings - kathmandu (1) hot",
15+
"projectType": 8,
16+
"requestingOrganisation": "HOT",
17+
"tileServer": {
18+
"credits": "Please add imagery credits here.",
19+
"name": "custom",
20+
"url": "https://2glp8ghj65.execute-api.us-east-1.amazonaws.com/cog/tiles/WebMercatorQuad/{z}/{x}/{y}@1x?url=https%3A%2F%2Foin-hotosm-temp.s3.us-east-1.amazonaws.com%2F62d85d11d8499800053796c1%2F0%2F62d85d11d8499800053796c2.tif",
21+
"wmtsLayerName": "-"
22+
},
23+
"tutorialId": "tutorial_-MQsj5VWpNcJxCTVTOyH",
24+
"verificationNumber": 3
25+
}

mapswipe_workers/tests/integration/set_up_db.sql

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ CREATE TABLE IF NOT EXISTS results_temp (
9393
end_time timestamp,
9494
result int,
9595
app_version varchar,
96-
client_type varchar
96+
client_type varchar,
97+
ref jsonb
9798
);
9899

99100
-- create table for results import through csv
@@ -107,7 +108,8 @@ CREATE TABLE IF NOT EXISTS results_geometry_temp (
107108
end_time timestamp,
108109
result varchar,
109110
app_version varchar,
110-
client_type varchar
111+
client_type varchar,
112+
ref jsonb
111113
);
112114

113115

@@ -206,6 +208,15 @@ CREATE TABLE IF NOT EXISTS mapping_sessions_results_geometry (
206208
references mapping_sessions (mapping_session_id)
207209
);
208210

211+
-- Holds the JSONB reference attached to a single task of a mapping session.
-- The composite primary key allows at most one ref per
-- (mapping_session_id, task_id); ref itself may never be NULL.
-- mapping_session_id must point at an existing row in mapping_sessions.
CREATE TABLE IF NOT EXISTS mapping_sessions_refs (
212+
mapping_session_id int8,
213+
task_id varchar,
214+
ref JSONB not null,
215+
PRIMARY KEY (mapping_session_id, task_id),
216+
FOREIGN KEY (mapping_session_id)
217+
references mapping_sessions (mapping_session_id)
218+
);
219+
209220
CREATE OR REPLACE FUNCTION mapping_sessions_results_constraint() RETURNS trigger
210221
LANGUAGE plpgsql AS
211222
$$

mapswipe_workers/tests/integration/test_get_results.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def test_get_results_df_from_postgres(self):
3838
"app_version",
3939
"client_type",
4040
"result",
41+
"ref",
4142
"username",
4243
"day",
4344
],

mapswipe_workers/tests/integration/test_get_results_real_project.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def test_get_results_df_from_postgres(self):
7171
"app_version",
7272
"client_type",
7373
"result",
74+
"ref",
7475
"username",
7576
"day",
7677
],

mapswipe_workers/tests/unittests/test_project_stats.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import json
12
import unittest
23

34
import pandas as pd
45

56
from mapswipe_workers.generate_stats.project_stats import (
67
add_missing_result_columns,
8+
add_ref_to_agg_results,
79
calc_agreement,
810
calc_count,
911
calc_parent_option_count,
@@ -172,6 +174,49 @@ def test_calc_parent_option_count(self):
172174
assert list(compared["other"].index) == updated_index
173175
assert list(compared["other"]) == updated_value
174176

177+
def test_add_ref_single_ref(self):
    """Identical refs on one task collapse into a single-element list."""
    expected_ref = {"osmId": 123, "osmType": "ways_poly"}
    encoded_ref = json.dumps(expected_ref)
    raw_results = pd.DataFrame(
        {
            "task_id": ["t1", "t1"],
            "ref": [encoded_ref, encoded_ref],
        }
    )
    aggregated = pd.DataFrame({"task_id": ["t1"]})

    outcome = add_ref_to_agg_results(raw_results, aggregated.copy())

    self.assertIn("ref", outcome.columns)
    self.assertEqual(json.loads(outcome["ref"].iloc[0]), [expected_ref])
194+
195+
def test_add_ref_multiple_refs(self):
    """Distinct refs reported for one task are all kept in the aggregate."""
    expected_refs = [{"osmId": 123}, {"osmId": 456}]
    raw_results = pd.DataFrame(
        {
            "task_id": ["t1", "t1"],
            "ref": [json.dumps(ref) for ref in expected_refs],
        }
    )
    aggregated = pd.DataFrame({"task_id": ["t1"]})

    outcome = add_ref_to_agg_results(raw_results, aggregated.copy())

    self.assertIn("ref", outcome.columns)
    # Only the contents are compared, not the order of the collected refs.
    self.assertCountEqual(json.loads(outcome["ref"].iloc[0]), expected_refs)
209+
210+
def test_add_ref_no_refs_column(self):
    """Without a 'ref' column in the results the aggregate stays untouched."""
    task_ids = ["t1", "t2"]
    raw_results = pd.DataFrame({"task_id": task_ids, "result": [1, 2]})
    aggregated = pd.DataFrame({"task_id": task_ids})

    outcome = add_ref_to_agg_results(raw_results, aggregated.copy())

    self.assertNotIn("ref", outcome.columns)
    pd.testing.assert_frame_equal(outcome, aggregated)
219+
175220

176221
if __name__ == "__main__":
177222
unittest.main()

postgres/initdb.sql

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ CREATE TABLE IF NOT EXISTS results_temp (
9393
end_time timestamp,
9494
result int,
9595
app_version varchar,
96-
client_type varchar
96+
client_type varchar,
97+
ref jsonb
9798
);
9899

99100
-- create table for results import through csv
@@ -107,7 +108,8 @@ CREATE TABLE IF NOT EXISTS results_geometry_temp (
107108
end_time timestamp,
108109
result varchar,
109110
app_version varchar,
110-
client_type varchar
111+
client_type varchar,
112+
ref jsonb
111113
);
112114

113115

@@ -206,6 +208,15 @@ CREATE TABLE IF NOT EXISTS mapping_sessions_results_geometry (
206208
references mapping_sessions (mapping_session_id)
207209
);
208210

211+
-- Holds the JSONB reference attached to a single task of a mapping session.
-- The composite primary key allows at most one ref per
-- (mapping_session_id, task_id); ref itself may never be NULL.
-- mapping_session_id must point at an existing row in mapping_sessions.
CREATE TABLE IF NOT EXISTS mapping_sessions_refs (
212+
mapping_session_id int8,
213+
task_id varchar,
214+
ref JSONB not null,
215+
PRIMARY KEY (mapping_session_id, task_id),
216+
FOREIGN KEY (mapping_session_id)
217+
references mapping_sessions (mapping_session_id)
218+
);
219+
209220
CREATE OR REPLACE FUNCTION mapping_sessions_results_constraint() RETURNS trigger
210221
LANGUAGE plpgsql AS
211222
$$
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
-- Migration: add storage for per-task reference data ("ref").
--
-- Every statement is guarded with IF NOT EXISTS so the script can be run
-- again on a database that already has the column or table without failing.

-- Nullable jsonb column on both results import tables.
ALTER TABLE results_temp
    ADD COLUMN IF NOT EXISTS ref jsonb;

ALTER TABLE results_geometry_temp
    ADD COLUMN IF NOT EXISTS ref jsonb;

-- At most one non-null ref per (mapping_session_id, task_id), tied to an
-- existing mapping session.
CREATE TABLE IF NOT EXISTS public.mapping_sessions_refs (
    mapping_session_id int8,
    task_id varchar,
    ref JSONB not null,
    PRIMARY KEY (mapping_session_id, task_id),
    FOREIGN KEY (mapping_session_id)
        references mapping_sessions (mapping_session_id)
);

0 commit comments

Comments
 (0)