Skip to content

Commit c80cc6e

Browse files
authored
Merge pull request #609 from mapswipe/dev
Dev - fix missing timestamps in mapping_sessions
2 parents 9b43810 + 0b4503d commit c80cc6e

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import sys
2+
3+
import pandas as pd
4+
5+
from mapswipe_workers import auth
6+
from mapswipe_workers.definitions import logger
7+
8+
9+
def copy_results_batch(first_timestamp, last_timestamp):
    """Copy the results of one time span into ``mapping_sessions_results``.

    A single SQL script is executed. It (re)creates the helper table
    ``results_batch`` from every ``results`` row that matches a mapping
    session (same project_id, group_id and user_id) whose ``start_time``
    lies in ``[first_timestamp, last_timestamp)``, and then inserts those
    rows into ``mapping_sessions_results``. Rows that conflict with
    existing ones are skipped. ``results_batch`` is not dropped afterwards;
    it is replaced at the start of the next call.

    Args:
        first_timestamp: Inclusive lower bound for ``ms.start_time``.
        last_timestamp: Exclusive upper bound for ``ms.start_time``.
    """
    sql = """
        -- create table with results for given time span
        drop table if exists results_batch;
        create table results_batch as
            select ms.mapping_session_id, r.task_id, r."result"
            from mapping_sessions ms, results r
            where
                ms.start_time >= %(first_timestamp)s
                and ms.start_time < %(last_timestamp)s
                and ms.project_id = r.project_id
                and ms.group_id = r.group_id
                and ms.user_id = r.user_id;
        insert into mapping_sessions_results
        (select r.* from results_batch r)
        on conflict do nothing;
    """
    # Both bounds are handed over as bind parameters rather than being
    # formatted into the statement text.
    params = {
        "first_timestamp": first_timestamp,
        "last_timestamp": last_timestamp,
    }

    start_message = (
        f"Start process for : ms.start_time>={first_timestamp} "
        f"and ms.start_time<{last_timestamp}"
    )
    logger.info(start_message)

    db = auth.postgresDB()
    db.query(sql, params)

    finished_message = (
        f"Finished process for : ms.start_time >= {first_timestamp} "
        f"and ms.start_time < {last_timestamp}"
    )
    logger.info(finished_message)
38+
39+
40+
def build_batch_boundaries(min_timestamp, max_timestamp):
    """Return the ordered date strings that delimit the monthly batches.

    Consecutive entries of the returned list form the half-open intervals
    ``[first, last)`` that are processed one after another. The list always
    starts with ``min_timestamp`` and ends with ``max_timestamp``; every
    first-of-month date strictly in between is an additional boundary.
    Taking only the month starts would silently skip the part of the span
    before the first and after the last month start whenever an argument
    is not itself the first day of a month.

    Args:
        min_timestamp: Start of the span (inclusive), e.g. ``"2016-01-01"``.
        max_timestamp: End of the span (exclusive), e.g. ``"2022-10-01"``.
            Any time-of-day component of either argument is ignored.

    Returns:
        A list of ``YYYY-MM-DD`` strings; empty when the span contains no
        full day (``min_timestamp`` on or after ``max_timestamp``).

    Raises:
        ValueError: If an argument cannot be parsed as a date.
    """
    start = pd.Timestamp(min_timestamp).normalize()
    end = pd.Timestamp(max_timestamp).normalize()
    if start >= end:
        return []

    month_starts = pd.date_range(start, end, freq="MS")
    # The end points are added explicitly, so keep only the month starts
    # that fall strictly inside the span to avoid duplicated boundaries.
    inner = [ts for ts in month_starts if start < ts < end]
    return [ts.strftime("%Y-%m-%d") for ts in (start, *inner, end)]


if __name__ == "__main__":
    # Use this command to run in docker container.
    # docker-compose run -d mapswipe_workers_creation python3 python_scripts/results_to_mapping_sessions_results.py "2016-01-01" "2022-10-01"  # noqa
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <min_timestamp> <max_timestamp>")

    min_timestamp = sys.argv[1]
    max_timestamp = sys.argv[2]
    timestamps_list = build_batch_boundaries(min_timestamp, max_timestamp)

    # Pair every boundary with its successor: [t0, t1), [t1, t2), ...
    batches = list(zip(timestamps_list, timestamps_list[1:]))
    if not batches:
        logger.info(
            f"nothing to do: no time span between {min_timestamp} "
            f"and {max_timestamp}"
        )

    for batch_number, (first_timestamp, last_timestamp) in enumerate(
        batches, start=1
    ):
        copy_results_batch(first_timestamp, last_timestamp)
        logger.info(f"progress: {batch_number}/{len(batches)}")
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
 * Backfill session timestamps in 'mapping_sessions' from the 'results' table.
 *
 * Results submitted before 2019-09-30 11:30:02.823 do NOT provide the
 * "start_time" and "end_time" attributes; they only carry a single
 * "timestamp" attribute. The initial transfer did not take this into account.
 */
SET search_path = 'public';

-- One row per (project_id, group_id, user_id), built only from results that
-- have neither start_time nor end_time.
INSERT INTO mapping_sessions
(
    SELECT
        project_id,
        group_id,
        user_id,
        -- take an id from the table's sequence for each grouped row
        nextval('mapping_sessions_mapping_session_id_seq') AS mapping_session_id,
        -- NOTE(review): the 2-minute offset is presumably an estimate of when
        -- the session began before its first result was stored; confirm.
        MIN(timestamp) - INTERVAL '2 Minutes' AS start_time,
        MAX(timestamp) AS end_time,
        COUNT(*) AS items_count
    FROM results
    WHERE start_time IS NULL
      AND end_time IS NULL
    GROUP BY project_id, group_id, user_id
)
-- A session that already exists keeps its row; only its two timestamps are
-- overwritten with the values derived above.
ON CONFLICT (project_id, group_id, user_id)
DO UPDATE SET
    start_time = EXCLUDED.start_time,
    end_time = EXCLUDED.end_time;
28+

0 commit comments

Comments
 (0)