|
| 1 | +import sys |
| 2 | + |
| 3 | +import pandas as pd |
| 4 | + |
| 5 | +from mapswipe_workers import auth |
| 6 | +from mapswipe_workers.definitions import logger |
| 7 | + |
| 8 | + |
def copy_results_batch(first_timestamp, last_timestamp):
    """Copy one time slice of results into ``mapping_sessions_results``.

    The slice covers mapping sessions whose ``start_time`` lies in the
    half-open interval ``[first_timestamp, last_timestamp)``. The helper
    table ``results_batch`` is dropped and rebuilt from the join of
    ``mapping_sessions`` and ``results`` (matched on project, group and
    user id), and its rows are then inserted into
    ``mapping_sessions_results``; rows that conflict are skipped.

    Args:
        first_timestamp: inclusive lower bound for ``ms.start_time``.
        last_timestamp: exclusive upper bound for ``ms.start_time``.
    """
    logger.info(
        f"Start process for : ms.start_time>={first_timestamp} "
        f"and ms.start_time<{last_timestamp}"
    )

    # Both bounds are passed as bound parameters, never interpolated
    # into the SQL text.
    sql_params = {
        "first_timestamp": first_timestamp,
        "last_timestamp": last_timestamp,
    }
    sql = """
        -- create table with results for given time span
        drop table if exists results_batch;
        create table results_batch as
        select ms.mapping_session_id, r.task_id, r."result"
        from mapping_sessions ms, results r
        where
            ms.start_time >= %(first_timestamp)s
            and ms.start_time < %(last_timestamp)s
            and ms.project_id = r.project_id
            and ms.group_id = r.group_id
            and ms.user_id = r.user_id;
        insert into mapping_sessions_results
        (select r.* from results_batch r)
        on conflict do nothing;
    """

    postgres_db = auth.postgresDB()
    postgres_db.query(sql, sql_params)

    logger.info(
        f"Finished process for : ms.start_time >= {first_timestamp} "
        f"and ms.start_time < {last_timestamp}"
    )
| 38 | + |
| 39 | + |
def month_batch_bounds(min_timestamp, max_timestamp):
    """Split ``[min_timestamp, max_timestamp)`` into month-aligned batches.

    Only the date part of each bound is used. Batch edges fall on the
    first day of every month inside the span; the requested start and end
    dates are added as edges as well, so a span that begins or ends in the
    middle of a month is still covered completely instead of being
    silently truncated to whole months.

    Args:
        min_timestamp: inclusive start of the span (anything
            ``pd.Timestamp`` accepts, e.g. ``"2016-01-01"``).
        max_timestamp: exclusive end of the span.

    Returns:
        A list of ``(first_timestamp, last_timestamp)`` tuples of
        ``"%Y-%m-%d"`` strings, in chronological order. Empty when the
        span is empty or reversed.
    """
    date_format = "%Y-%m-%d"
    start = pd.Timestamp(min_timestamp).normalize()
    end = pd.Timestamp(max_timestamp).normalize()
    if start >= end:
        return []

    boundaries = pd.date_range(start, end, freq="MS").strftime(date_format).tolist()
    start_str = start.strftime(date_format)
    end_str = end.strftime(date_format)
    # freq="MS" only yields month starts; add the real endpoints so partial
    # months at either end of the span are not dropped.
    if not boundaries or boundaries[0] != start_str:
        boundaries.insert(0, start_str)
    if boundaries[-1] != end_str:
        boundaries.append(end_str)

    # Consecutive edges form the half-open batches.
    return list(zip(boundaries, boundaries[1:]))


if __name__ == "__main__":
    # Use this command to run in docker container.
    # docker-compose run -d mapswipe_workers_creation python3 python_scripts/results_to_mapping_sessions_results.py "2016-01-01" "2022-10-01" # noqa
    if len(sys.argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit(
            "usage: results_to_mapping_sessions_results.py "
            "<min_timestamp> <max_timestamp>"
        )

    batches = month_batch_bounds(sys.argv[1], sys.argv[2])
    for done, (first_timestamp, last_timestamp) in enumerate(batches, start=1):
        copy_results_batch(first_timestamp, last_timestamp)
        logger.info(f"progress: {done}/{len(batches)}")
0 commit comments