Skip to content

Commit 69b11e7

Browse files
authored
Merge pull request #607 from mapswipe/feature/update-aggragated-script
Feature/update aggragated script
2 parents c80cc6e + 3feb640 commit 69b11e7

File tree

17 files changed

+378
-349
lines changed

17 files changed

+378
-349
lines changed

.github/workflows/actions.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ jobs:
2727
black --check mapswipe_workers ../django
2828
flake8 --count --config setup.cfg mapswipe_workers/ ../django/
2929
isort --check --settings-file setup.cfg mapswipe_workers/ ../django/
30+
- name: Assert check
31+
run: |
32+
cmp --silent ./postgres/initdb.sql ./mapswipe_workers/tests/integration/set_up_db.sql || {
33+
echo 'The set_up_db.sql is not same as initdb.sql. Please sync this files and push';
34+
diff ./postgres/initdb.sql ./mapswipe_workers/tests/integration/set_up_db.sql;
35+
exit 1;
36+
}
3037
- name: Setup Postgres Database Container
3138
env:
3239
POSTGRES_PASSWORD: postgres

django/apps/aggregated/management/commands/update_aggregated_data.py

Lines changed: 60 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
AggregatedUserGroupStatData,
77
AggregatedUserStatData,
88
)
9-
from apps.existing_database.models import Result
9+
from apps.existing_database.models import MappingSession
1010
from django.core.management.base import BaseCommand
1111
from django.db import connection, models, transaction
1212
from django.utils import timezone
@@ -22,44 +22,44 @@
2222
swipes
2323
)
2424
(
25-
-- Retrieve used tasks
26-
WITH used_tasks as (
25+
-- Retrieve used task groups
26+
WITH used_task_groups as (
2727
SELECT
28-
project_id, group_id, task_id
29-
FROM results R
30-
INNER JOIN tasks T USING (project_id, group_id, task_id)
28+
MS.project_id, MS.group_id
29+
FROM mapping_sessions MS
3130
WHERE
32-
R.timestamp >= %(from_date)s and R.timestamp < %(until_date)s
33-
GROUP BY project_id, group_id, task_id
31+
MS.start_time >= %(from_date)s and MS.start_time < %(until_date)s
32+
GROUP BY project_id, group_id -- To get unique
3433
),
35-
-- Calculated task area.
36-
task_data as (
34+
-- Calculated area by task_groups
35+
task_group_data as (
3736
SELECT
3837
project_id,
3938
group_id,
40-
task_id,
41-
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000 as area -- sqkm
42-
FROM used_tasks
43-
INNER JOIN tasks T USING (project_id, group_id, task_id)
39+
SUM(
40+
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000
41+
) as total_task_group_area -- sqkm
42+
FROM tasks T
43+
INNER JOIN used_task_groups UG USING (project_id, group_id)
44+
GROUP BY project_id, group_id
4445
),
45-
-- Aggregate data by group
46+
-- Aggregate data by user
4647
user_data as (
4748
SELECT
48-
R.project_id,
49-
R.group_id,
50-
R.user_id,
51-
MAX(R.timestamp::date) as timestamp_date,
52-
MIN(R.start_time) as start_time,
53-
MAX(R.end_time) as end_time,
54-
COUNT(DISTINCT R.task_id) as task_count,
55-
SUM(T.area) as area_swiped
56-
From results R
57-
INNER JOIN task_data T USING (project_id, group_id, task_id)
49+
MS.project_id,
50+
MS.group_id,
51+
MS.user_id,
52+
MS.start_time::date as timestamp_date,
53+
MS.start_time,
54+
MS.end_time,
55+
MS.items_count as task_count,
56+
TG.total_task_group_area as area_swiped
57+
FROM mapping_sessions MS
58+
INNER JOIN task_group_data TG USING (project_id, group_id)
5859
WHERE
59-
R.timestamp >= %(from_date)s and R.timestamp < %(until_date)s
60-
GROUP BY R.project_id, R.group_id, R.user_id
60+
MS.start_time >= %(from_date)s and MS.start_time < %(until_date)s
6161
),
62-
-- Aggregate group data
62+
-- Additional aggregate by timestamp_date
6363
user_agg_data as (
6464
SELECT
6565
project_id,
@@ -102,47 +102,47 @@
102102
swipes
103103
)
104104
(
105-
-- Retrieve used tasks
106-
WITH used_tasks as (
105+
-- Retrieve used task groups
106+
WITH used_task_groups as (
107107
SELECT
108-
project_id, group_id, task_id
109-
From results_user_groups ug
110-
INNER JOIN results R USING (project_id, group_id, user_id)
111-
INNER JOIN tasks T USING (project_id, group_id, task_id)
108+
MS.project_id, MS.group_id
109+
From mapping_sessions_user_groups MSUR
110+
INNER JOIN mapping_sessions MS USING (mapping_session_id)
112111
WHERE
113-
R.timestamp >= %(from_date)s and R.timestamp < %(until_date)s
114-
GROUP BY project_id, group_id, task_id
112+
MS.start_time >= %(from_date)s and MS.start_time < %(until_date)s
113+
GROUP BY project_id, group_id -- To get unique
115114
),
116-
-- Calculated task area.
117-
task_data as (
115+
-- Calculated area by task_groups
116+
task_group_data as (
118117
SELECT
119118
project_id,
120119
group_id,
121-
task_id,
122-
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000 as area -- sqkm
123-
FROM used_tasks
124-
INNER JOIN tasks T USING (project_id, group_id, task_id)
120+
SUM(
121+
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000
122+
) as total_task_group_area -- sqkm
123+
FROM tasks T
124+
INNER JOIN used_task_groups UG USING (project_id, group_id)
125+
GROUP BY project_id, group_id
125126
),
126-
-- Aggregate data by group
127+
-- Aggregate data by user-group
127128
user_group_data as (
128129
SELECT
129-
ug.project_id,
130-
ug.group_id,
131-
ug.user_id,
132-
ug.user_group_id,
133-
MAX(R.timestamp::date) as timestamp_date,
134-
MIN(R.start_time) as start_time,
135-
MAX(R.end_time) as end_time,
136-
COUNT(DISTINCT R.task_id) as task_count,
137-
SUM(T.area) as area_swiped
138-
From results_user_groups ug
139-
INNER JOIN results R USING (project_id, group_id, user_id)
140-
INNER JOIN task_data T USING (task_id)
130+
MS.project_id,
131+
MS.group_id,
132+
MS.user_id,
133+
MSUR.user_group_id,
134+
MS.start_time::date as timestamp_date,
135+
MS.start_time as start_time,
136+
MS.end_time as end_time,
137+
MS.items_count as task_count,
138+
TG.total_task_group_area as area_swiped
139+
From mapping_sessions_user_groups MSUR
140+
INNER JOIN mapping_sessions MS USING (mapping_session_id)
141+
INNER JOIN task_group_data TG USING (project_id, group_id)
141142
WHERE
142-
R.timestamp >= %(from_date)s and R.timestamp < %(until_date)s
143-
GROUP BY ug.project_id, ug.group_id, ug.user_id, ug.user_group_id
143+
MS.start_time >= %(from_date)s and MS.start_time < %(until_date)s
144144
),
145-
-- Aggregate group data
145+
-- Additional aggregate by timestamp_date
146146
user_group_agg_data as (
147147
SELECT
148148
project_id,
@@ -188,8 +188,8 @@ def _track(self, tracker_type, label, sql):
188188
from_date = datetime.datetime.strptime(tracker.value, "%Y-%m-%d").date()
189189
else:
190190
self.stdout.write(f"{label.title()} Last tracker data not found.")
191-
timestamp_min = Result.objects.aggregate(
192-
timestamp_min=models.Min("timestamp")
191+
timestamp_min = MappingSession.objects.aggregate(
192+
timestamp_min=models.Min("start_time")
193193
)["timestamp_min"]
194194
if timestamp_min:
195195
self.stdout.write(f"Using min timestamp from database {timestamp_min}")

django/apps/existing_database/factories.py

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
import factory
22
import factory.fuzzy
33
from factory.django import DjangoModelFactory
4+
from mapswipe.utils import raise_if_field_not_found
45

56
from .models import (
67
Group,
8+
MappingSession,
9+
MappingSessionResult,
10+
MappingSessionUserGroup,
711
Project,
8-
Result,
912
Task,
1013
User,
1114
UserGroup,
12-
UserGroupResult,
1315
UserGroupUserMembership,
1416
)
1517

@@ -44,40 +46,29 @@ class Meta:
4446

4547
@classmethod
4648
def _create(cls, model_class, *args, **kwargs):
47-
if "group" not in kwargs:
48-
raise Exception("Please define group")
49+
raise_if_field_not_found(kwargs, ["group"])
4950
group = kwargs.pop("group")
5051
kwargs["project"] = group.project
5152
kwargs["group_id"] = group.group_id
5253
return super()._create(model_class, *args, **kwargs)
5354

5455

55-
class ResultFactory(DjangoModelFactory):
56+
class MappingSessionFactory(DjangoModelFactory):
5657
class Meta:
57-
model = Result
58+
model = MappingSession
5859

59-
@classmethod
60-
def _create(cls, model_class, *args, **kwargs):
61-
if "task" not in kwargs:
62-
raise Exception("Please define task")
63-
task = kwargs.pop("task")
64-
kwargs["project"] = task.project
65-
kwargs["group_id"] = task.group_id
66-
kwargs["task_id"] = task.task_id
67-
return super()._create(model_class, *args, **kwargs)
60+
61+
class MappingSessionResultFactory(DjangoModelFactory):
62+
class Meta:
63+
model = MappingSessionResult
6864

6965

70-
class UserGroupResultFactory(DjangoModelFactory):
66+
class MappingSessionUserGroupFactory(DjangoModelFactory):
7167
class Meta:
72-
model = UserGroupResult
68+
model = MappingSessionUserGroup
7369

7470
@classmethod
7571
def _create(cls, model_class, *args, **kwargs):
76-
if "group" not in kwargs:
77-
raise Exception("Please define group")
78-
group = kwargs.pop("group")
79-
kwargs["project"] = group.project
80-
kwargs["group_id"] = group.group_id
8172
return super()._create(model_class, *args, **kwargs)
8273

8374

0 commit comments

Comments
 (0)