Skip to content

Commit ec64f11

Browse files
authored
Merge pull request #780 from mapswipe/feature/speed-up-aggregate-calculation
Pre-calculate total geo area and time max limit for project groups
2 parents 1959557 + 6c0dbd3 commit ec64f11

File tree

14 files changed

+521
-484
lines changed

14 files changed

+521
-484
lines changed

django/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ RUN apt-get update -y \
1515
# For postgis
1616
gdal-bin \
1717
# Upgrade pip and install python packages for code
18-
&& pip install --upgrade --no-cache-dir pip poetry==1.2.1 \
18+
&& pip install --upgrade --no-cache-dir pip poetry==1.5.1 \
1919
&& poetry --version \
2020
# Configure to use system instead of virtualenvs
2121
&& poetry config virtualenvs.create false \

django/apps/aggregated/management/commands/update_aggregated_data.py

Lines changed: 78 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,70 @@
1919
# |1|00:00:00.208768|00:00:01.398161|00:00:28.951521|
2020
# |2|00:00:01.330297|00:00:06.076814|00:00:03.481192|
2121
# |3|00:00:02.092967|00:00:11.271081|00:00:06.045881|
22-
TASK_GROUP_METADATA_QUERY = f"""
23-
SELECT
24-
project_id,
25-
group_id,
26-
SUM(
27-
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000
28-
) as total_task_group_area, -- sqkm
29-
(
30-
CASE
31-
-- Using 95_percent value of existing data for each project_type
32-
WHEN UG.project_type = {Project.Type.BUILD_AREA.value} THEN 1.4
33-
WHEN UG.project_type = {Project.Type.COMPLETENESS.value} THEN 1.4
34-
WHEN UG.project_type = {Project.Type.CHANGE_DETECTION.value} THEN 11.2
35-
-- FOOTPRINT: Not calculated right now
36-
WHEN UG.project_type = {Project.Type.FOOTPRINT.value} THEN 6.1
37-
ELSE 1
38-
END
39-
) * COUNT(*) as time_spent_max_allowed
40-
FROM tasks T
41-
INNER JOIN used_task_groups UG USING (project_id, group_id)
42-
GROUP BY project_id, project_type, group_id
22+
UPDATE_PROJECT_GROUP_DATA = f"""
23+
WITH to_calculate_groups AS (
24+
SELECT
25+
project_id,
26+
group_id
27+
FROM groups
28+
WHERE
29+
(project_id, group_id) in (
30+
SELECT
31+
MS.project_id,
32+
MS.group_id
33+
FROM mapping_sessions MS
34+
WHERE
35+
MS.start_time >= %(from_date)s
36+
AND MS.start_time < %(until_date)s
37+
GROUP BY MS.project_id, MS.group_id
38+
) AND
39+
(
40+
total_area is NULL OR time_spent_max_allowed is NULL
41+
)
42+
),
43+
groups_data AS (
44+
SELECT
45+
T.project_id,
46+
T.group_id,
47+
SUM( -- sqkm
48+
ST_Area(T.geom::geography(GEOMETRY,4326)) / 1000000
49+
) as total_task_group_area,
50+
(
51+
CASE
52+
-- Using 95_percent value of existing data for each project_type
53+
WHEN P.project_type = {Project.Type.BUILD_AREA.value} THEN 1.4
54+
WHEN P.project_type = {Project.Type.COMPLETENESS.value} THEN 1.4
55+
WHEN P.project_type = {Project.Type.CHANGE_DETECTION.value} THEN 11.2
56+
-- FOOTPRINT: Not calculated right now
57+
WHEN P.project_type = {Project.Type.FOOTPRINT.value} THEN 6.1
58+
ELSE 1
59+
END
60+
) * COUNT(*) as time_spent_max_allowed
61+
FROM tasks T
62+
INNER JOIN to_calculate_groups G USING (project_id, group_id)
63+
INNER JOIN projects P USING (project_id)
64+
GROUP BY project_id, P.project_type, group_id
65+
)
66+
UPDATE groups G
67+
SET
68+
total_area = GD.total_task_group_area,
69+
time_spent_max_allowed = GD.time_spent_max_allowed
70+
FROM groups_data GD
71+
WHERE
72+
G.project_id = GD.project_id AND
73+
G.group_id = GD.group_id;
74+
"""
75+
76+
TASK_GROUP_METADATA_QUERY = """
77+
SELECT
78+
G.project_id,
79+
G.group_id,
80+
G.total_area as total_task_group_area,
81+
G.time_spent_max_allowed
82+
FROM groups G
83+
INNER JOIN used_task_groups UG USING (project_id, group_id)
84+
INNER JOIN projects P USING (project_id)
85+
GROUP BY G.project_id, P.project_type, G.group_id
4386
"""
4487

4588

@@ -239,6 +282,20 @@ def _track(self, tracker_type, label, sql):
239282
until_date=until_date.strftime("%Y-%m-%d"),
240283
)
241284
start_time = time.time()
285+
286+
self.stdout.write(
287+
f"Updating Project Group Data for {label.title()} for date: {params}"
288+
)
289+
with transaction.atomic():
290+
with connection.cursor() as cursor:
291+
cursor.execute(UPDATE_PROJECT_GROUP_DATA, params)
292+
self.stdout.write(
293+
self.style.SUCCESS(
294+
f"Successfull. Runtime: {time.time() - start_time} seconds"
295+
)
296+
)
297+
298+
start_time = time.time()
242299
self.stdout.write(f"Updating {label.title()} Data for date: {params}")
243300
with transaction.atomic():
244301
with connection.cursor() as cursor:

django/apps/existing_database/migrations/0001_initial.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ class Migration(migrations.Migration):
2525
("required_count", models.IntegerField(blank=True, null=True)),
2626
("progress", models.IntegerField(blank=True, null=True)),
2727
("project_type_specifics", models.TextField(blank=True, null=True)),
28+
("total_area", models.FloatField(blank=True, null=True, default=None)),
29+
(
30+
"time_spent_max_allowed",
31+
models.FloatField(blank=True, null=True, default=None),
32+
),
2833
],
2934
options={
3035
"db_table": "groups",

django/apps/existing_database/models.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,10 @@ class Group(Model):
105105
required_count = models.IntegerField(blank=True, null=True)
106106
progress = models.IntegerField(blank=True, null=True)
107107
# Database uses JSON instead of JSONB (not supported by django)
108-
project_type_specifics = models.TextField(blank=True, null=True)
108+
project_type_specifics = models.TextField(blank=True, null=True, default=None)
109+
# Used by aggregated module
110+
total_area = models.FloatField(blank=True, null=True, default=None)
111+
time_spent_max_allowed = models.FloatField(blank=True, null=True, default=None)
109112

110113
# Django derived fields from ForeignKey
111114
project_id: str

0 commit comments

Comments
 (0)