Skip to content

Commit 071f032

Browse files
committed
chore(load-data): optimization and verbose for mapping data transfer
1 parent 6b92356 commit 071f032

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

apps/existing_database/management/commands/loaddata_from_existing_database.py

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,12 @@
3939
)
4040
from apps.existing_database import models as existing_db_models
4141
from apps.mapping.firebase.utils import transfer_results_from_temp_tables
42-
from apps.mapping.models import MappingSession, MappingSessionClientTypeEnum, MappingSessionResultTemp
42+
from apps.mapping.models import (
43+
MappingSession,
44+
MappingSessionClientTypeEnum,
45+
MappingSessionResultTemp,
46+
MappingSessionUserGroupTemp,
47+
)
4348
from apps.project.models import (
4449
Geometry,
4550
Organization,
@@ -317,12 +322,15 @@ def store_project_image(
317322
def _process_project_results(
318323
bulk_create_manager: BulkCreateManager,
319324
existing_project: existing_db_models.Project,
325+
project: Project,
320326
):
321327
existing_ms_result_qs = existing_db_models.MappingSessionResult.objects.filter(
322-
mapping_session__project_id=existing_project.project_id,
328+
# NOTE: Filter through a MappingSession subquery instead of the joined mapping_session__project_id lookup; MappingSessionResult is much larger than MappingSession
329+
mapping_session__in=existing_db_models.MappingSession.objects.filter(project_id=existing_project.project_id),
323330
).select_related("mapping_session")
324331

325-
# TODO: user groups
332+
logger.info("Project - %s: Results to temp tables - Started", project.generate_name())
333+
last_start_time = time.time()
326334

327335
for existing_ms_result in existing_ms_result_qs.iterator():
328336
existing_ms = existing_ms_result.mapping_session
@@ -343,6 +351,38 @@ def _process_project_results(
343351
),
344352
)
345353

354+
logger.info(
355+
"Project - %s: Results to temp tables - Finished (runtime %s)",
356+
project.generate_name(),
357+
timedelta(seconds=time.time() - last_start_time),
358+
)
359+
last_start_time = time.time()
360+
361+
logger.info("Project - %s: UserGroup Mapping Session to temp tables - Started", project.generate_name())
362+
363+
existing_ms_user_group_qs = existing_db_models.MappingSessionUserGroup.objects.filter(
364+
# NOTE: Filter MappingSessionUserGroup through a MappingSession subquery instead of a joined mapping_session__project_id lookup
365+
mapping_session__in=existing_db_models.MappingSession.objects.filter(project_id=existing_project.project_id),
366+
).select_related("mapping_session")
367+
368+
for existing_ms_user_group in existing_ms_user_group_qs.iterator():
369+
existing_ms = existing_ms_user_group.mapping_session
370+
371+
bulk_create_manager.add(
372+
MappingSessionUserGroupTemp(
373+
project_firebase_id=existing_ms.project_id,
374+
group_firebase_id=existing_ms.group_id,
375+
contributor_user_firebase_id=existing_ms.user_id,
376+
user_group_firebase_id=existing_ms_user_group.user_group_id,
377+
),
378+
)
379+
380+
logger.info(
381+
"Project - %s: UserGroup Mapping Session to temp tables - Finished (runtime %s)",
382+
project.generate_name(),
383+
timedelta(seconds=time.time() - last_start_time),
384+
)
385+
346386

347387
def process_mapping_data_for_project(
348388
project: Project,
@@ -352,16 +392,19 @@ def process_mapping_data_for_project(
352392
logger.info("Project - %s: Raw mapping data already exists", project.generate_name())
353393
return
354394

355-
logger.info("Project - %s: Fetching raw mapping data", project.generate_name())
395+
logger.info("Project - %s: Mapping session load - Started", project.generate_name())
356396

357397
start_time = time.time()
358398
last_start_time = time.time()
399+
359400
existing_group_qs = existing_db_models.Group.objects.filter(
360401
project_id=existing_project.project_id,
361402
)
362403

363404
bulk_create_manager = BulkCreateManager(chunk_size=1000)
364405

406+
logger.info("Project - %s: Groups/Tasks - Started", project.generate_name())
407+
365408
for existing_group in existing_group_qs.iterator():
366409
project_task_group, _ = ProjectTaskGroup.objects.get_or_create(
367410
project=project,
@@ -393,34 +436,36 @@ def process_mapping_data_for_project(
393436
)
394437

395438
logger.info(
396-
"Project - %s: Created groups/tasks (runtime %s)",
439+
"Project - %s: Groups/Tasks - Finished (runtime %s)",
397440
project.generate_name(),
398441
timedelta(seconds=time.time() - last_start_time),
399442
)
400443
last_start_time = time.time()
401444

402-
_process_project_results(bulk_create_manager, existing_project)
445+
logger.info("Project - %s: Temp table data - Started", project.generate_name())
446+
_process_project_results(bulk_create_manager, existing_project, project)
403447
logger.info(
404-
"Project - %s: Created temp mapping data (runtime %s)",
448+
"Project - %s: Temp table data - Finished (runtime %s)",
405449
project.generate_name(),
406450
timedelta(seconds=time.time() - last_start_time),
407451
)
408452
last_start_time = time.time()
409453

410454
bulk_create_manager.done()
411455

456+
logger.info("Project - %s: Temp to mapping tables - Started", project.generate_name())
412457
transfer_results_from_temp_tables(
413458
typing.cast("FirebaseCleanup", FakeFirebaseCleanup()),
414459
)
415460
logger.info(
416-
"Project - %s: Stored mapping data (runtime %s)",
461+
"Project - %s: Temp to mapping tables - Finished (runtime %s)",
417462
project.generate_name(),
418463
timedelta(seconds=time.time() - last_start_time),
419464
)
420465
last_start_time = time.time()
421466

422467
logger.info(
423-
"Project - %s: Raw mapping success (runtime %s)",
468+
"Project - %s: Mapping session load - Finished (runtime %s)",
424469
project.generate_name(),
425470
timedelta(seconds=time.time() - start_time),
426471
)
@@ -927,7 +972,7 @@ def handle_project(self):
927972
if self.migrate_project_active_results:
928973
process_mapping_data_for_project(project, existing_project)
929974
else:
930-
logger.info(
975+
logger.warning(
931976
"Project - %s: Active project found. Migrate mapping data using --migrate-active-results",
932977
project.generate_name(),
933978
)

apps/existing_database/models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,10 @@ class MappingSessionUserGroup(Model):
283283
mapping_session = models.ForeignKey[MappingSession, MappingSession](MappingSession, on_delete=models.DO_NOTHING)
284284
user_group = models.ForeignKey[UserGroup, UserGroup](UserGroup, on_delete=models.DO_NOTHING, related_name="+")
285285

286+
# Type hints
287+
user_group_id: str
288+
mapping_session_id: str
289+
286290
class Meta:
287291
managed = False
288292
db_table = "mapping_sessions_user_groups"

0 commit comments

Comments
 (0)