Skip to content

Commit c761e19

Browse files
authored
refactor(scraper): use the MERGE command instead of DELETE + INSERT (#213)
MERGE appears to be dramatically faster than the previous DELETE + INSERT approach (16s before vs. 200ms with MERGE).
1 parent 0465768 commit c761e19

File tree

1 file changed

+17
-10
lines changed

1 file changed

+17
-10
lines changed

backend/commands/scraper.ts

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ export default class Scraper extends BaseCommand {
210210
async synchronizeArchivesTask() {
211211
await db.rawQuery(`
212212
BEGIN;
213+
213214
-- Update the groups table
214215
INSERT INTO "groups_archive" ("id", "name", "start_time", "end_time", "group", "week", "day", "type", "url", "course_id", "created_at", "updated_at", "spots_occupied", "spots_total", "is_active")
215216
SELECT "id", "name", "start_time", "end_time", "group", "week", "day", "type", "url", "course_id", "created_at", "updated_at", "spots_occupied", "spots_total", "is_active" FROM "groups"
@@ -227,16 +228,22 @@ export default class Scraper extends BaseCommand {
227228
"spots_occupied" = EXCLUDED."spots_occupied",
228229
"spots_total" = EXCLUDED."spots_total",
229230
"is_active" = EXCLUDED."is_active";
230-
-- Delete unlinked lecturers
231-
DELETE FROM "group_archive_lecturers"
232-
USING "group_lecturers", "groups"
233-
WHERE "group_archive_lecturers"."group_id" IN (SELECT "id" FROM "groups")
234-
AND "group_archive_lecturers"."lecturer_id" NOT IN (SELECT DISTINCT "lecturer_id" FROM "group_lecturers" WHERE "group_lecturers"."group_id" = "group_archive_lecturers"."group_id");
235-
-- Insert new lecturers
236-
INSERT INTO "group_archive_lecturers" ("group_id", "lecturer_id", "created_at", "updated_at")
237-
SELECT "group_id", "lecturer_id", "created_at", "updated_at"
238-
FROM "group_lecturers"
239-
WHERE "group_lecturers"."lecturer_id" NOT IN (SELECT DISTINCT "lecturer_id" FROM "group_archive_lecturers" WHERE "group_archive_lecturers"."group_id" = "group_lecturers"."group_id");
231+
232+
-- Update the pivot table
233+
MERGE INTO "group_archive_lecturers" AS "archive"
234+
USING "group_lecturers" AS "current"
235+
ON "archive"."group_id" = "current"."group_id" AND "archive"."lecturer_id" = "current"."lecturer_id"
236+
-- when the row exists in both tables
237+
WHEN MATCHED THEN DO NOTHING
238+
-- when the row doesnt exist in the current table
239+
WHEN NOT MATCHED BY SOURCE
240+
AND EXISTS (SELECT 1 FROM "groups" WHERE "id" = "archive"."group_id")
241+
THEN DELETE
242+
-- when the row doesnt exist in the archive table
243+
WHEN NOT MATCHED BY TARGET
244+
THEN INSERT ("group_id", "lecturer_id", "updated_at", "created_at")
245+
VALUES ("current"."group_id", "current"."lecturer_id", "current"."updated_at", "current"."created_at");
246+
240247
COMMIT;
241248
`);
242249
}

0 commit comments

Comments
 (0)