|
1 | 1 | import { TaskCallback } from "@poppinss/cliui/types"; |
| 2 | +import { DateTime } from "luxon"; |
2 | 3 | import assert from "node:assert"; |
3 | 4 |
|
4 | 5 | import { BaseCommand, flags } from "@adonisjs/core/ace"; |
5 | 6 | import type { CommandOptions } from "@adonisjs/core/types/ace"; |
| 7 | +import db from "@adonisjs/lucid/services/db"; |
6 | 8 |
|
7 | 9 | import Course from "#models/course"; |
8 | 10 | import Department from "#models/department"; |
@@ -388,72 +390,108 @@ export default class Scraper extends BaseCommand { |
388 | 390 | const lecturerMap = new Map(zip(lecturerSet, lecturersIds)); |
389 | 391 |
|
390 | 392 | task.update("Updating groups"); |
| 393 | + const currentDate = DateTime.now(); |
391 | 394 | // set all groups to inactive, query below will activate scraped ones |
392 | 395 | await Group.query().update({ isActive: false }); |
393 | | - const groupQueue = fetchedDetails.flatMap( |
394 | | - ({ url, registration, course, details }) => |
| 396 | + const preparedGroups = fetchedDetails.flatMap( |
| 397 | + ({ url, registration, course, details, lecturers }) => |
395 | 398 | details.days.map((day) => { |
396 | 399 | return { |
397 | | - name: details.name.slice(0, 255), |
398 | | - startTime: details.startTimeEndTimes[ |
399 | | - details.days.indexOf(day) |
400 | | - ].startTime.slice(0, 255), |
401 | | - endTime: details.startTimeEndTimes[ |
402 | | - details.days.indexOf(day) |
403 | | - ].endTime.slice(0, 255), |
404 | | - group: details.group.slice(0, 255), |
405 | | - week: details.week as "-" | "TP" | "TN", |
406 | | - day: day.slice(0, 255), |
407 | | - type: details.type.slice(0, 255), |
408 | | - courseId: |
409 | | - course.courseCode.slice(0, 255) + |
410 | | - (extractLastStringInBrackets(registration.name) ?? |
411 | | - registration.name), |
412 | | - spotsOccupied: details.spotsOccupied, |
413 | | - spotsTotal: details.spotsTotal, |
414 | | - url: url.slice(0, 255), |
415 | | - isActive: true, |
| 400 | + row: { |
| 401 | + name: details.name.slice(0, 255), |
| 402 | + start_time: details.startTimeEndTimes[ |
| 403 | + details.days.indexOf(day) |
| 404 | + ].startTime.slice(0, 255), |
| 405 | + end_time: details.startTimeEndTimes[ |
| 406 | + details.days.indexOf(day) |
| 407 | + ].endTime.slice(0, 255), |
| 408 | + group: details.group.slice(0, 255), |
| 409 | + week: details.week as "-" | "TP" | "TN", |
| 410 | + day: day.slice(0, 255), |
| 411 | + type: details.type.slice(0, 255), |
| 412 | + course_id: |
| 413 | + course.courseCode.slice(0, 255) + |
| 414 | + (extractLastStringInBrackets(registration.name) ?? |
| 415 | + registration.name), |
| 416 | + spots_occupied: details.spotsOccupied, |
| 417 | + spots_total: details.spotsTotal, |
| 418 | + url: url.slice(0, 255), |
| 419 | + is_active: true, |
| 420 | + created_at: currentDate, |
| 421 | + updated_at: currentDate, |
| 422 | + }, |
| 423 | + lecturers, |
416 | 424 | }; |
417 | 425 | }), |
418 | 426 | ); |
| 427 | + |
| 428 | + const uniqueRows = Array.from( |
| 429 | + new Map( |
| 430 | + preparedGroups.map(({ row, lecturers }) => [ |
| 431 | + JSON.stringify([ |
| 432 | + row.name, |
| 433 | + row.start_time, |
| 434 | + row.end_time, |
| 435 | + row.group, |
| 436 | + row.week, |
| 437 | + row.day, |
| 438 | + row.type, |
| 439 | + row.course_id, |
| 440 | + ]), |
| 441 | + { row, lecturers }, |
| 442 | + ]), |
| 443 | + ).values(), |
| 444 | + ); |
| 445 | + const mergedProps = Array.from( |
| 446 | + new Set(Object.keys(uniqueRows[0].row)).difference( |
| 447 | + new Set([ |
| 448 | + "created_at", |
| 449 | + "name", |
| 450 | + "start_time", |
| 451 | + "end_time", |
| 452 | + "group", |
| 453 | + "week", |
| 454 | + "day", |
| 455 | + "type", |
| 456 | + "course_id", |
| 457 | + ]), |
| 458 | + ), |
| 459 | + ); |
419 | 460 | const groups = await Promise.all( |
420 | | - chunkArray(groupQueue, QUERY_CHUNK_SIZE).map((chunk) => |
421 | | - this.dbSemaphore.runTask(() => |
422 | | - Group.updateOrCreateMany( |
423 | | - [ |
424 | | - "name", |
425 | | - "startTime", |
426 | | - "endTime", |
427 | | - "group", |
428 | | - "week", |
429 | | - "day", |
430 | | - "type", |
431 | | - "courseId", |
432 | | - ], |
433 | | - chunk, |
434 | | - ), |
435 | | - ), |
| 461 | + chunkArray(uniqueRows, QUERY_CHUNK_SIZE).map((chunk) => |
| 462 | + this.dbSemaphore.runTask(async () => { |
| 463 | + const ids = (await db |
| 464 | + .knexQuery() |
| 465 | + .insert(chunk.map((el) => el.row)) |
| 466 | + .into("groups") |
| 467 | + .onConflict( |
| 468 | + db.knexRawQuery('ON CONSTRAINT "groups_scraper_uindex"'), |
| 469 | + ) |
| 470 | + .merge(mergedProps) |
| 471 | + .returning("id")) as { id: number }[]; |
| 472 | + const updatedGroups = await Group.findMany(ids.map((i) => i.id)); |
| 473 | + return zip(updatedGroups, chunk).map(([group, { lecturers }]) => { |
| 474 | + return { group, lecturers }; |
| 475 | + }); |
| 476 | + //Group.updateOrCreateMany( |
| 477 | + // ["url", "startTime", "day", "week", "courseId"], |
| 478 | + // chunk, |
| 479 | + //), |
| 480 | + }), |
436 | 481 | ), |
437 | 482 | ).then((a) => a.flat()); |
438 | 483 |
|
439 | 484 | task.update("Updating group lecturers"); |
440 | | - const groupLecturers = zip( |
441 | | - fetchedDetails.flatMap(({ lecturers, details }) => { |
442 | | - const ids = lecturers.map((lecturer) => { |
443 | | - const id = lecturerMap.get(lecturer); |
444 | | - assert(id !== undefined); |
445 | | - return id; |
446 | | - }); |
447 | | - return details.days.map(() => ids); |
448 | | - }), |
449 | | - groups, |
450 | | - ); |
451 | | - |
452 | 485 | await Promise.all( |
453 | | - groupLecturers.map(([lecturers, group]) => |
454 | | - this.dbSemaphore.runTask(() => |
455 | | - group.related("lecturers").sync(lecturers), |
456 | | - ), |
| 486 | + groups.map(({ group, lecturers }) => |
| 487 | + this.dbSemaphore.runTask(async () => { |
| 488 | + const ids = lecturers.map((lecturer) => { |
| 489 | + const id = lecturerMap.get(lecturer); |
| 490 | + assert(id !== undefined); |
| 491 | + return id; |
| 492 | + }); |
| 493 | + await group.related("lecturers").sync(ids); |
| 494 | + }), |
457 | 495 | ), |
458 | 496 | ); |
459 | 497 | } |
|
0 commit comments