diff --git a/lib/graphql/questionsDataSource.tsx b/lib/graphql/questionsDataSource.tsx
index 74a2ce6..a64014a 100644
--- a/lib/graphql/questionsDataSource.tsx
+++ b/lib/graphql/questionsDataSource.tsx
@@ -1,5 +1,5 @@
 import { Container } from "@azure/cosmos";
-import { fetchQuestions } from "./repoQuestions";
+import { fetchQuestions, fetchQuestionsAndChecksum } from "./repoQuestions";
 import { getQuestionsContainer } from "./cosmos-client";
 
 export const QuestionsDataSource = (container: Container) => {
@@ -74,6 +74,148 @@ const extractExamId = (link: string): string => {
   return segments[segments.length - 3].replace(/-/g, "_").toLowerCase();
 };
 
+// Metadata helpers used to detect dataset updates and resync a partition
+const META_TYPE = "meta";
+const metaIdForExam = (examId: string) => `_meta_${examId}`;
+const SYNC_COOLDOWN_MS = 10 * 60 * 1000; // 10 minutes
+
+/** Shape of the per-exam metadata document stored in the exam's partition. */
+type ExamMeta = {
+  id: string;
+  examId: string;
+  type: string;
+  checksum: string;
+  // Optional because the sync logic tolerates meta docs that lack it
+  updatedAt?: string;
+};
+
+/** True when an error thrown by the Cosmos SDK carries status code 404. */
+const isNotFound = (err: unknown): boolean => {
+  if (typeof err !== "object" || err === null) return false;
+  const { code } = err as { code?: unknown };
+  return code === 404;
+};
+
+/**
+ * Reads the metadata document for an exam.
+ *
+ * Returns undefined only when the document does not exist. Any other failure
+ * is rethrown: treating a transient read error as "never synced" would make
+ * ensureExamSynced purge and reseed the whole partition.
+ */
+const readMeta = async (
+  container: Container,
+  examId: string,
+): Promise<ExamMeta | undefined> => {
+  try {
+    const { resource } = await container
+      .item(metaIdForExam(examId), examId)
+      .read();
+    return resource as ExamMeta | undefined;
+  } catch (err: unknown) {
+    if (isNotFound(err)) return undefined;
+    throw err;
+  }
+};
+
+/** Upserts the exam's meta doc with the given checksum and current time. */
+const writeMeta = async (
+  container: Container,
+  examId: string,
+  checksum: string,
+) => {
+  const metaDoc: ExamMeta = {
+    id: metaIdForExam(examId),
+    examId,
+    type: META_TYPE,
+    checksum,
+    updatedAt: new Date().toISOString(),
+  };
+  await container.items.upsert(metaDoc);
+};
+
+/**
+ * Deletes every non-meta document of an exam. Individual delete failures are
+ * logged and skipped so one bad item does not abort the purge.
+ */
+const purgeExamPartition = async (container: Container, examId: string) => {
+  // Select IDs for non-meta docs within the partition
+  const querySpec = {
+    query:
+      "SELECT c.id FROM c WHERE c.examId = @examId AND (NOT IS_DEFINED(c.type) OR c.type != @metaType)",
+    parameters: [
+      { name: "@examId", value: examId },
+      { name: "@metaType", value: META_TYPE },
+    ],
+  };
+  const { resources } = await container.items.query(querySpec).fetchAll();
+  for (const { id } of resources as Array<{ id: string }>) {
+    try {
+      await container.item(id, examId).delete();
+    } catch (err) {
+      console.warn(`Failed to delete item ${id} in exam ${examId}:`, err);
+    }
+  }
+};
+
+/**
+ * Upserts every question into the exam's partition, then records the checksum.
+ * The meta doc is written last, so a failed seed leaves the old checksum in
+ * place and the next sync attempt retries.
+ */
+const seedExamPartition = async (
+  container: Container,
+  examId: string,
+  questions: any[],
+  checksum: string,
+) => {
+  for (const question of questions) {
+    const questionWithExamId = { ...question, examId };
+    await container.items.upsert(questionWithExamId);
+  }
+  await writeMeta(container, examId, checksum);
+};
+
+/**
+ * Brings an exam's partition in line with the upstream markdown at `link`.
+ *
+ * Skips the upstream fetch while the last check is younger than
+ * SYNC_COOLDOWN_MS. Failures are logged and swallowed, so callers continue
+ * with whatever is currently stored.
+ */
+const ensureExamSynced = async (
+  container: Container,
+  examId: string,
+  link: string,
+) => {
+  try {
+    const meta = await readMeta(container, examId);
+    // Cooldown: avoid fetching upstream too frequently
+    if (meta && meta.updatedAt) {
+      const last = new Date(meta.updatedAt).getTime();
+      if (!Number.isNaN(last) && Date.now() - last < SYNC_COOLDOWN_MS) {
+        return;
+      }
+    }
+
+    const result = await fetchQuestionsAndChecksum(link);
+    if (!result) return;
+
+    const { questions, checksum } = result;
+    if (!meta || meta.checksum !== checksum) {
+      await purgeExamPartition(container, examId);
+      await seedExamPartition(container, examId, questions, checksum);
+    } else {
+      // Content unchanged: refresh updatedAt so the cooldown restarts. Without
+      // this, every request after the first cooldown window would re-fetch
+      // upstream. This also backfills updatedAt on old meta docs.
+      await writeMeta(container, examId, checksum);
+    }
+  } catch (err) {
+    console.warn("ensureExamSynced failed:", err);
+  }
+};
+
 export const CombinedQuestionsDataSource = () => {
   return {
     async getQuestion(id: string, link: string) {
@@ -81,6 +223,9 @@ export const CombinedQuestionsDataSource = () => {
         const examId = extractExamId(link);
         const questionsContainer = await getQuestionsContainer();
 
+        // Ensure the partition is up to date with source content
+        await ensureExamSynced(questionsContainer, examId, link);
+
         // Try Cosmos DB first (most efficient)
         const querySpec = {
           query: "SELECT * FROM c WHERE c.id = @id AND c.examId = @examId",
@@ -128,6 +273,10 @@ export const CombinedQuestionsDataSource = () => {
         const examId = extractExamId(link);
         const questionsContainer = await getQuestionsContainer();
 
+        await ensureExamSynced(questionsContainer, examId, link);
+
         // Try Cosmos DB first
         const querySpec = {
-          query: "SELECT VALUE COUNT(c.id) FROM c WHERE c.examId = @examId",
+          // Exclude the sync metadata document, which shares the exam partition
+          query:
+            "SELECT VALUE COUNT(c.id) FROM c WHERE c.examId = @examId AND (NOT IS_DEFINED(c.type) OR c.type != 'meta')",
@@ -170,6 +319,10 @@ export const CombinedQuestionsDataSource = () => {
         const examId = extractExamId(link);
         const questionsContainer = await getQuestionsContainer();
 
+        await ensureExamSynced(questionsContainer, examId, link);
+
         // Try Cosmos DB first
         const querySpec = {
-          query: "SELECT * FROM c WHERE c.examId = @examId",
+          // Exclude the sync metadata document, which shares the exam partition
+          query:
+            "SELECT * FROM c WHERE c.examId = @examId AND (NOT IS_DEFINED(c.type) OR c.type != 'meta')",
diff --git a/lib/graphql/repoQuestions.tsx b/lib/graphql/repoQuestions.tsx
index 8822753..409dc10 100644
--- a/lib/graphql/repoQuestions.tsx
+++ b/lib/graphql/repoQuestions.tsx
@@ -1,3 +1,4 @@
+import { createHash } from "crypto";
 const scrapeQuestions = (markdownText: string) => {
   const regex =
     /### (.*?)\s*\r?\n\r?\n((?:\!\[.*?\]\(.*?\)\s*\r?\n\r?\n)*?)((?:- \[(?:x| )\] .*?\r?\n)+)/gs;
@@ -57,3 +58,26 @@ export const fetchQuestions = async (link: string) => {
     console.error(err.message);
   }
 };
+
+/**
+ * Fetches the markdown at `link` and returns the scraped questions together
+ * with a SHA-256 hex digest of the raw markdown.
+ * Returns undefined (after logging) when the request or parsing fails.
+ */
+export const fetchQuestionsAndChecksum = async (
+  link: string,
+): Promise<{ questions: any[]; checksum: string } | undefined> => {
+  try {
+    const res = await fetch(link);
+    if (!res.ok) {
+      throw new Error(res.statusText);
+    }
+    const markdown = await res.text();
+    const questions = scrapeQuestions(markdown);
+    const checksum = createHash("sha256").update(markdown).digest("hex");
+    return { questions, checksum };
+  } catch (err: unknown) {
+    console.error(err instanceof Error ? err.message : err);
+    return undefined;
+  }
+};