Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 103 additions & 1 deletion lib/graphql/questionsDataSource.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Container } from "@azure/cosmos";
import { fetchQuestions } from "./repoQuestions";
import { fetchQuestions, fetchQuestionsAndChecksum } from "./repoQuestions";
import { getQuestionsContainer } from "./cosmos-client";

export const QuestionsDataSource = (container: Container) => {
Expand Down Expand Up @@ -74,13 +74,111 @@ const extractExamId = (link: string): string => {
return segments[segments.length - 3].replace(/-/g, "_").toLowerCase();
};

// Sync-metadata constants and helpers: one meta document per exam partition
// records the checksum of the last seeded dataset and when it was written.

/** Value stored in the `type` field of a meta document. */
const META_TYPE = "meta";

/** Builds the document id of the meta document for the given exam. */
const metaIdForExam = (examId: string): string => "_meta_" + examId;

/** Minimum time between two upstream fetches for one exam (10 minutes), in ms. */
const SYNC_COOLDOWN_MS = 1000 * 60 * 10;

/**
 * Reads the sync-metadata document of an exam partition.
 *
 * @param container - Cosmos container that holds the meta document.
 * @param examId - Partition key value; also used to derive the meta document id.
 * @returns The stored meta document, or `undefined` when it does not exist.
 * @throws Re-throws every read failure other than "not found". A transient
 *   error must not look like "no metadata", because `ensureExamSynced` treats
 *   missing metadata as a reason to purge and reseed the partition.
 */
const readMeta = async (container: Container, examId: string) => {
  try {
    const { resource } = await container
      .item(metaIdForExam(examId), examId)
      .read<any>();
    return resource as any | undefined;
  } catch (err: unknown) {
    // NOTE(review): assumes the SDK exposes the HTTP status of a failed read
    // as `code` (or `statusCode`) on the thrown error — confirm against the
    // installed @azure/cosmos version.
    const details =
      typeof err === "object" && err !== null
        ? (err as { code?: unknown; statusCode?: unknown })
        : undefined;
    if (details?.code === 404 || details?.statusCode === 404) {
      return undefined;
    }
    throw err;
  }
};

/**
 * Creates or replaces the meta document of an exam partition.
 *
 * The document carries the id from `metaIdForExam`, the exam id, the
 * `META_TYPE` marker, the given checksum and the current time as an ISO
 * string in `updatedAt`.
 *
 * @param container - Cosmos container to write to.
 * @param examId - Exam whose meta document is written.
 * @param checksum - Checksum to record for the exam's dataset.
 */
const writeMeta = async (
  container: Container,
  examId: string,
  checksum: string,
) => {
  await container.items.upsert({
    id: metaIdForExam(examId),
    examId,
    type: META_TYPE,
    checksum,
    updatedAt: new Date().toISOString(),
  });
};

/**
 * Deletes every non-meta document that belongs to the given exam.
 *
 * Ids are queried first, then the documents are deleted one at a time. A
 * failed delete is logged with `console.warn` and does not stop the loop.
 *
 * @param container - Cosmos container to purge from.
 * @param examId - Exam whose documents are removed.
 */
const purgeExamPartition = async (container: Container, examId: string) => {
  // Project ids only; documents whose type equals META_TYPE are excluded.
  const { resources } = await container.items
    .query({
      query:
        "SELECT c.id FROM c WHERE c.examId = @examId AND (NOT IS_DEFINED(c.type) OR c.type != @metaType)",
      parameters: [
        { name: "@examId", value: examId },
        { name: "@metaType", value: META_TYPE },
      ],
    })
    .fetchAll();

  const rows = resources as Array<{ id: string }>;
  for (const row of rows) {
    const id = row.id;
    try {
      await container.item(id, examId).delete();
    } catch (err) {
      console.warn(`Failed to delete item ${id} in exam ${examId}:`, err);
    }
  }
};

/**
 * Upserts all questions of an exam and then records the dataset checksum.
 *
 * Each question is copied with `examId` set before it is upserted, one after
 * another. The meta document is written only after every upsert has resolved.
 *
 * @param container - Cosmos container to write to.
 * @param examId - Exam the questions belong to.
 * @param questions - Question documents to store.
 * @param checksum - Checksum written to the meta document afterwards.
 */
const seedExamPartition = async (
  container: Container,
  examId: string,
  questions: any[],
  checksum: string,
) => {
  for (const entry of questions) {
    // Copy so the caller's object is not mutated; examId overrides any existing value.
    const doc = Object.assign({}, entry, { examId });
    await container.items.upsert(doc);
  }
  await writeMeta(container, examId, checksum);
};

/**
 * Brings the stored questions of an exam in line with the upstream source.
 *
 * Steps:
 * 1. Read the exam's meta document. If its `updatedAt` is younger than
 *    `SYNC_COOLDOWN_MS`, return without contacting upstream.
 * 2. Fetch the questions and their checksum from `link`; give up if the fetch
 *    yields nothing.
 * 3. If there is no meta document or the checksum differs, purge the
 *    partition and reseed it (which also writes fresh metadata).
 * 4. Otherwise rewrite the meta document so `updatedAt` reflects this check.
 *    Without that refresh the cooldown would never restart while upstream is
 *    unchanged, and every later call would fetch upstream again. This also
 *    backfills meta documents that have no usable `updatedAt`.
 *
 * Never throws: any failure is logged with `console.warn`.
 *
 * @param container - Cosmos container holding questions and meta documents.
 * @param examId - Exam (partition) to synchronise.
 * @param link - URL the questions are fetched from.
 */
const ensureExamSynced = async (
  container: Container,
  examId: string,
  link: string,
) => {
  try {
    const meta = await readMeta(container, examId);
    // Cooldown: avoid fetching upstream too frequently
    if (meta && meta.updatedAt) {
      const last = new Date(meta.updatedAt).getTime();
      if (!Number.isNaN(last) && Date.now() - last < SYNC_COOLDOWN_MS) {
        return;
      }
    }

    const result = await fetchQuestionsAndChecksum(link);
    if (!result) return;

    const { questions, checksum } = result;
    // NOTE(review): an empty `questions` array still purges the partition —
    // confirm that is intended when upstream content cannot be parsed.
    if (!meta || meta.checksum !== checksum) {
      await purgeExamPartition(container, examId);
      await seedExamPartition(container, examId, questions, checksum);
    } else {
      // Content unchanged: refresh updatedAt so the cooldown window restarts.
      await writeMeta(container, examId, checksum);
    }
  } catch (err) {
    console.warn("ensureExamSynced failed:", err);
  }
};

export const CombinedQuestionsDataSource = () => {
return {
async getQuestion(id: string, link: string) {
try {
const examId = extractExamId(link);
const questionsContainer = await getQuestionsContainer();

// Ensure the partition is up to date with source content
await ensureExamSynced(questionsContainer, examId, link);

// Try Cosmos DB first (most efficient)
const querySpec = {
query: "SELECT * FROM c WHERE c.id = @id AND c.examId = @examId",
Expand Down Expand Up @@ -128,6 +226,8 @@ export const CombinedQuestionsDataSource = () => {
const examId = extractExamId(link);
const questionsContainer = await getQuestionsContainer();

await ensureExamSynced(questionsContainer, examId, link);

// Try Cosmos DB first
const querySpec = {
query: "SELECT VALUE COUNT(c.id) FROM c WHERE c.examId = @examId",
Expand Down Expand Up @@ -170,6 +270,8 @@ export const CombinedQuestionsDataSource = () => {
const examId = extractExamId(link);
const questionsContainer = await getQuestionsContainer();

await ensureExamSynced(questionsContainer, examId, link);

// Try Cosmos DB first
const querySpec = {
query: "SELECT * FROM c WHERE c.examId = @examId",
Expand Down
18 changes: 18 additions & 0 deletions lib/graphql/repoQuestions.tsx
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { createHash } from "crypto";
const scrapeQuestions = (markdownText: string) => {
const regex =
/### (.*?)\s*\r?\n\r?\n((?:\!\[.*?\]\(.*?\)\s*\r?\n\r?\n)*?)((?:- \[(?:x| )\] .*?\r?\n)+)/gs;
Expand Down Expand Up @@ -57,3 +58,20 @@ export const fetchQuestions = async (link: string) => {
console.error(err.message);
}
};

/**
 * Downloads the markdown at `link`, scrapes its questions and fingerprints it.
 *
 * The checksum is the hex-encoded SHA-256 of the raw response text, so any
 * change to the source text changes it.
 *
 * @param link - URL of the markdown source.
 * @returns The scraped questions and the checksum, or `undefined` when the
 *   request fails or responds with a non-OK status (the error message is
 *   logged with `console.error`).
 */
export const fetchQuestionsAndChecksum = async (
  link: string,
): Promise<{ questions: any[]; checksum: string } | undefined> => {
  try {
    const response = await fetch(link);
    if (!response.ok) {
      throw new Error(response.statusText);
    }
    const body = await response.text();
    return {
      questions: scrapeQuestions(body),
      checksum: createHash("sha256").update(body).digest("hex"),
    };
  } catch (err: any) {
    console.error(err.message);
    return undefined;
  }
};
Loading