Skip to content
This repository was archived by the owner on Aug 6, 2025. It is now read-only.

Commit 93645ef

Browse files
authored
DOP-3942: Find previous ASTs based on user (#882)
1 parent b98d703 commit 93645ef

File tree

4 files changed

+27 additions, -32 deletions

modules/persistence/index.ts

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -33,19 +33,16 @@ const argv: ModuleArgs = minimist(process.argv.slice(2));
3333
const app = async (path: string, githubUser: string) => {
3434
try {
3535
if (!path) throw missingPathMessage;
36+
const user = githubUser || 'docs-builder-bot';
3637
const zip = new AdmZip(path);
3738
// atomic buildId for all artifacts read by this module - fundamental assumption
3839
// that only one build will be used per run of this module.
3940
const buildId = new mongodb.ObjectId();
40-
const metadata = await metadataFromZip(zip, githubUser);
41+
const metadata = await metadataFromZip(zip, user);
4142
// initialize db connections to handle shared connections
4243
await snootyDb();
4344
await poolDb();
44-
await Promise.all([
45-
insertAndUpdatePages(buildId, zip, githubUser),
46-
insertMetadata(buildId, metadata),
47-
upsertAssets(zip),
48-
]);
45+
await Promise.all([insertAndUpdatePages(buildId, zip, user), insertMetadata(buildId, metadata), upsertAssets(zip)]);
4946
await insertMergedMetadataEntries(buildId, metadata);
5047
// DOP-3447 clean up stale metadata
5148
await deleteStaleMetadata(metadata);

modules/persistence/src/services/metadata/index.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,13 @@ export interface Metadata {
2020
// Service responsible for memoization of metadata entries.
2121
// Any extraneous logic performed on metadata entries as part of upload should be added here
2222
// or within subfolders of this module
23-
export const metadataFromZip = async (zip: AdmZip, githubUser?: string) => {
23+
export const metadataFromZip = async (zip: AdmZip, githubUser: string) => {
2424
const zipEntries = zip.getEntries();
2525
const metadata = zipEntries
2626
.filter((entry) => entry.entryName === 'site.bson')
2727
.map((entry) => deserialize(entry.getData()))[0] as Metadata;
2828
await verifyMetadata(metadata);
29-
metadata.github_username = githubUser || 'docs-builder-bot';
29+
metadata.github_username = githubUser;
3030
return metadata;
3131
};
3232

modules/persistence/src/services/pages/index.ts

Lines changed: 20 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import AdmZip from 'adm-zip';
22
import { deserialize } from 'bson';
33
import isEqual from 'fast-deep-equal';
4-
import { AnyBulkWriteOperation, Document, FindCursor, ObjectId } from 'mongodb';
4+
import { AnyBulkWriteOperation, FindCursor, ObjectId } from 'mongodb';
55
import { bulkWrite, db, insert } from '../connector';
66

77
interface StaticAsset {
@@ -22,6 +22,7 @@ export interface Page {
2222
filename: string;
2323
ast: PageAst;
2424
static_assets: UpdatedAsset[];
25+
github_username: string;
2526
}
2627

2728
export interface UpdatedPage extends Page {
@@ -43,29 +44,30 @@ const UPDATED_AST_COLL_NAME = 'updated_documents';
4344
// Service responsible for memoization of page level documents.
4445
// Any extraneous logic performed on page level documents as part of upload should be added here
4546
// or within subfolders of this module
46-
const pagesFromZip = (zip: AdmZip, githubUser?: string) => {
47+
const pagesFromZip = (zip: AdmZip, githubUser: string): Page[] => {
4748
const zipPages = zip.getEntries();
4849
return zipPages
4950
.filter((entry) => entry.entryName?.startsWith('documents/'))
5051
.map((entry) => {
51-
const document = deserialize(entry.getData());
52-
document.github_username = githubUser || 'docs-builder-bot';
52+
const document = deserialize(entry.getData()) as Page;
53+
document.github_username = githubUser;
5354
return document;
5455
});
5556
};
5657

5758
/**
58-
* Finds the page documents for a given Snooty project name + branch combination.
59-
* If this is the first build for the Snooty project name + branch, no documents
59+
* Finds the page documents for a given Snooty project + branch + user combination.
60+
* If this is the first build for the Snooty project + branch + user, no documents
6061
* will be found.
6162
*
6263
* @param pageIdPrefix - Includes the Snooty project name, user (docsworker-xlarge), and branch
6364
* @param collection - The collection to perform the find query on
6465
*/
65-
const findPrevPageDocs = async (pageIdPrefix: string, collection: string) => {
66+
const findPrevPageDocs = async (pageIdPrefix: string, collection: string, githubUser: string) => {
6667
const dbSession = await db();
6768
const findQuery = {
6869
page_id: { $regex: new RegExp(`^${pageIdPrefix}/`) },
70+
github_username: githubUser,
6971
deleted: false,
7072
};
7173
const projection = {
@@ -101,17 +103,19 @@ const createPageAstMapping = async (docsCursor: FindCursor) => {
101103
};
102104

103105
class UpdatedPagesManager {
104-
currentPages: Document[];
106+
currentPages: Page[];
105107
operations: AnyBulkWriteOperation[];
106108
prevPageDocsMapping: PreviousPageMapping;
107109
prevPageIds: Set<string>;
108110
updateTime: Date;
111+
githubUser: string;
109112

110-
constructor(prevPageDocsMapping: PreviousPageMapping, prevPagesIds: Set<string>, pages: Document[]) {
113+
constructor(prevPageDocsMapping: PreviousPageMapping, prevPagesIds: Set<string>, pages: Page[], githubUser: string) {
111114
this.currentPages = pages;
112115
this.operations = [];
113116
this.prevPageDocsMapping = prevPageDocsMapping;
114117
this.prevPageIds = prevPagesIds;
118+
this.githubUser = githubUser;
115119

116120
this.updateTime = new Date();
117121
this.checkForPageDiffs();
@@ -139,7 +143,7 @@ class UpdatedPagesManager {
139143
if (!isEqual(page.ast, prevPageData?.ast)) {
140144
const operation = {
141145
updateOne: {
142-
filter: { page_id: currentPageId },
146+
filter: { page_id: currentPageId, github_username: page.github_username },
143147
update: {
144148
$set: {
145149
page_id: currentPageId,
@@ -148,7 +152,6 @@ class UpdatedPagesManager {
148152
static_assets: this.findUpdatedAssets(page.static_assets, prevPageData?.static_assets),
149153
updated_at: this.updateTime,
150154
deleted: false,
151-
github_username: page.github_username || 'docs-builder-bot',
152155
},
153156
$setOnInsert: {
154157
created_at: this.updateTime,
@@ -218,7 +221,7 @@ class UpdatedPagesManager {
218221
this.prevPageIds.forEach((unseenPageId) => {
219222
const operation = {
220223
updateOne: {
221-
filter: { page_id: unseenPageId },
224+
filter: { page_id: unseenPageId, github_username: this.githubUser },
222225
update: {
223226
$set: {
224227
deleted: true,
@@ -244,7 +247,7 @@ class UpdatedPagesManager {
244247
* @param pages
245248
* @param collection
246249
*/
247-
const updatePages = async (pages: Document[], collection: string) => {
250+
const updatePages = async (pages: Page[], collection: string, githubUser: string) => {
248251
if (pages.length === 0) {
249252
return;
250253
}
@@ -256,12 +259,12 @@ const updatePages = async (pages: Document[], collection: string) => {
256259
// Find all pages that share the same project name + branch. Expects page IDs
257260
// to include these two properties after parse
258261
const pageIdPrefix = pages[0].page_id.split('/').slice(0, 3).join('/');
259-
const previousPagesCursor = await findPrevPageDocs(pageIdPrefix, collection);
262+
const previousPagesCursor = await findPrevPageDocs(pageIdPrefix, collection, githubUser);
260263
const { mapping: prevPageDocsMapping, pageIds: prevPageIds } = await createPageAstMapping(previousPagesCursor);
261264

262265
const diffsTimerLabel = 'finding page differences';
263266
console.time(diffsTimerLabel);
264-
const updatedPagesManager = new UpdatedPagesManager(prevPageDocsMapping, prevPageIds, pages);
267+
const updatedPagesManager = new UpdatedPagesManager(prevPageDocsMapping, prevPageIds, pages, githubUser);
265268
const operations = updatedPagesManager.getOperations();
266269
console.timeEnd(diffsTimerLabel);
267270

@@ -283,14 +286,14 @@ const updatePages = async (pages: Document[], collection: string) => {
283286
}
284287
};
285288

286-
export const insertAndUpdatePages = async (buildId: ObjectId, zip: AdmZip, githubUser?: string) => {
289+
export const insertAndUpdatePages = async (buildId: ObjectId, zip: AdmZip, githubUser: string) => {
287290
try {
288291
const pages = pagesFromZip(zip, githubUser);
289292
const ops: PromiseLike<any>[] = [insert(pages, COLLECTION_NAME, buildId)];
290293

291294
const featureEnabled = process.env.FEATURE_FLAG_UPDATE_PAGES;
292295
if (featureEnabled && featureEnabled.toUpperCase() === 'TRUE') {
293-
ops.push(updatePages(pages, UPDATED_AST_COLL_NAME));
296+
ops.push(updatePages(pages, UPDATED_AST_COLL_NAME, githubUser));
294297
}
295298

296299
return Promise.all(ops);

modules/persistence/tests/metadata/metadata.test.ts

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,6 @@ describe('metadata module', () => {
4848

4949
describe('metadataFromZip', () => {
5050
it('should get metadata from site.bson', async () => {
51-
const metaFromZip = await _metadataFromZip(zip);
52-
expect(metaFromZip).toEqual({ ...meta, github_username: 'docs-builder-bot' });
53-
});
54-
55-
it('should add github username to metadata', async () => {
5651
const githubUser = 'gritty';
5752
const metaFromZip = await _metadataFromZip(zip, githubUser);
5853
expect(metaFromZip).toEqual({ ...meta, github_username: githubUser });
@@ -63,7 +58,7 @@ describe('metadata module', () => {
6358
const buildId = new ObjectId();
6459
it('should insert metadata docs into metadata collection', async () => {
6560
try {
66-
const metaFromZip = await _metadataFromZip(zip);
61+
const metaFromZip = await _metadataFromZip(zip, 'gritty');
6762
await insertMetadata(buildId, metaFromZip);
6863
} catch (e) {
6964
console.log(e);
@@ -90,7 +85,7 @@ describe('metadata module', () => {
9085

9186
it('removes copies of metadata for same project-branch, keeping the most recent ones', async () => {
9287
await mockDb.collection('metadata').insertMany(testData);
93-
const metaFromZip = await _metadataFromZip(zip);
88+
const metaFromZip = await _metadataFromZip(zip, 'gritty');
9489
await deleteStaleMetadata(metaFromZip);
9590
const res = await mockDb
9691
.collection('metadata')

0 commit comments

Comments
 (0)