Skip to content
This repository was archived by the owner on Aug 6, 2025. It is now read-only.

Commit 6f1c48a

Browse files
authored
DOP-3751: Identify last updated time in page's static assets (#829)
1 parent 88c2186 commit 6f1c48a

File tree

2 files changed

+247
-30
lines changed

2 files changed

+247
-30
lines changed

modules/persistence/src/services/pages/index.ts

Lines changed: 83 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,34 @@ interface StaticAsset {
99
key: string;
1010
}
1111

12+
interface UpdatedAsset extends StaticAsset {
13+
updated_at?: Date;
14+
}
15+
1216
interface PageAst {
1317
[key: string]: any;
1418
}
1519

16-
export interface UpdatedPage {
20+
export interface Page {
1721
page_id: string;
1822
filename: string;
1923
ast: PageAst;
20-
static_assets: StaticAsset[];
24+
static_assets: UpdatedAsset[];
25+
}
2126

27+
export interface UpdatedPage extends Page {
2228
created_at: Date;
2329
updated_at: Date;
2430
deleted: boolean;
2531
}
2632

33+
interface PreviousPageMapping {
34+
[key: string]: {
35+
ast: PageAst;
36+
static_assets: StaticAsset[];
37+
};
38+
}
39+
2740
const COLLECTION_NAME = 'documents';
2841
const UPDATED_AST_COLL_NAME = 'updated_documents';
2942

@@ -38,7 +51,6 @@ const pagesFromZip = (zip: AdmZip) => {
3851
};
3952

4053
/**
41-
*
4254
* Finds the page documents for a given Snooty project name + branch combination.
4355
* If this is the first build for the Snooty project name + branch, no documents
4456
* will be found.
@@ -56,6 +68,7 @@ const findPrevPageDocs = async (pageIdPrefix: string, collection: string) => {
5668
_id: 0,
5769
page_id: 1,
5870
ast: 1,
71+
static_assets: 1,
5972
};
6073

6174
try {
@@ -70,11 +83,14 @@ const findPrevPageDocs = async (pageIdPrefix: string, collection: string) => {
7083

7184
const createPageAstMapping = async (docsCursor: FindCursor) => {
7285
// Create mapping from each page id to its AST and static assets
73-
const mapping: Record<string, object> = {};
86+
const mapping: PreviousPageMapping = {};
7487
// Create set of all page ids. To be used for tracking unseen pages in the current build
7588
const pageIds = new Set<string>();
7689
for await (const doc of docsCursor) {
77-
mapping[doc.page_id] = doc.ast;
90+
mapping[doc.page_id] = {
91+
ast: doc.ast,
92+
static_assets: doc.static_assets,
93+
};
7894
pageIds.add(doc.page_id);
7995
}
8096
return { mapping, pageIds };
@@ -83,29 +99,27 @@ const createPageAstMapping = async (docsCursor: FindCursor) => {
8399
class UpdatedPagesManager {
84100
currentPages: Document[];
85101
operations: AnyBulkWriteOperation[];
86-
prevPageDocsMapping: Record<string, object>;
102+
prevPageDocsMapping: PreviousPageMapping;
87103
prevPageIds: Set<string>;
104+
updateTime: Date;
88105

89-
constructor(prevPageDocsMapping: Record<string, object>, prevPagesIds: Set<string>, pages: Document[]) {
106+
constructor(prevPageDocsMapping: PreviousPageMapping, prevPagesIds: Set<string>, pages: Document[]) {
90107
this.currentPages = pages;
91108
this.operations = [];
92109
this.prevPageDocsMapping = prevPageDocsMapping;
93110
this.prevPageIds = prevPagesIds;
94111

95-
const updateTime = new Date();
96-
this.checkForPageDiffs(updateTime);
97-
this.markUnseenPagesAsDeleted(updateTime);
112+
this.updateTime = new Date();
113+
this.checkForPageDiffs();
114+
this.markUnseenPagesAsDeleted();
98115
}
99116

100117
/**
101-
*
102118
* Compares the ASTs of the current pages with the previous pages. New update
103119
* operations are added whenever a diff in the page ASTs is found. Page IDs are
104120
* removed from `prevPageIds` to signal that the previous page has been "seen"
105-
*
106-
* @param updateTime - the time to set updates to
107121
*/
108-
checkForPageDiffs(updateTime: Date) {
122+
checkForPageDiffs() {
109123
this.currentPages.forEach((page) => {
110124
// Filter out rst (non-page) files
111125
if (!page.filename.endsWith('.txt')) {
@@ -114,10 +128,11 @@ class UpdatedPagesManager {
114128

115129
const currentPageId = page.page_id;
116130
this.prevPageIds.delete(currentPageId);
131+
const prevPageData = this.prevPageDocsMapping[currentPageId];
117132

118133
// Update the document if page's current AST is different from previous build's.
119134
// New pages should always count as having a "different" AST
120-
if (!isEqual(page.ast, this.prevPageDocsMapping[currentPageId])) {
135+
if (!isEqual(page.ast, prevPageData?.ast)) {
121136
const operation = {
122137
updateOne: {
123138
filter: { page_id: currentPageId },
@@ -126,12 +141,12 @@ class UpdatedPagesManager {
126141
page_id: currentPageId,
127142
filename: page.filename,
128143
ast: page.ast,
129-
static_assets: page.static_assets,
130-
updated_at: updateTime,
144+
static_assets: this.findUpdatedAssets(page.static_assets, prevPageData?.static_assets),
145+
updated_at: this.updateTime,
131146
deleted: false,
132147
},
133148
$setOnInsert: {
134-
created_at: updateTime,
149+
created_at: this.updateTime,
135150
},
136151
},
137152
upsert: true,
@@ -143,20 +158,66 @@ class UpdatedPagesManager {
143158
}
144159

145160
/**
161+
* Identifies any changes in assets between the current page and its previous page.
162+
* A new array of static assets with their last update time is returned.
146163
*
147-
* Marks any pages from the previous build that were not used as "deleted"
164+
* The Snooty Data API will take into account an asset's `updated_at` field to
165+
* compare with timestamps that it receives on requests for updated pages. When
166+
* the API sends an updated page, an updated page's asset will only be sent if that asset's
167+
* timestamp is greater than the timestamp sent in the request (denoting a change).
168+
* Unchanged assets with older timestamps will not be sent.
169+
*
170+
* Assets that are deleted between builds are not included since the Snooty Data API
171+
* will not need to return them for now.
148172
*
149-
* @param updateTime - the time to set updates to
173+
* @param currentPageAssets
174+
* @param prevPageAssets
175+
*/
176+
findUpdatedAssets(currentPageAssets: StaticAsset[], prevPageAssets?: UpdatedAsset[]) {
  // Either list can be absent (a brand-new page has no previous assets, and the
  // original guard already tested `currentPageAssets` for truthiness), so treat
  // a missing list as empty instead of throwing on `.forEach`/`.map`.
  const currentAssets = currentPageAssets ?? [];
  const prevAssets = prevPageAssets ?? [];

  // Index the previous assets by checksum AND key. Keying by checksum alone lets
  // two assets with identical contents but different keys overwrite each other,
  // which would make one of them look "changed" on every build.
  const toLookupKey = (checksum: unknown, key: string) => JSON.stringify([checksum, key]);
  const prevUpdateTimes = new Map<string, Date>();
  for (const { checksum, key, updated_at } of prevAssets) {
    // Previous assets stored without a timestamp are stamped with this build's time.
    prevUpdateTimes.set(toLookupKey(checksum, key), updated_at ?? this.updateTime);
  }

  // An asset keeps its previous timestamp only when both checksum and key are
  // unchanged. New, modified, or renamed assets receive the current update time.
  // Assets that exist only in the previous build are intentionally dropped.
  return currentAssets.map(
    ({ checksum, key }): UpdatedAsset => ({
      checksum,
      key,
      updated_at: prevUpdateTimes.get(toLookupKey(checksum, key)) ?? this.updateTime,
    })
  );
}
208+
209+
/**
210+
* Marks any pages from the previous build that were not used as "deleted"
150211
*/
151-
markUnseenPagesAsDeleted(updateTime: Date) {
212+
markUnseenPagesAsDeleted() {
152213
this.prevPageIds.forEach((unseenPageId) => {
153214
const operation = {
154215
updateOne: {
155216
filter: { page_id: unseenPageId },
156217
update: {
157218
$set: {
158219
deleted: true,
159-
updated_at: updateTime,
220+
updated_at: this.updateTime,
160221
},
161222
},
162223
},
@@ -171,7 +232,6 @@ class UpdatedPagesManager {
171232
}
172233

173234
/**
174-
*
175235
* Upserts pages in separate collection. Copies of a page are created by page_id.
176236
* Updated pages within the same Snooty project name + branch should only update
177237
* related page documents.

0 commit comments

Comments
 (0)