@@ -9,21 +9,34 @@ interface StaticAsset {
99 key : string ;
1010}
1111
/**
 * A static asset that can additionally record when it was last updated.
 * `updated_at` is optional, so a plain StaticAsset also satisfies this shape.
 */
12+ interface UpdatedAsset extends StaticAsset {
13+ updated_at ?: Date ;
14+ }
15+
/**
 * Open-ended shape for a page's AST: any string key is allowed and values are
 * untyped (`any`), so no structure is enforced by the compiler here.
 */
1216interface PageAst {
1317 [ key : string ] : any ;
1418}
1519
16- export interface UpdatedPage {
20+ export interface Page {
1721 page_id : string ;
1822 filename : string ;
1923 ast : PageAst ;
20- static_assets : StaticAsset [ ] ;
24+ static_assets : UpdatedAsset [ ] ;
25+ }
2126
/**
 * A Page extended with bookkeeping fields: creation and last-update timestamps
 * plus a `deleted` flag.
 */
27+ export interface UpdatedPage extends Page {
2228 created_at : Date ;
2329 updated_at : Date ;
2430 deleted : boolean ;
2531}
2632
/**
 * Lookup of data kept from previously stored page documents. Each string key
 * (populated with `page_id` by createPageAstMapping) maps to that page's `ast`
 * and `static_assets`.
 *
 * NOTE(review): findUpdatedAssets reads an optional `updated_at` from these
 * assets, so `UpdatedAsset[]` may be the more precise element type - confirm.
 */
33+ interface PreviousPageMapping {
34+ [ key : string ] : {
35+ ast : PageAst ;
36+ static_assets : StaticAsset [ ] ;
37+ } ;
38+ }
39+
2740const COLLECTION_NAME = 'documents' ;
2841const UPDATED_AST_COLL_NAME = 'updated_documents' ;
2942
@@ -38,7 +51,6 @@ const pagesFromZip = (zip: AdmZip) => {
3851} ;
3952
4053/**
41- *
4254 * Finds the page documents for a given Snooty project name + branch combination.
4355 * If this is the first build for the Snooty project name + branch, no documents
4456 * will be found.
@@ -56,6 +68,7 @@ const findPrevPageDocs = async (pageIdPrefix: string, collection: string) => {
5668 _id : 0 ,
5769 page_id : 1 ,
5870 ast : 1 ,
71+ static_assets : 1 ,
5972 } ;
6073
6174 try {
@@ -70,11 +83,14 @@ const findPrevPageDocs = async (pageIdPrefix: string, collection: string) => {
7083
7184const createPageAstMapping = async ( docsCursor : FindCursor ) => {
7285 // Create mapping for page id and its AST
73- const mapping : Record < string , object > = { } ;
86+ const mapping : PreviousPageMapping = { } ;
7487 // Create set of all page ids. To be used for tracking unseen pages in the current build
7588 const pageIds = new Set < string > ( ) ;
7689 for await ( const doc of docsCursor ) {
77- mapping [ doc . page_id ] = doc . ast ;
90+ mapping [ doc . page_id ] = {
91+ ast : doc . ast ,
92+ static_assets : doc . static_assets ,
93+ } ;
7894 pageIds . add ( doc . page_id ) ;
7995 }
8096 return { mapping, pageIds } ;
@@ -83,29 +99,27 @@ const createPageAstMapping = async (docsCursor: FindCursor) => {
8399class UpdatedPagesManager {
84100 currentPages : Document [ ] ;
85101 operations : AnyBulkWriteOperation [ ] ;
86- prevPageDocsMapping : Record < string , object > ;
102+ prevPageDocsMapping : PreviousPageMapping ;
87103 prevPageIds : Set < string > ;
104+ updateTime : Date ;
88105
89- constructor ( prevPageDocsMapping : Record < string , object > , prevPagesIds : Set < string > , pages : Document [ ] ) {
106+ constructor ( prevPageDocsMapping : PreviousPageMapping , prevPagesIds : Set < string > , pages : Document [ ] ) {
90107 this . currentPages = pages ;
91108 this . operations = [ ] ;
92109 this . prevPageDocsMapping = prevPageDocsMapping ;
93110 this . prevPageIds = prevPagesIds ;
94111
95- const updateTime = new Date ( ) ;
96- this . checkForPageDiffs ( updateTime ) ;
97- this . markUnseenPagesAsDeleted ( updateTime ) ;
112+ this . updateTime = new Date ( ) ;
113+ this . checkForPageDiffs ( ) ;
114+ this . markUnseenPagesAsDeleted ( ) ;
98115 }
99116
100117 /**
101- *
102118 * Compares the ASTs of the current pages with the previous pages. New update
103119 * operations are added whenever a diff in the page ASTs is found. Page IDs are
104120 * removed from `prevPageIds` to signal that the previous page has been "seen"
105- *
106- * @param updateTime - the time to set updates to
107121 */
108- checkForPageDiffs ( updateTime : Date ) {
122+ checkForPageDiffs ( ) {
109123 this . currentPages . forEach ( ( page ) => {
110124 // Filter out rst (non-page) files
111125 if ( ! page . filename . endsWith ( '.txt' ) ) {
@@ -114,10 +128,11 @@ class UpdatedPagesManager {
114128
115129 const currentPageId = page . page_id ;
116130 this . prevPageIds . delete ( currentPageId ) ;
131+ const prevPageData = this . prevPageDocsMapping [ currentPageId ] ;
117132
118133 // Update the document if page's current AST is different from previous build's.
119134 // New pages should always count as having a "different" AST
120- if ( ! isEqual ( page . ast , this . prevPageDocsMapping [ currentPageId ] ) ) {
135+ if ( ! isEqual ( page . ast , prevPageData ?. ast ) ) {
121136 const operation = {
122137 updateOne : {
123138 filter : { page_id : currentPageId } ,
@@ -126,12 +141,12 @@ class UpdatedPagesManager {
126141 page_id : currentPageId ,
127142 filename : page . filename ,
128143 ast : page . ast ,
129- static_assets : page . static_assets ,
130- updated_at : updateTime ,
144+ static_assets : this . findUpdatedAssets ( page . static_assets , prevPageData ?. static_assets ) ,
145+ updated_at : this . updateTime ,
131146 deleted : false ,
132147 } ,
133148 $setOnInsert : {
134- created_at : updateTime ,
149+ created_at : this . updateTime ,
135150 } ,
136151 } ,
137152 upsert : true ,
@@ -143,20 +158,66 @@ class UpdatedPagesManager {
143158 }
144159
145160 /**
161+ * Identifies any changes in assets between the current page and its previous page.
162+ * A new array of static assets with their last update time is returned.
146163 *
147- * Marks any pages from the previous build that were not used as "deleted"
164+ * The Snooty Data API will take into account an asset's `updated_at` field to
165+ * compare with timestamps that it receives on requests for updated pages. When
166+ * the API sends an updated page, an updated page's asset will only be sent if that asset's
167+ * timestamp is greater than the timestamp sent in the request (denoting a change).
168+ * Unchanged assets with older timestamps will not be sent.
169+ *
170+ * Assets that are deleted between builds are not included since the Snooty Data API
171+ * will not need to return them for now.
148172 *
149- * @param updateTime - the time to set updates to
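173+ * @param currentPageAssets - static assets of the page in the current build
174+ * @param prevPageAssets - static assets recorded for the same page in the previous build, if any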
175+ */
findUpdatedAssets(currentPageAssets: StaticAsset[], prevPageAssets?: UpdatedAsset[]): UpdatedAsset[] {
  const updatedAssets: UpdatedAsset[] = [];

  // A page may carry no static assets at all. Treat a missing or empty list as
  // "nothing to report" rather than throwing when iterating over it below.
  if (!currentPageAssets || currentPageAssets.length === 0) {
    return updatedAssets;
  }

  // An asset is only considered unchanged when BOTH its checksum and its key
  // (filename) match a previous asset. Indexing by the pair, rather than by
  // checksum alone, keeps two previous assets that share a checksum (the same
  // image stored under two names) from overwriting each other in the lookup.
  const assetId = ({ checksum, key }: StaticAsset) => JSON.stringify([checksum, key]);

  const prevUpdateTimes = new Map<string, Date>();
  prevPageAssets?.forEach((asset) => {
    // Previous assets without a timestamp are treated as updated in this build.
    prevUpdateTimes.set(assetId(asset), asset.updated_at ?? this.updateTime);
  });

  currentPageAssets.forEach((asset) => {
    const { checksum, key } = asset;
    updatedAssets.push({
      checksum,
      key,
      // Unchanged asset: keep its earlier timestamp. New, modified or renamed
      // asset: stamp it with the time of the current update.
      updated_at: prevUpdateTimes.get(assetId(asset)) ?? this.updateTime,
    });
  });

  return updatedAssets;
}
208+
209+ /**
210+ * Marks any pages from the previous build that were not used as "deleted"
150211 */
151- markUnseenPagesAsDeleted ( updateTime : Date ) {
212+ markUnseenPagesAsDeleted ( ) {
152213 this . prevPageIds . forEach ( ( unseenPageId ) => {
153214 const operation = {
154215 updateOne : {
155216 filter : { page_id : unseenPageId } ,
156217 update : {
157218 $set : {
158219 deleted : true ,
159- updated_at : updateTime ,
220+ updated_at : this . updateTime ,
160221 } ,
161222 } ,
162223 } ,
@@ -171,7 +232,6 @@ class UpdatedPagesManager {
171232}
172233
173234/**
174- *
175235 * Upserts pages in separate collection. Copies of a page are created by page_id.
176236 * Updated pages within the same Snooty project name + branch should only update
177237 * related page documents.
0 commit comments