11import AdmZip from 'adm-zip' ;
22import { deserialize } from 'bson' ;
3- import isEqual from 'fast-deep-equal' ;
4- import { AnyBulkWriteOperation , Document , FindCursor , ObjectId } from 'mongodb' ;
5- import { bulkWrite , db , insert } from '../connector' ;
6-
7- interface StaticAsset {
8- checksum : string ;
9- key : string ;
10- }
11-
12- interface PageAst {
13- [ key : string ] : any ;
14- }
15-
16- export interface UpdatedPage {
17- page_id : string ;
18- filename : string ;
19- ast : PageAst ;
20- static_assets : StaticAsset [ ] ;
21-
22- created_at : Date ;
23- updated_at : Date ;
24- deleted : boolean ;
25- }
3+ import { ObjectId } from 'mongodb' ;
4+ import { insert } from '../connector' ;
265
276const COLLECTION_NAME = 'documents' ;
28- const UPDATED_AST_COLL_NAME = 'updated_documents' ;
297
308// Service responsible for memoization of page level documents.
319// Any extraneous logic performed on page level documents as part of upload should be added here
@@ -37,175 +15,12 @@ const pagesFromZip = (zip: AdmZip) => {
3715 . map ( ( entry ) => deserialize ( entry . getData ( ) ) ) ;
3816} ;
3917
40- /**
41- *
42- * Finds the page documents for a given Snooty project name + branch combination.
43- * If this is the first build for the Snooty project name + branch, no documents
44- * will be found.
45- *
46- * @param pageIdPrefix - Includes the Snooty project name, user (docsworker-xlarge), and branch
47- * @param collection - The collection to perform the find query on
48- */
49- const findPrevPageDocs = async ( pageIdPrefix : string , collection : string ) => {
50- const dbSession = await db ( ) ;
51- const findQuery = {
52- page_id : { $regex : new RegExp ( `^${ pageIdPrefix } ` ) } ,
53- deleted : false ,
54- } ;
55- const projection = {
56- _id : 0 ,
57- page_id : 1 ,
58- ast : 1 ,
59- } ;
60-
61- try {
62- return dbSession . collection < UpdatedPage > ( collection ) . find ( findQuery ) . project ( projection ) ;
63- } catch ( error ) {
64- console . error (
65- `Error trying to find previous page documents using prefix ${ pageIdPrefix } in ${ collection } }: ${ error } `
66- ) ;
67- throw error ;
68- }
69- } ;
70-
71- const createPageAstMapping = async ( docsCursor : FindCursor ) => {
72- // Create mapping for page id and its AST
73- const mapping : Record < string , object > = { } ;
74- // Create set of all page ids. To be used for tracking unseen pages in the current build
75- const pageIds = new Set < string > ( ) ;
76- for await ( const doc of docsCursor ) {
77- mapping [ doc . page_id ] = doc . ast ;
78- pageIds . add ( doc . page_id ) ;
79- }
80- return { mapping, pageIds } ;
81- } ;
82-
83- class UpdatedPagesManager {
84- currentPages : Document [ ] ;
85- operations : AnyBulkWriteOperation [ ] ;
86- prevPageDocsMapping : Record < string , object > ;
87- prevPageIds : Set < string > ;
88-
89- constructor ( prevPageDocsMapping : Record < string , object > , prevPagesIds : Set < string > , pages : Document [ ] ) {
90- this . currentPages = pages ;
91- this . operations = [ ] ;
92- this . prevPageDocsMapping = prevPageDocsMapping ;
93- this . prevPageIds = prevPagesIds ;
94-
95- const updateTime = new Date ( ) ;
96- this . checkForPageDiffs ( updateTime ) ;
97- this . markUnseenPagesAsDeleted ( updateTime ) ;
98- }
99-
100- /**
101- *
102- * Compares the ASTs of the current pages with the previous pages. New update
103- * operations are added whenever a diff in the page ASTs is found. Page IDs are
104- * removed from `prevPageIds` to signal that the previous page has been "seen"
105- *
106- * @param updateTime - the time to set updates to
107- */
108- checkForPageDiffs ( updateTime : Date ) {
109- this . currentPages . forEach ( ( page ) => {
110- // Filter out rst (non-page) files
111- if ( ! page . filename . endsWith ( '.txt' ) ) {
112- return ;
113- }
114-
115- const currentPageId = page . page_id ;
116- this . prevPageIds . delete ( currentPageId ) ;
117-
118- // Update the document if page's current AST is different from previous build's.
119- // New pages should always count as having a "different" AST
120- if ( ! isEqual ( page . ast , this . prevPageDocsMapping [ currentPageId ] ) ) {
121- const operation = {
122- updateOne : {
123- filter : { page_id : currentPageId } ,
124- update : {
125- $set : {
126- page_id : currentPageId ,
127- filename : page . filename ,
128- ast : page . ast ,
129- static_assets : page . static_assets ,
130- updated_at : updateTime ,
131- deleted : false ,
132- } ,
133- $setOnInsert : {
134- created_at : updateTime ,
135- } ,
136- } ,
137- upsert : true ,
138- } ,
139- } ;
140- this . operations . push ( operation ) ;
141- }
142- } ) ;
143- }
144-
145- /**
146- *
147- * Marks any pages from the previous build that were not used as "deleted"
148- *
149- * @param updateTime - the time to set updates to
150- */
151- markUnseenPagesAsDeleted ( updateTime : Date ) {
152- this . prevPageIds . forEach ( ( unseenPageId ) => {
153- const operation = {
154- updateOne : {
155- filter : { page_id : unseenPageId } ,
156- update : {
157- $set : {
158- deleted : true ,
159- updated_at : updateTime ,
160- } ,
161- } ,
162- } ,
163- } ;
164- this . operations . push ( operation ) ;
165- } ) ;
166- }
167-
168- getOperations ( ) {
169- return this . operations ;
170- }
171- }
172-
173- /**
174- *
175- * Upserts pages in separate collection. Copies of a page are created by page_id.
176- * Updated pages within the same Snooty project name + branch should only update
177- * related page documents.
178- *
179- * @param pages
180- * @param collection
181- */
182- const updatePages = async ( pages : Document [ ] , collection : string ) => {
183- if ( pages . length === 0 ) {
184- return ;
185- }
186-
187- // Find all pages that share the same project name + branch. Expects page IDs
188- // to include these two properties after parse
189- const pageIdPrefix = pages [ 0 ] . page_id . split ( '/' ) . slice ( 0 , 3 ) . join ( '/' ) ;
190- const previousPagesCursor = await findPrevPageDocs ( pageIdPrefix , collection ) ;
191- const { mapping : prevPageDocsMapping , pageIds : prevPageIds } = await createPageAstMapping ( previousPagesCursor ) ;
192-
193- const updatedPagesManager = new UpdatedPagesManager ( prevPageDocsMapping , prevPageIds , pages ) ;
194- const operations = updatedPagesManager . getOperations ( ) ;
195-
196- if ( operations . length > 0 ) {
197- await bulkWrite ( operations , collection ) ;
198- }
199- } ;
200-
201- export const insertAndUpdatePages = async ( buildId : ObjectId , zip : AdmZip ) => {
18+ export const insertPages = async ( buildId : ObjectId , zip : AdmZip ) => {
20219 try {
203- const pages = pagesFromZip ( zip ) ;
204- return Promise . all ( [ insert ( pages , COLLECTION_NAME , buildId ) , updatePages ( pages , UPDATED_AST_COLL_NAME ) ] ) ;
20+ const pages = await pagesFromZip ( zip ) ;
21+ return insert ( pages , COLLECTION_NAME , buildId ) ;
20522 } catch ( error ) {
20623 console . error ( `Error at insertion time for ${ COLLECTION_NAME } : ${ error } ` ) ;
20724 throw error ;
20825 }
20926} ;
210-
211- export const _updatePages = updatePages ;
0 commit comments