Skip to content

Commit 4924940

Browse files
committed
Fix extractOnly step
Fetch missing snapshots for newly added source documents in combined terms declarations
1 parent aec5133 commit 4924940

File tree

2 files changed

+53
-2
lines changed

2 files changed

+53
-2
lines changed

src/archivist/index.js

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,10 @@ export default class Archivist extends events.EventEmitter {
159159
async trackTermsChanges({ terms, extractOnly = false }) {
160160
if (!extractOnly) {
161161
await this.fetchAndRecordSnapshots(terms);
162+
} else {
163+
// In extractOnly mode (technical upgrade pass), fetch and record snapshots only for new source documents
164+
// that don't have existing snapshots yet (e.g., when a declaration is updated to add a new source document)
165+
await this.fetchAndRecordMissingSnapshots(terms);
162166
}
163167

164168
const contents = await this.extractContentsFromSnapshots(terms);
@@ -190,6 +194,50 @@ export default class Archivist extends events.EventEmitter {
190194
}
191195
}
192196

197+
async fetchAndRecordMissingSnapshots(terms) {
198+
if (!terms.hasMultipleSourceDocuments) { // If the terms has only one source document, there is nothing to do
199+
return;
200+
}
201+
202+
const existingVersion = await this.recorder.versionsRepository.findLatest(terms.service.id, terms.type);
203+
204+
if (!existingVersion) { // If the terms already has a version recorded, skip this step as the next version will be tagged as "First record…" anyway
205+
return;
206+
}
207+
208+
const missingSourceDocuments = [];
209+
210+
for (const sourceDocument of terms.sourceDocuments) {
211+
const snapshot = await this.recorder.getLatestSnapshot(terms, sourceDocument.id);
212+
213+
if (!snapshot) {
214+
missingSourceDocuments.push(sourceDocument);
215+
}
216+
}
217+
218+
if (missingSourceDocuments.length) {
219+
return;
220+
}
221+
222+
terms.fetchDate = new Date();
223+
const fetchDocumentErrors = [];
224+
225+
for (const sourceDocument of missingSourceDocuments) {
226+
const error = await this.fetchSourceDocument(sourceDocument);
227+
228+
if (error) {
229+
fetchDocumentErrors.push(error);
230+
} else {
231+
await this.recordSnapshot(terms, sourceDocument);
232+
sourceDocument.clearContent(); // Reduce memory usage by clearing no longer needed large content strings
233+
}
234+
}
235+
236+
if (fetchDocumentErrors.length) {
237+
throw new InaccessibleContentError(fetchDocumentErrors);
238+
}
239+
}
240+
193241
async fetchSourceDocument(sourceDocument) {
194242
const { location: url, executeClientScripts, cssSelectors } = sourceDocument;
195243

src/index.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,11 @@ export default async function track({ services, types, extractOnly, schedule })
4040
});
4141
}
4242

43-
// The result of the extraction step that generates the version from the snapshots may depend on changes to the engine or its dependencies.
44-
// The process thus starts by only performing the extraction process so that any version following such changes can be labelled (to avoid sending notifications, for example)
43+
// Technical upgrade pass: apply changes from engine, dependency, or declaration upgrades.
44+
// This regenerates versions from existing snapshots with updated extraction logic.
45+
// For terms with combined source documents, if a new document was added to the declaration,
46+
// it will be fetched and combined with existing snapshots to regenerate the complete version.
47+
// All versions from this pass are labeled as technical upgrades to avoid false notifications about content changes.
4548
await archivist.track({ services, types, extractOnly: true });
4649

4750
if (extractOnly) {

0 commit comments

Comments
 (0)