Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ jobs:
node_version:
- 20
- 24
exclude:
- operating_system: windows-latest
node_version: 24
- operating_system: macos-latest
node_version: 24
fail-fast: false # run tests on other operating systems even if one fails

runs-on: ${{ matrix.operating_system }}
Expand Down Expand Up @@ -46,6 +51,11 @@ jobs:
node_version:
- 20
- 24
exclude:
- operating_system: windows-latest
node_version: 24
- operating_system: macos-latest
node_version: 24
fail-fast: false # run tests on other operating systems even if one fails

runs-on: ${{ matrix.operating_system }}
Expand Down Expand Up @@ -76,6 +86,11 @@ jobs:
node_version:
- 20
- 24
exclude:
- operating_system: windows-latest
node_version: 24
- operating_system: macos-latest
node_version: 24
fail-fast: false

runs-on: ${{ matrix.operating_system }}
Expand Down
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,22 @@

All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased [major]

> Development of this release was supported by the [French Ministry for Foreign Affairs](https://www.diplomatie.gouv.fr/fr/politique-etrangere-de-la-france/diplomatie-numerique/) through its ministerial [State Startups incubator](https://beta.gouv.fr/startups/open-terms-archive.html) under the aegis of the Ambassador for Digital Affairs.

### Added

- Add `ota apply-technical-upgrades` CLI command to apply technical upgrades independently

### Changed

- **Breaking:** Remove `--extract-only` option from `ota track` command; use the new `ota apply-technical-upgrades` command instead

### Fixed

- Fix incorrect versioning that occurred when adding new source documents to combined terms declarations

## 9.2.3 - 2025-11-19

_Full changeset and discussions: [#1204](https://github.com/OpenTermsArchive/engine/pull/1204)._
Expand Down
5 changes: 2 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,9 @@ For command-line examples and documentation, we follow the [docopt usage pattern

To make commands easier to understand, we document all CLI options and examples using the long form of each option.


```diff
- ota track -s $service_id -r
+ ota track --services <service_id> --extract-only
- ota track -s <service_id> -t <terms_type>
+ ota track --services <service_id> --types <terms_type>
```

## Naming
Expand Down
19 changes: 19 additions & 0 deletions bin/ota-apply-technical-upgrades.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#! /usr/bin/env node
import './env.js';

import path from 'path';
import { fileURLToPath, pathToFileURL } from 'url';

import { program } from 'commander';

// Entry point of the `ota apply-technical-upgrades` sub-command: it declares the
// command-line interface, then hands the parsed options over to the engine.

const currentDirectory = path.dirname(fileURLToPath(import.meta.url));
const engineEntryPointUrl = pathToFileURL(path.resolve(currentDirectory, '../src/index.js'));

// Load asynchronously to ensure env.js is loaded before
const { applyTechnicalUpgrades } = await import(engineEntryPointUrl);

program.name('ota apply-technical-upgrades');
program.description('Apply technical upgrades by generating new versions from the latest snapshots using updated declarations, engine logic, or dependencies, and by retrieving any missing snapshots for newly added source documents');
program.option('-s, --services [serviceId...]', 'service IDs to apply technical upgrades to');
program.option('-t, --types [termsType...]', 'terms types to apply technical upgrades to');

const parsedOptions = program.parse(process.argv).opts();

applyTechnicalUpgrades(parsedOptions);
1 change: 0 additions & 1 deletion bin/ota-track.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ program
.description('Retrieve declared documents, record snapshots, extract versions and publish the resulting records')
.option('-s, --services [serviceId...]', 'service IDs of services to track')
.option('-t, --types [termsType...]', 'terms types to track')
.option('-e, --extract-only', 'extract versions from existing snapshots with latest declarations and engine, without recording new snapshots')
.option('--schedule', 'track automatically at a regular interval');

track(program.parse(process.argv).opts());
1 change: 1 addition & 0 deletions bin/ota.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ program
.description(description)
.version(version)
.command('track', 'Track the current terms of services according to provided declarations')
.command('apply-technical-upgrades', 'Apply technical upgrades by generating new versions from the latest snapshots using updated declarations, engine logic, or dependencies')
.command('validate', 'Run a series of tests to check the validity of terms declarations')
.command('lint', 'Check format and stylistic errors in declarations and auto fix them')
.command('dataset', 'Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
Expand Down
86 changes: 75 additions & 11 deletions src/archivist/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const { version: PACKAGE_VERSION } = require('../../package.json');
// - too many requests on the same endpoint yield 403
// - sometimes, when creating a commit, no SHA is returned for unknown reasons
const MAX_PARALLEL_TRACKING = 1;
const MAX_PARALLEL_EXTRACTING = 10;
const MAX_PARALLEL_TECHNICAL_UPGRADES = 10;

export const EVENTS = [
'snapshotRecorded',
Expand Down Expand Up @@ -128,22 +128,40 @@ export default class Archivist extends events.EventEmitter {
});
}

async track({ services: servicesIds = this.servicesIds, types: termsTypes = [], extractOnly = false } = {}) {
async track({ services: servicesIds = this.servicesIds, types: termsTypes = [] } = {}) {
await this.processTerms({
servicesIds,
termsTypes,
technicalUpgradeOnly: false,
concurrency: MAX_PARALLEL_TRACKING,
});
}

async applyTechnicalUpgrades({ services: servicesIds = this.servicesIds, types: termsTypes = [] } = {}) {
await this.processTerms({
servicesIds,
termsTypes,
technicalUpgradeOnly: true,
concurrency: MAX_PARALLEL_TECHNICAL_UPGRADES,
});
}

async processTerms({ servicesIds, termsTypes, technicalUpgradeOnly, concurrency }) {
const numberOfTerms = Service.getNumberOfTerms(this.services, servicesIds, termsTypes);

this.emit('trackingStarted', servicesIds.length, numberOfTerms, extractOnly);
this.emit('trackingStarted', servicesIds.length, numberOfTerms, technicalUpgradeOnly);

await Promise.all([ launchHeadlessBrowser(), this.recorder.initialize() ]);

this.trackingQueue.concurrency = extractOnly ? MAX_PARALLEL_EXTRACTING : MAX_PARALLEL_TRACKING;
this.trackingQueue.concurrency = concurrency;

servicesIds.forEach(serviceId => {
this.services[serviceId].getTermsTypes().forEach(termsType => {
if (termsTypes.length && !termsTypes.includes(termsType)) {
return;
}

this.trackingQueue.push({ terms: this.services[serviceId].getTerms({ type: termsType }), extractOnly });
this.trackingQueue.push({ terms: this.services[serviceId].getTerms({ type: termsType }), technicalUpgradeOnly });
});
});

Expand All @@ -153,12 +171,14 @@ export default class Archivist extends events.EventEmitter {

await Promise.all([ stopHeadlessBrowser(), this.recorder.finalize() ]);

this.emit('trackingCompleted', servicesIds.length, numberOfTerms, extractOnly);
this.emit('trackingCompleted', servicesIds.length, numberOfTerms, technicalUpgradeOnly);
}

async trackTermsChanges({ terms, extractOnly = false }) {
if (!extractOnly) {
async trackTermsChanges({ terms, technicalUpgradeOnly = false }) {
if (!technicalUpgradeOnly) {
await this.fetchAndRecordSnapshots(terms);
} else {
await this.fetchAndRecordNewSourceDocuments(terms); // In technical upgrade mode, fetch and record snapshots only for new source documents that don't have existing snapshots yet (e.g., when a declaration is updated to add a new source document)
}

const contents = await this.extractContentsFromSnapshots(terms);
Expand All @@ -167,7 +187,7 @@ export default class Archivist extends events.EventEmitter {
return;
}

await this.recordVersion(terms, contents.join(Version.SOURCE_DOCUMENTS_SEPARATOR), extractOnly);
await this.recordVersion(terms, contents.join(Version.SOURCE_DOCUMENTS_SEPARATOR), technicalUpgradeOnly);
}

async fetchAndRecordSnapshots(terms) {
Expand All @@ -190,6 +210,50 @@ export default class Archivist extends events.EventEmitter {
}
}

async fetchAndRecordNewSourceDocuments(terms) {
if (!terms.hasMultipleSourceDocuments) { // If the terms has only one source document, there is nothing to do
return;
}

const existingVersion = await this.recorder.versionsRepository.findLatest(terms.service.id, terms.type);

if (!existingVersion) { // If the terms does not have a version recorded, skip this step as the next version will be tagged as "First record…" anyway
return;
}

const missingSourceDocuments = [];

for (const sourceDocument of terms.sourceDocuments) {
const snapshot = await this.recorder.getLatestSnapshot(terms, sourceDocument.id);

if (!snapshot) {
missingSourceDocuments.push(sourceDocument);
}
}

if (!missingSourceDocuments.length) {
return;
}

terms.fetchDate = new Date();
const fetchDocumentErrors = [];

for (const sourceDocument of missingSourceDocuments) {
const error = await this.fetchSourceDocument(sourceDocument);

if (error) {
fetchDocumentErrors.push(error);
} else {
await this.recordSnapshot(terms, sourceDocument);
sourceDocument.clearContent(); // Reduce memory usage by clearing no longer needed large content strings
}
}

if (fetchDocumentErrors.length) {
throw new InaccessibleContentError(fetchDocumentErrors);
}
}

async fetchSourceDocument(sourceDocument) {
const { location: url, executeClientScripts, cssSelectors } = sourceDocument;

Expand Down Expand Up @@ -249,14 +313,14 @@ export default class Archivist extends events.EventEmitter {
return contents;
}

async recordVersion(terms, content, extractOnly) {
async recordVersion(terms, content, technicalUpgradeOnly) {
const record = new Version({
content,
snapshotIds: terms.sourceDocuments.map(sourceDocuments => sourceDocuments.snapshotId),
serviceId: terms.service.id,
termsType: terms.type,
fetchDate: terms.fetchDate,
isExtractOnly: extractOnly,
isTechnicalUpgrade: technicalUpgradeOnly,
metadata: { 'x-engine-version': PACKAGE_VERSION },
});

Expand Down
Loading
Loading