Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
OTA_ENGINE_SENDINBLUE_API_KEY='xkeysib-3f51c…'
OTA_ENGINE_SMTP_PASSWORD='password'
OTA_ENGINE_GITHUB_TOKEN=ghp_XXXXXXXXX
OTA_ENGINE_GITLAB_TOKEN=XXXXXXXXXX
OTA_ENGINE_GITLAB_RELEASES_TOKEN=XXXXXXXXXX
OTA_ENGINE_GITLAB_API_BASE_URL=https://gitlab.com/api/v4
OTA_ENGINE_GITLAB_BASE_URL=https://gitlab.com
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased [minor]

### Added

- Add GitLab support: datasets can be published as GitLab releases and issues can be reported on GitLab repositories

## 2.5.0 - 2024-10-29

_Full changeset and discussions: [#1115](https://github.com/OpenTermsArchive/engine/pull/1115)._
Expand Down
1 change: 1 addition & 0 deletions config/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
"dataset": {
"title": "sandbox",
"versionsRepositoryURL": "https://github.com/OpenTermsArchive/sandbox",
"versionsRepositoryURLGitLab": "https://gitlab.com/ota-sandbox-example/sandbox",
"publishingSchedule": "30 8 * * MON"
}
}
Expand Down
30 changes: 19 additions & 11 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 3 additions & 5 deletions scripts/dataset/assets/README.template.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ import config from 'config';
const LOCALE = 'en-EN';
const DATE_OPTIONS = { year: 'numeric', month: 'long', day: 'numeric' };

export default function readme({ releaseDate, servicesCount, firstVersionDate, lastVersionDate }) {
export default function readme({ releaseDate, servicesCount, firstVersionDate, lastVersionDate, versionsRepositoryURL }) {
return `# Open Terms Archive — ${title({ releaseDate })}

${body({ servicesCount, firstVersionDate, lastVersionDate })}`;
${body({ servicesCount, firstVersionDate, lastVersionDate, versionsRepositoryURL })}`;
}

export function title({ releaseDate }) {
Expand All @@ -17,12 +17,10 @@ export function title({ releaseDate }) {
return `${title} — ${releaseDate} dataset`;
}

export function body({ servicesCount, firstVersionDate, lastVersionDate }) {
export function body({ servicesCount, firstVersionDate, lastVersionDate, versionsRepositoryURL }) {
firstVersionDate = firstVersionDate.toLocaleDateString(LOCALE, DATE_OPTIONS);
lastVersionDate = lastVersionDate.toLocaleDateString(LOCALE, DATE_OPTIONS);

const versionsRepositoryURL = config.get('@opentermsarchive/engine.dataset.versionsRepositoryURL');

return `This dataset consolidates the contractual documents of ${servicesCount} service providers, in all their versions that were accessible online between ${firstVersionDate} and ${lastVersionDate}.

This dataset is tailored for datascientists and other analysts. You can also explore all these versions interactively on [${versionsRepositoryURL}](${versionsRepositoryURL}).
Expand Down
3 changes: 2 additions & 1 deletion scripts/dataset/export/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const fs = fsApi.promises;

const ARCHIVE_FORMAT = 'zip'; // for supported formats, see https://www.archiverjs.com/docs/archive-formats

export default async function generate({ archivePath, releaseDate }) {
export default async function generate({ archivePath, releaseDate, versionsRepositoryURL }) {
const versionsRepository = await RepositoryFactory.create(config.get('@opentermsarchive/engine.recorder.versions.storage')).initialize();

const archive = await initializeArchive(archivePath);
Expand Down Expand Up @@ -61,6 +61,7 @@ export default async function generate({ archivePath, releaseDate }) {
releaseDate,
firstVersionDate,
lastVersionDate,
versionsRepositoryURL,
}),
{ name: `${archive.basename}/README.md` },
);
Expand Down
35 changes: 28 additions & 7 deletions scripts/dataset/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import config from 'config';
import generateRelease from './export/index.js';
import logger from './logger/index.js';
import publishRelease from './publish/index.js';
import publishReleaseGitLab from './publishGitLab/index.js';

export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }) {
const releaseDate = new Date();
Expand All @@ -14,7 +15,15 @@ export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }

logger.info('Start exporting dataset…');

const stats = await generateRelease({ archivePath, releaseDate });
const usesGitHub = (typeof process.env.OTA_ENGINE_GITHUB_TOKEN !== 'undefined');
const usesGitLab = (typeof process.env.OTA_ENGINE_GITLAB_TOKEN !== 'undefined');

let versionsRepositoryURL = '';

if (usesGitHub) versionsRepositoryURL = config.get('@opentermsarchive/engine.dataset.versionsRepositoryURL');
if (usesGitLab) versionsRepositoryURL = config.get('@opentermsarchive/engine.dataset.versionsRepositoryURLGitLab');

const stats = await generateRelease({ archivePath, releaseDate, versionsRepositoryURL });

logger.info(`Dataset exported in ${archivePath}`);

Expand All @@ -24,13 +33,25 @@ export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }

logger.info('Start publishing dataset…');

const releaseUrl = await publishRelease({
archivePath,
releaseDate,
stats,
});
if (usesGitHub) {
const releaseUrl = await publishRelease({
archivePath,
releaseDate,
stats,
});

logger.info(`Dataset published to ${releaseUrl}`);
logger.info(`Dataset published to ${releaseUrl}`);
}

if (usesGitLab) {
const releaseUrl = await publishReleaseGitLab({
archivePath,
releaseDate,
stats,
});

logger.info(`Dataset published to ${releaseUrl}`);
}

if (!shouldRemoveLocalCopy) {
return;
Expand Down
135 changes: 135 additions & 0 deletions scripts/dataset/publishGitLab/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import fsApi from 'fs';
import path from 'path';
import url from 'url';

import config from 'config';
import dotenv from 'dotenv';
import FormData from 'form-data';
import nodeFetch from 'node-fetch';

import GitLab from '../../../src/reporterGitlab/gitlab.js';
import * as readme from '../assets/README.template.js';
import logger from '../logger/index.js';

// Run dotenv before reading the environment below (presumably loads variables from a local `.env` file — see dotenv documentation).
dotenv.config();

// Base URL of the GitLab REST API, read from the environment; `undefined` when the variable is not set.
const gitlabAPIUrl = process.env.OTA_ENGINE_GITLAB_API_BASE_URL;

/**
 * Publishes a dataset archive on GitLab.
 *
 * Steps, in order:
 * 1. resolve the GitLab project id from the configured versions repository URL;
 * 2. create a release whose tag is the archive file name without its extension;
 * 3. upload the archive to the project's generic package registry;
 * 4. attach the uploaded package file to the release as an asset link.
 *
 * @param {object} params
 * @param {string} params.archivePath - Path of the archive file to publish.
 * @param {Date} params.releaseDate - Release date, forwarded to the README title template.
 * @param {object} params.stats - Dataset statistics, forwarded to the README body template.
 * @returns {Promise<string>} The `direct_asset_url` returned by GitLab for the created asset link.
 * @throws {Error} When the repository URL is invalid, or when any GitLab request fails or answers with a non-2xx status.
 */
export default async function publishReleaseGitLab({
  archivePath,
  releaseDate,
  stats,
}) {
  const versionsRepositoryURL = config.get('@opentermsarchive/engine.dataset.versionsRepositoryURLGitLab');

  // Keep the whole namespace path (not only the first two components) so that projects nested in subgroups resolve too.
  const repositoryPath = new url.URL(versionsRepositoryURL).pathname
    .split('/')
    .filter(component => component)
    .join('/');

  let projectId = null;

  try {
    const project = await requestGitLabJSON(
      `${gitlabAPIUrl}/projects/${encodeURIComponent(repositoryPath)}`,
      buildGitLabRequestOptions('GET', { headers: { 'Content-Type': 'application/json' } }),
      'obtain the project',
    );

    projectId = project.id;

    if (projectId === undefined || projectId === null) {
      throw new Error(`GitLab did not return a project id for "${repositoryPath}"`);
    }
  } catch (error) {
    logger.error(`Error while obtaining projectId: ${error}`);

    // Abort here: every following request needs a valid project id.
    throw error;
  }

  const tagName = `${path.basename(archivePath, path.extname(archivePath))}`; // use archive filename as Git tag

  try {
    const releaseRes = await requestGitLabJSON(
      `${gitlabAPIUrl}/projects/${projectId}/releases`,
      buildGitLabRequestOptions('POST', {
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          ref: 'main',
          tag_name: tagName,
          name: readme.title({ releaseDate }),
          // The body template needs the repository URL in addition to the statistics.
          description: readme.body({ ...stats, versionsRepositoryURL }),
        }),
      }),
      'create the release',
    );

    // The logged identifier is the id of the commit the release points to.
    const releaseId = releaseRes.commit ? releaseRes.commit.id : undefined;

    logger.info(`Created release with releaseId: ${releaseId}`);

    // restrict characters to the ones allowed by GitLab APIs
    const packageName = config.get('@opentermsarchive/engine.dataset.title').replace(/[^a-zA-Z0-9.\-_]/g, '-');
    const packageVersion = tagName.replace(/[^a-zA-Z0-9.\-_]/g, '-');
    // NOTE(review): this keeps `/` from `archivePath`, so a path with directories adds URL segments — confirm callers pass a bare file name.
    const packageFileName = archivePath.replace(/[^a-zA-Z0-9.\-_/]/g, '-');

    logger.debug(`packageName: ${packageName}, packageVersion: ${packageVersion} packageFileName: ${packageFileName}`);

    // Upload the package
    const packageRes = await requestGitLabJSON(
      `${gitlabAPIUrl}/projects/${projectId}/packages/generic/${packageName}/${packageVersion}/${packageFileName}?status=default&select=package_file`,
      buildGitLabRequestOptions('PUT', { body: fsApi.createReadStream(archivePath) }),
      'upload the package',
    );

    const packageFilesId = packageRes.id;

    logger.debug(`package file id: ${packageFilesId}`);

    // use the package id to build the download url for the release
    const publishedPackageUrl = `${versionsRepositoryURL}/-/package_files/${packageFilesId}/download`;

    // Link the uploaded package to the release
    const formData = new FormData();

    formData.append('name', archivePath);
    formData.append('url', publishedPackageUrl);
    // NOTE(review): `file` does not appear to be a documented parameter of the release links endpoint; kept to preserve the existing request — confirm it is needed.
    formData.append('file', fsApi.createReadStream(archivePath), { filename: path.basename(archivePath) });

    const uploadRes = await requestGitLabJSON(
      `${gitlabAPIUrl}/projects/${projectId}/releases/${tagName}/assets/links`,
      buildGitLabRequestOptions('POST', { headers: formData.getHeaders(), body: formData }),
      'link the package to the release',
    );

    return uploadRes.direct_asset_url;
  } catch (error) {
    logger.error('Failed to create release or upload ZIP file:', error);
    throw error;
  }
}

// Builds request options from the shared GitLab base options; headers from the base options take precedence over the given ones.
function buildGitLabRequestOptions(method, { headers = {}, body } = {}) {
  const options = GitLab.baseOptionsHttpReq(process.env.OTA_ENGINE_GITLAB_RELEASES_TOKEN);

  options.method = method;
  options.headers = { ...headers, ...options.headers };

  if (body !== undefined) {
    options.body = body;
  }

  return options;
}

// Sends a request to GitLab and returns the parsed JSON payload, throwing with the status and response body on a non-2xx answer.
async function requestGitLabJSON(endpoint, options, action) {
  const response = await nodeFetch(endpoint, options);

  if (!response.ok) {
    const details = await response.text();

    throw new Error(`GitLab request to ${action} failed with status ${response.status} ${response.statusText}: ${details}`);
  }

  return response.json();
}
18 changes: 18 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import Archivist from './archivist/index.js';
import logger from './logger/index.js';
import Notifier from './notifier/index.js';
import Reporter from './reporter/index.js';
import ReporterGitlab from './reporterGitlab/index.js';

const require = createRequire(import.meta.url);

Expand Down Expand Up @@ -72,6 +73,23 @@ export default async function track({ services, types, extractOnly, schedule })
logger.warn('Environment variable "OTA_ENGINE_GITHUB_TOKEN" was not found; the Reporter module will be ignored');
}

if (process.env.OTA_ENGINE_GITLAB_TOKEN) {
if (config.has('@opentermsarchive/engine.reporter.gitlabIssues.repositories.declarations')) {
try {
const reporter = new ReporterGitlab(config.get('@opentermsarchive/engine.reporter'));

await reporter.initialize();
archivist.attach(reporter);
} catch (error) {
logger.error('Cannot instantiate the ReporterGitlab module; it will be ignored:', error);
}
} else {
logger.warn('Configuration key "reporter.gitlabIssues.repositories.declarations" was not found; issues on the declarations repository cannot be created');
}
} else {
logger.warn('Environment variable "OTA_ENGINE_GITLAB_TOKEN" was not found; the ReporterGitlab module will be ignored');
}

if (!schedule) {
await archivist.track({ services, types });

Expand Down
Loading
Loading