diff --git a/content/_index.md b/content/_index.md index 8a120245..fb105180 100644 --- a/content/_index.md +++ b/content/_index.md @@ -24,7 +24,7 @@ This documentation follows the [Diátaxis](https://diataxis.fr) approach and str - **Tutorials:** step-by-step learning guides that help beginners get started with Open Terms Archive, providing foundational knowledge and hands-on experience. - **How-to guides:** task-focused instructions that help experienced users accomplish specific goals efficiently and effectively. -- **Reference:** comprehensive technical documentation detailing configuration options and specifications for advanced users. +- **References:** comprehensive technical documentation detailing configuration options and specifications for advanced users. - **Explanations:** background knowledge that enables understanding the constraints and how choices that are made. ### Table of contents diff --git a/content/analysis/how-to/_index.md b/content/analysis/how-to/_index.md index 537c3cba..b4ac9c61 100644 --- a/content/analysis/how-to/_index.md +++ b/content/analysis/how-to/_index.md @@ -1,4 +1,4 @@ --- -title: How to +title: How-to guides weight: 1 --- diff --git a/content/analysis/how-to/publish-memo.md b/content/analysis/how-to/publish-memo.md index 31e57696..18699bdb 100644 --- a/content/analysis/how-to/publish-memo.md +++ b/content/analysis/how-to/publish-memo.md @@ -1,6 +1,7 @@ --- title: Publish a memo -aliases: /memos/how-to-publish/ +aliases: + - /memos/how-to-publish/ --- # How to publish a memo diff --git a/content/analysis/reference/_index.md b/content/analysis/reference/_index.md index 6f4fd0f8..98ee9611 100644 --- a/content/analysis/reference/_index.md +++ b/content/analysis/reference/_index.md @@ -1,4 +1,4 @@ --- -title: Reference +title: References weight: 2 --- diff --git a/content/api/cli.md b/content/api/cli.md index c858ebd6..8acb8beb 100644 --- a/content/api/cli.md +++ b/content/api/cli.md @@ -14,50 +14,50 @@ In these commands: ## 
Tracking terms -{{< configOption name="ota track" description="Track the current terms of services according to provided declarations. The declarations, snapshots and versions paths are defined in the configuration." example="`npx ota track`" >}} +{{< refItem name="ota track" description="Track the current terms of services according to provided declarations. The declarations, snapshots and versions paths are defined in the configuration." example="npx ota track" />}} > Note that the snapshots and versions will be recorded at the moment the command is executed, on top of the existing local history. If a shared history already exists and the goal is to add on top of it, that history has to be downloaded before executing that command. -{{< configOption name="ota track --help" description="Show help and available options for track command" example="`npx ota track --help`" >}} +{{< refItem name="ota track --help" description="Show help and available options for track command" example="npx ota track --help" />}} -{{< configOption name="ota track [--services ...]" description="Track terms of specific services only" example="`npx ota track --services \"Facebook\" \"LinkedIn\"`" >}} +{{< refItem name="ota track [--services ...]" description="Track terms of specific services only" example="npx ota track --services \"Facebook\" \"LinkedIn\"" />}} -{{< configOption name="ota track [--services ...] [--types ...]" description="Track specific terms types of specific services only" example="`npx ota track --services \"Facebook\" \"LinkedIn\" --types \"Privacy Policy\" \"Terms of Service\"`" >}} +{{< refItem name="ota track [--services ...] [--types ...]" description="Track specific terms types of specific services only" example="npx ota track --services \"Facebook\" \"LinkedIn\" --types \"Privacy Policy\" \"Terms of Service\"" />}} -{{< configOption name="ota track --schedule [--services ...] 
[--types ...]" description="Track terms on the schedule defined in the configuration" example="`npx ota track --schedule`" >}} +{{< refItem name="ota track --schedule [--services ...] [--types ...]" description="Track terms on the schedule defined in the configuration" example="npx ota track --schedule" />}} ## Validating declarations -{{< configOption name="ota validate [--services ...] [--types ...]" description="Check that all declarations allow recording a snapshot and a version properly. If service IDs are provided, check only those services." example="`npx ota validate --services \"Facebook\" \"LinkedIn\" --types \"Privacy Policy\" \"Terms of Service\"`" >}} +{{< refItem name="ota validate [--services ...] [--types ...]" description="Check that all declarations allow recording a snapshot and a version properly. If service IDs are provided, check only those services." example="npx ota validate --services \"Facebook\" \"LinkedIn\" --types \"Privacy Policy\" \"Terms of Service\"" />}} -{{< configOption name="ota validate --schema-only [--services ...] [--types ...]" description="Check that all declarations are readable by the engine. Allows for a much faster check of declarations, but does not check that the terms are actually accessible." example="`npx ota validate --schema-only --services \"Facebook\" \"LinkedIn\" --types \"Privacy Policy\" \"Terms of Service\"`" >}} +{{< refItem name="ota validate --schema-only [--services ...] [--types ...]" description="Check that all declarations are readable by the engine. Allows for a much faster check of declarations, but does not check that the terms are actually accessible." 
example="npx ota validate --schema-only --services \"Facebook\" \"LinkedIn\" --types \"Privacy Policy\" \"Terms of Service\"" />}} -{{< configOption name="ota validate --modified" description="Run ota validate only on files that have been modified in Git" example="`npx ota validate --modified`" >}} +{{< refItem name="ota validate --modified" description="Run ota validate only on files that have been modified in Git" example="npx ota validate --modified" />}} ## Linting declarations -{{< configOption name="ota lint [--services ...]" description="Test the format of declarations' normalisation." example="`npx ota lint --services \"Facebook\" \"LinkedIn\"`" >}} +{{< refItem name="ota lint [--services ...]" description="Test the format of declarations' normalisation." example="npx ota lint --services \"Facebook\" \"LinkedIn\"" />}} -{{< configOption name="ota lint --fix [--services ...]" description="Automatically correct formatting mistakes and ensure that all declarations are standardised" example="`npx ota lint --fix`" >}} +{{< refItem name="ota lint --fix [--services ...]" description="Automatically correct formatting mistakes and ensure that all declarations are standardised" example="npx ota lint --fix" />}} -{{< configOption name="ota lint --modified" description="Run ota lint only on files that have been modified in Git" example="`npx ota lint --modified`" >}} +{{< refItem name="ota lint --modified" description="Run ota lint only on files that have been modified in Git" example="npx ota lint --modified" />}} ## Publishing dataset -{{< configOption name="ota dataset [--file ]" description="Export the versions dataset into a ZIP file and publish it to GitHub releases. The dataset title and the URL of the versions repository are defined in the configuration." example="`npx ota dataset --file dataset.zip`" >}} +{{< refItem name="ota dataset [--file ]" description="Export the versions dataset into a ZIP file and publish it to GitHub releases. 
The dataset title and the URL of the versions repository are defined in the configuration." example="npx ota dataset --file dataset.zip" />}} To export the dataset into a ZIP file and publish it on GitHub releases: -{{< configOption name="ota dataset --publish [--file ]" description="Export and publish dataset to GitHub releases" example="`GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --publish`" >}} +{{< refItem name="ota dataset --publish [--file ]" description="Export and publish dataset to GitHub releases" example="GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --publish" />}} The `GITHUB_TOKEN` can also be defined in a [`.env` file]({{< relref "collections/reference/environment-variables" >}}). To export, publish the dataset and remove the local copy that was created after it has been uploaded: -{{< configOption name="ota dataset --publish --remove-local-copy [--file ]" description="Export, publish dataset and remove local copy after upload" example="`GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --publish --remove-local-copy`" >}} +{{< refItem name="ota dataset --publish --remove-local-copy [--file ]" description="Export, publish dataset and remove local copy after upload" example="GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --publish --remove-local-copy" />}} -{{< configOption name="ota dataset --schedule [--file ]" description="Schedule export, publishing and local copy removal" example="`GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --schedule --publish --remove-local-copy`" >}} +{{< refItem name="ota dataset --schedule [--file ]" description="Schedule export, publishing and local copy removal" example="GITHUB_TOKEN=ghp_XXXXXXXXX npx ota dataset --schedule --publish --remove-local-copy" />}} ## Exposing the collection API -{{< configOption name="ota serve" description="Start the collection Web API server. The Web API will be available under `:///`. The server port and base path are defined in the configuration." 
example="`npx ota serve`" >}} +{{< refItem name="ota serve" description="Start the collection Web API server. The Web API will be available under `:///`. The server port and base path are defined in the configuration." example="npx ota serve" />}} diff --git a/content/collections/how-to/_index.md b/content/collections/how-to/_index.md index e5d7a8d9..01a5161f 100644 --- a/content/collections/how-to/_index.md +++ b/content/collections/how-to/_index.md @@ -1,5 +1,5 @@ --- -title: How to +title: How-to guides weight: 2 --- diff --git a/content/collections/reference/_index.md b/content/collections/reference/_index.md index 63aae4a6..fd021bb8 100644 --- a/content/collections/reference/_index.md +++ b/content/collections/reference/_index.md @@ -1,4 +1,4 @@ --- -title: Reference +title: References weight: 3 --- diff --git a/content/collections/reference/configuration.md b/content/collections/reference/configuration.md index 9ff14ca2..e5be913b 100644 --- a/content/collections/reference/configuration.md +++ b/content/collections/reference/configuration.md @@ -11,217 +11,217 @@ As an example, see the [production configuration file](https://github.com/OpenTe ## Options -{{< configOption +{{< refItem name="trackingSchedule" type="string" - description="Defines how often the engine should check for changes in terms. Uses standard cron syntax to set the schedule." - default="`30 */12 * * *` (runs every 12 hours at minute 30)" ->}} + description="Defines how often the engine should check for changes in terms. Uses standard cron syntax to set the schedule. By default, it runs every 12 hours at minute 30." 
+ default="30 */12 * * *" +/>}} -{{< configOption +{{< refItem name="collectionPath" type="string" description="Path to the collection's directory containing declarations directory and metadata file, relative to the engine execution location" - example="`../collections/demo-declarations`" - default="`./`" ->}} + example="../collections/demo-declarations" + default="./" +/>}} ### Recorder The recorder section manages how versions and snapshots of terms are stored, supporting multiple storage backends. -{{< configOption +{{< refItem name="recorder.versions.storage" type="object" description="Configuration for storing versions. Supports Git and MongoDB. See [Storage Repositories](#storage-repositories) for more information." ->}} +/>}} -{{< configOption +{{< refItem name="recorder.snapshots.storage" type="object" description="Configuration for storing snapshots. Supports Git and MongoDB. See [Storage Repositories](#storage-repositories) for more information." ->}} +/>}} ### Fetcher The fetcher section configures how the engine retrieves documents from the web. -{{< configOption +{{< refItem name="fetcher.waitForElementsTimeout" type="number" description="Maximum wait time for elements to appear in a page (milliseconds)." - default="`10000`" ->}} + default="10000" +/>}} -{{< configOption +{{< refItem name="fetcher.navigationTimeout" type="number" description="Maximum wait time for a page to load (milliseconds)." - default="`30000`" ->}} + default="30000" +/>}} -{{< configOption +{{< refItem name="fetcher.language" type="string" description="Language code (ISO 639-1) for request headers." - default="`en`" ->}} + default="en" +/>}} ### Notifier The notifier section sets up how notifications are sent when new versions of terms are recorded. -{{< configOption +{{< refItem name="notifier.sendInBlue.updatesListId" type="string" description="SendInBlue contacts list ID of persons to notify on terms updates." 
- default="`850`" ->}} + default="850" +/>}} -{{< configOption +{{< refItem name="notifier.sendInBlue.updateTemplateId" type="string" description="SendInBlue email template ID used for updates notifications." - default="`7`" ->}} + default="7" +/>}} ### Logger The logger section configures logging and error notification settings. -{{< configOption +{{< refItem name="logger.smtp.host" type="string" description="SMTP server hostname." - default="`smtp-relay.sendinblue.com`" ->}} + default="smtp-relay.sendinblue.com" +/>}} -{{< configOption +{{< refItem name="logger.smtp.username" type="string" description="Username for SMTP server authentication." - default="`admin@opentermsarchive.org`" ->}} + default="admin@opentermsarchive.org" +/>}} -{{< configOption +{{< refItem name="logger.sendMailOnError.to" type="string" description="Email address for error notifications." - example="`admin@example.com`" ->}} + example="admin@example.com" +/>}} -{{< configOption +{{< refItem name="logger.sendMailOnError.from" type="string" description="Sender email address for error notifications." - example="`noreply@example.com`" ->}} + example="noreply@example.com" +/>}} -{{< configOption +{{< refItem name="logger.sendMailOnError.sendWarnings" type="boolean" description="Set to true to also send email in case of warning." - default="`false`" ->}} + default="false" +/>}} -{{< configOption +{{< refItem name="logger.timestampPrefix" type="boolean" description="Set to false to avoid duplicate timestamps if logs are managed by a process manager." - default="`true`" ->}} + default="true" +/>}} ### Reporter The reporter section manages how issues are reported when terms content is inaccessible, supporting GitHub and GitLab. 
-{{< configOption +{{< refItem name="reporter.type" type="string" description="Type of reporter" - example="`github`" - allowedValues="`github`, `gitlab`" ->}} + example="github" + allowedValues="github, gitlab" +/>}} -{{< configOption +{{< refItem name="reporter.repositories.declarations" type="string" description="Repository for creating issues." - example="`OpenTermsArchive/demo-declarations`" ->}} + example="OpenTermsArchive/demo-declarations" +/>}} -{{< configOption +{{< refItem name="reporter.repositories.versions" type="string" description="Repository for versions." - example="`OpenTermsArchive/demo-versions`" ->}} + example="OpenTermsArchive/demo-versions" +/>}} -{{< configOption +{{< refItem name="reporter.repositories.snapshots" type="string" description="Repository for snapshots." - example="`OpenTermsArchive/demo-snapshots`" ->}} + example="OpenTermsArchive/demo-snapshots" +/>}} -{{< configOption +{{< refItem name="reporter.baseURL" type="string" description="Base URL for GitLab (if applicable)." - example="`https://gitlab.example.com`" ->}} + example="https://gitlab.example.com" +/>}} -{{< configOption +{{< refItem name="reporter.apiBaseURL" type="string" description="API base URL for GitLab (if applicable)." - example="`https://api.gitlab.example.com`" ->}} + example="https://api.gitlab.example.com" +/>}} ### Dataset The dataset section configures how datasets are published. -{{< configOption +{{< refItem name="dataset.title" type="string" description="Title of the dataset." - default="`sandbox`" ->}} + default="sandbox" +/>}} -{{< configOption +{{< refItem name="dataset.versionsRepositoryURL" type="string" description="Repository URL for dataset releases." - default="`https://github.com/OpenTermsArchive/sandbox`" ->}} + default="https://github.com/OpenTermsArchive/sandbox" +/>}} -{{< configOption +{{< refItem name="dataset.publishingSchedule" type="string" - description="Cron expression for dataset publishing." 
- default="`30 8 * * MON` (runs every Monday at 8:30 AM)" ->}} + description="Cron expression for dataset publishing. By default, it runs every Monday at 8:30 AM." + default="30 8 * * MON" +/>}} ### Collection API The collection API section sets the parameters for the API server. -{{< configOption +{{< refItem name="collection-api.api.port" type="number" description="Port number for the API server." - example="`8080`" + example="8080" required=true ->}} +/>}} -{{< configOption +{{< refItem name="collection-api.api.basePath" type="string" description="Base path for API endpoints." - example="`/collection-api`" + example="/collection-api" required=true ->}} +/>}} --- @@ -229,74 +229,74 @@ The collection API section sets the parameters for the API server. The storage repositories section set the parameters for supported backends for storing versions and snapshots, supporting Git and MongoDB. -{{< configOption +{{< refItem name="storage.type" type="string" description="Type of storage backend." - default="`git`" - allowedValues="`git`, `mongo`" ->}} + default="git" + allowedValues="git, mongo" +/>}} ### Git The Git storage configuration allows to store versions in a Git repository. -{{< configOption +{{< refItem name="storage.git.path" type="string" description="Path to the versions database directory." - default="`./data/versions`" ->}} + default="./data/versions" +/>}} -{{< configOption +{{< refItem name="storage.git.publish" type="boolean" description="Boolean to push changes to the origin." - default="`false`" ->}} + default="false" +/>}} -{{< configOption +{{< refItem name="storage.git.snapshotIdentiferTemplate" type="string" description="Template for snapshot ID reference. `%SNAPSHOT_ID` will be replaced with the actual snapshot ID." - default="`./data/snapshots/%SNAPSHOT_ID`" ->}} + default="./data/snapshots/%SNAPSHOT_ID" +/>}} -{{< configOption +{{< refItem name="storage.git.author.name" type="string" description="Author name for changes." 
- default="`Open Terms Archive Bot`" ->}} + default="Open Terms Archive Bot" +/>}} -{{< configOption +{{< refItem name="storage.git.author.email" type="string" description="Author email for changes." - default="`bot@opentermsarchive.org`" ->}} + default="bot@opentermsarchive.org" +/>}} ### MongoDB The MongoDB storage configuration allows to store versions in a MongoDB database. -{{< configOption +{{< refItem name="storage.mongo.connectionURI" type="string" description="MongoDB connection URI." - default="`mongodb://127.0.0.1:27017`" ->}} + default="mongodb://127.0.0.1:27017" +/>}} -{{< configOption +{{< refItem name="storage.mongo.database" type="string" description="Database name." - default="`open-terms-archive`" ->}} + default="open-terms-archive" +/>}} -{{< configOption +{{< refItem name="storage.mongo.collection" type="string" description="Collection name." - default="`snapshots`" ->}} + default="snapshots" +/>}} diff --git a/content/collections/reference/environment-variables.md b/content/collections/reference/environment-variables.md index 16f2d415..c17178bc 100644 --- a/content/collections/reference/environment-variables.md +++ b/content/collections/reference/environment-variables.md @@ -9,18 +9,18 @@ This reference documentation details all available environment variables that ca ### Engine -{{< configOption name="OTA_ENGINE_SMTP_PASSWORD" type="string" description="SMTP password for email error notifications." >}} +{{< refItem name="OTA_ENGINE_SMTP_PASSWORD" type="string" description="SMTP password for email error notifications." />}} -{{< configOption name="OTA_ENGINE_SENDINBLUE_API_KEY" type="string" description="API key for SendInBlue." >}} +{{< refItem name="OTA_ENGINE_SENDINBLUE_API_KEY" type="string" description="API key for SendInBlue." />}} -{{< configOption name="OTA_ENGINE_GITHUB_TOKEN" type="string" description="GitHub token for API access." >}} +{{< refItem name="OTA_ENGINE_GITHUB_TOKEN" type="string" description="GitHub token for API access." 
/>}} -{{< configOption name="OTA_ENGINE_GITLAB_TOKEN" type="string" description="GitLab token for API access." >}} +{{< refItem name="OTA_ENGINE_GITLAB_TOKEN" type="string" description="GitLab token for API access." />}} -{{< configOption name="OTA_ENGINE_GITLAB_RELEASES_TOKEN" type="string" description="GitLab token for dataset releases." >}} +{{< refItem name="OTA_ENGINE_GITLAB_RELEASES_TOKEN" type="string" description="GitLab token for dataset releases." />}} --- ### Federation API -{{< configOption name="OTA_FEDERATION_API_SMTP_PASSWORD" type="string" description="SMTP password for email error notifications." >}} +{{< refItem name="OTA_FEDERATION_API_SMTP_PASSWORD" type="string" description="SMTP password for email error notifications." />}} diff --git a/content/collections/reference/metadata.md b/content/collections/reference/metadata.md index 6daa747d..06a1191d 100644 --- a/content/collections/reference/metadata.md +++ b/content/collections/reference/metadata.md @@ -13,196 +13,202 @@ The examples given throughout this reference can be seen in context in the [comp ## Fields -{{< configOption +{{< refItem name="id" type="string" description="Unique identifier derived from name (acronyms, dash-separated)." example="demo" required=true ->}} +/>}} -{{< configOption +{{< refItem name="name" type="string" description="Display name of the collection." example="Demo Collection" required=true ->}} +/>}} -{{< configOption +{{< refItem name="tagline" type="string" description="Concise description of collection topic." example="Services used by Open Terms Archive" required=true ->}} +/>}} -{{< configOption +{{< refItem name="languages" type="array of strings" description="List of [ISO 639-1 (two-letter)](https://en.wikipedia.org/wiki/ISO_639) language codes representing languages allowed in the collection." 
example="[en, fr, de]" required=true ->}} +/>}} -{{< configOption +{{< refItem name="jurisdictions" type="array of strings" description="List of [ISO 3166-2 country codes](https://en.wikipedia.org/wiki/ISO_3166-2) representing jurisdictions covered by the collection." example="[EU]" required=true ->}} +/>}} -{{< configOption +{{< refItem name="description" type="string" description="Detailed description of the collection" - example=` The **Demo** collection tracks changes to the terms of use of services used by Open Terms Archive. - - This provides a reference collection for best practices and enables the Open Terms Archive Core Team to be a user of the software it produces. - ` required=false >}} -{{< configOption +```yaml +description: > + The **Demo** collection tracks changes to the terms of use of services used by Open Terms Archive. + + This provides a reference collection for best practices and enables the Open Terms Archive Core Team to be a user of the software it produces. +``` +{{< /refItem >}} + +{{< refItem name="dataset" type="uri" description="URL to the dataset releases." example="https://github.com/OpenTermsArchive/demo-versions/releases" required=false ->}} +/>}} -{{< configOption +{{< refItem name="declarations" type="uri" description="URL to the declarations repository." example="https://github.com/OpenTermsArchive/demo-declarations" required=false ->}} +/>}} -{{< configOption +{{< refItem name="versions" type="uri" description="URL to the versions repository." example="https://github.com/OpenTermsArchive/demo-versions" required=false ->}} +/>}} -{{< configOption +{{< refItem name="snapshots" type="uri" description="URL to the snapshots repository." example="https://github.com/OpenTermsArchive/demo-snapshots" required=false ->}} +/>}} -{{< configOption +{{< refItem name="donations" type="uri" description="URL to the donations page." 
example="https://opencollective.com/opentermsarchive" required=false ->}} +/>}} -{{< configOption +{{< refItem name="logo" type="uri" description="URL to the collection's logo. Optimized PNG transparent image (minimum width 240px)." example="https://opentermsarchive.org/images/logo/logo-open-terms-archive-black.png" required=false ->}} +/>}} -{{< configOption +{{< refItem name="trackingPeriods" type="array of objects" description="List of time periods during which terms were tracked, with their tracking configuration. Gaps between periods indicate times when tracking was interrupted. See [TrackingPeriods]({{< relref \"#trackingperiods\" >}}) section." required=false ->}} +/>}} -{{< configOption +{{< refItem name="governance" type="object of objects" description="Map of organizations involved in the collection's governance, with organization names as keys and governance objects as values. See [Governance]({{< relref \"#governance\" >}}) section." required=false ->}} +/>}} -{{< configOption +{{< refItem name="i18n" type="object of objects" description="Internationalization of any of the Metadata properties (except i18n itself) for different language codes" - example=` fr: - name: Démo - tagline: Services utilisés par Open Terms Archive - governance: - Ministry for Europe and Foreign Affairs: - name: Ministère de l'Europe et des Affaires étrangères - url: https://www.diplomatie.gouv.fr - ` required=false >}} +```yaml +fr: + name: Démo + tagline: Services utilisés par Open Terms Archive + governance: + Ministry for Europe and Foreign Affairs: + name: Ministère de l'Europe et des Affaires étrangères + url: https://www.diplomatie.gouv.fr +``` +{{< /refItem >}} --- ### TrackingPeriods -{{< configOption +{{< refItem name="startDate" type="date" description="The date when tracking started for this period (ISO 8601 format YYYY-MM-DD)." 
example="2023-01-01" required=true ->}} +/>}} -{{< configOption +{{< refItem name="schedule" type="cron-expression" description="A [cron expression](https://en.wikipedia.org/wiki/Cron#Cron_expression) that defines the tracking frequency." example="0 0 * * *" required=true ->}} +/>}} -{{< configOption +{{< refItem name="serverLocation" type="string" description="The geographic location of the tracking server (city name and ISO 3166-2 country code)." example="Paris, FR" required=true ->}} +/>}} -{{< configOption +{{< refItem name="endDate" type="date" description="The date when tracking ended for this period (ISO 8601 format YYYY-MM-DD). If not specified, tracking is ongoing." example="2023-12-01" required=false ->}} +/>}} --- ### Governance -{{< configOption +{{< refItem name="url" type="uri" description="URL to the entity's website" example="https://opentermsarchive.org/" required=false ->}} +/>}} -{{< configOption +{{< refItem name="logo" type="uri" description="URL to the entity's logo. Optimized PNG transparent image (minimum width 240px)." 
example="https://opentermsarchive.org/images/logo/logo-open-terms-archive-black.png" required=false ->}} +/>}} -{{< configOption +{{< refItem name="roles" type="array of strings" - description="Roles of the entity within the governance, see [collection governance](https://docs.opentermsarchive.org/collections/reference/governance/)" - allowedValues="`host`, `administrator`, `curator`, `maintainer`, `sponsor`" + description="Roles of the entity within the governance, see [collection governance](https://docs.opentermsarchive.org/collections/reference/governance/)" + allowedValues="host, administrator, curator, maintainer, sponsor" example="[host, administrator]" required=true ->}} +/>}} diff --git a/content/collections/tutorials/_index.md b/content/collections/tutorial/_index.md similarity index 100% rename from content/collections/tutorials/_index.md rename to content/collections/tutorial/_index.md diff --git a/content/collections/tutorials/create.md b/content/collections/tutorial/create.md similarity index 96% rename from content/collections/tutorials/create.md rename to content/collections/tutorial/create.md index 00f00884..aacd5ecd 100644 --- a/content/collections/tutorials/create.md +++ b/content/collections/tutorial/create.md @@ -1,7 +1,9 @@ --- title: Create your first collection weight: 1 -aliases: /collections/create/ +aliases: + - /collections/create/ + - /collections/tutorials/create/ --- # Create your first collection @@ -38,7 +40,7 @@ By the end, you'll have a working collection that tracks changes to a service's ### Step 2: Create the service declaration -4. Create a file `declarations/Open Terms Archive.json` with the following content. For detailed instructions on how to structure it, follow the [Tracking terms tutorial]({{< relref "/terms/tutorials/track" >}}): +4. Create a file `declarations/Open Terms Archive.json` with the following content. 
For detailed instructions on how to structure it, follow the [Tracking terms tutorial]({{< relref "/terms/tutorial/track" >}}): ```json { "name": "Open Terms Archive", diff --git a/content/community/how-to/_index.md b/content/community/how-to/_index.md index 537c3cba..b4ac9c61 100644 --- a/content/community/how-to/_index.md +++ b/content/community/how-to/_index.md @@ -1,4 +1,4 @@ --- -title: How to +title: How-to guides weight: 1 --- diff --git a/content/deployment/how-to/_index.md b/content/deployment/how-to/_index.md index 537c3cba..b4ac9c61 100644 --- a/content/deployment/how-to/_index.md +++ b/content/deployment/how-to/_index.md @@ -1,4 +1,4 @@ --- -title: How to +title: How-to guides weight: 1 --- diff --git a/content/federation/how-to/_index.md b/content/federation/how-to/_index.md index 537c3cba..b4ac9c61 100644 --- a/content/federation/how-to/_index.md +++ b/content/federation/how-to/_index.md @@ -1,4 +1,4 @@ --- -title: How to +title: How-to guides weight: 1 --- diff --git a/content/federation/reference/_index.md b/content/federation/reference/_index.md index 6f4fd0f8..98ee9611 100644 --- a/content/federation/reference/_index.md +++ b/content/federation/reference/_index.md @@ -1,4 +1,4 @@ --- -title: Reference +title: References weight: 2 --- diff --git a/content/terms/explanation/_index.md b/content/terms/explanation/_index.md new file mode 100644 index 00000000..381720df --- /dev/null +++ b/content/terms/explanation/_index.md @@ -0,0 +1,4 @@ +--- +title: Explanations +weight: 4 +--- diff --git a/content/terms/declarations-maintenance.md b/content/terms/explanation/declarations-maintenance.md similarity index 98% rename from content/terms/declarations-maintenance.md rename to content/terms/explanation/declarations-maintenance.md index c36c22f0..b572df81 100644 --- a/content/terms/declarations-maintenance.md +++ b/content/terms/explanation/declarations-maintenance.md @@ -1,6 +1,8 @@ --- title: "Declarations maintenance" weight: 6 +aliases: + - 
/terms/declarations-maintenance/ --- # Declarations maintenance diff --git a/content/terms/explanation/range-selectors.md b/content/terms/explanation/range-selectors.md new file mode 100644 index 00000000..5194574a --- /dev/null +++ b/content/terms/explanation/range-selectors.md @@ -0,0 +1,66 @@ +--- +title: Range selectors +--- + +# Range selectors + +When no unique wrapper element exists for the whole terms content, there is no easy way to select the content with only CSS selectors. Content between two elements in a document can be selected using a range selector, regardless of their DOM position. The concept is inspired by the DOM [Range API](https://developer.mozilla.org/en-US/docs/Web/API/Range), where content is defined by start and end points that may be included or excluded. The format is defined as a JSON object: + +```json +{ + "start[Before|After]": "CSS selector that marks where to begin capturing content", + "end[Before|After]": "CSS selector that marks where to stop capturing content" +} +``` + +## Example + +Let's take an example to see when range selectors can be useful. Given the following HTML: + +```html + + + +
+ +
    +
  • Home
  • +
  • Terms and Conditions
  • +
+ + +

Example Terms

+

Effective as of: January 1, 2024

+ +

Authorized uses

+

You can use this service in the following cases:

+ +
    +
  • At home
  • +
  • In your office
  • +
  • In a coffee shop
  • +
+
+
+ +
+ + + +``` + +In this case, there is no unique wrapper element for the terms content which is represented by all elements after the main title in the `main` element. Here selecting the whole `main` would result in selecting elements that are not part of the terms content, like the breadcrumb and sub navigation. The range selector can be used to select the terms content by specifying the main title `#terms-title` as the start point and the footer `#footer-menu` as the end point. The selection starts *before* the main title, so it includes it, and ends *before* the footer, so it excludes it. + +So the resulting range selector is: + +```json +{ + "startBefore": "#terms-title", + "endBefore": "#footer-menu" +} +``` + +This range selector will select the terms content between the main title and the footer element. diff --git a/content/terms/guideline/_index.md b/content/terms/guideline/_index.md new file mode 100644 index 00000000..82f8dd80 --- /dev/null +++ b/content/terms/guideline/_index.md @@ -0,0 +1,4 @@ +--- +title: Guidelines +weight: 5 +--- diff --git a/content/terms/guidelines/choosing-selectors.md b/content/terms/guideline/choosing-selectors.md similarity index 98% rename from content/terms/guidelines/choosing-selectors.md rename to content/terms/guideline/choosing-selectors.md index 5b309088..194e45b9 100644 --- a/content/terms/guidelines/choosing-selectors.md +++ b/content/terms/guideline/choosing-selectors.md @@ -1,6 +1,8 @@ --- title: "Choosing selectors" -aliases: /guidelines/choosing-selectors/ +aliases: + - /guidelines/choosing-selectors/ + - /terms/guidelines/choosing-selectors/ --- # Choosing selectors diff --git a/content/terms/guidelines/declaring.md b/content/terms/guideline/declaring.md similarity index 99% rename from content/terms/guidelines/declaring.md rename to content/terms/guideline/declaring.md index 019a5a29..a1cb0ee7 100644 --- a/content/terms/guidelines/declaring.md +++ b/content/terms/guideline/declaring.md @@ -1,6 +1,8 @@ --- title: 
"Declaring terms" -aliases: /guidelines/declaring/ +aliases: + - /guidelines/declaring/ + - /terms/guidelines/declaring/ --- # Declaring terms diff --git a/content/terms/guidelines/reviewing.md b/content/terms/guideline/reviewing.md similarity index 97% rename from content/terms/guidelines/reviewing.md rename to content/terms/guideline/reviewing.md index 1f58e455..6a44399f 100644 --- a/content/terms/guidelines/reviewing.md +++ b/content/terms/guideline/reviewing.md @@ -1,6 +1,8 @@ --- title: "Reviewing contributions" -aliases: /guidelines/reviewing/ +aliases: + - /guidelines/reviewing/ + - /terms/guidelines/reviewing/ --- # Reviewing contributions @@ -53,10 +55,10 @@ Your focus should be on two aspects: accuracy and quality. 1. Click on the _Inspect the declaration_ link to view the declaration in a graphical user interface. 2. Use the link provided in the URL section of the contribution tool to check out the original document. -3. Verify that the name of the service matches the JSON file and complies with the [guidelines]({{< relref "terms/guidelines/declaring#service-name" >}}). +3. Verify that the name of the service matches the JSON file and complies with the [guidelines]({{< relref "terms/guideline/declaring#service-name" >}}). 4. Quickly scan the document to ensure that the correct term type has been selected. To determine the term type, consider who the intended audience is and what the document is discussing. You can also refer to the [terms types list](https://github.com/OpenTermsArchive/terms-types/blob/main/termsTypes.json) to find the best term type for the document. 5. Confirm that the selected area of the document contains only one term type and does not include any other types. -6. Check both the significant and insignificant parts of the document. Ensure that the suggested selectors abide by the [selectors guidelines]({{< relref "terms/guidelines/choosing-selectors" >}}). +6. Check both the significant and insignificant parts of the document. 
Ensure that the suggested selectors abide by the [selectors guidelines]({{< relref "terms/guideline/choosing-selectors" >}}). - Ensure that the significant parts do not include navigation items, contact links, or other insignificant details that may cause confusion by triggering a change detection when the legal terms have actually not been updated. 7. Verify the version of the document in the contribution tool by clicking on the _Verify version_ button. 8. Ensure that all checks generated by the OTA-bot are manually checked. @@ -72,19 +74,19 @@ The pull request created will consist of fewer checks than those that add declar For pull requests that update declarations, you should focus should be on two things: history file and declaration. -- **History file:** The history file is a JSON file that keeps track of a service declaration changes. It contains a `validUntil` property that specifies the date a specific version of a service declaration was last effective. You have to confirm that this date is the same as the date in the issue opened for the declaration when the bot couldn't track it for the first time. This issue is usually included in the pull request message. The history file is updated with every `update` pull request. You can find more information about the history file [here]({{< relref "terms/declarations-maintenance" >}}). +- **History file:** The history file is a JSON file that keeps track of a service declaration changes. It contains a `validUntil` property that specifies the date a specific version of a service declaration was last effective. You have to confirm that this date is the same as the date in the issue opened for the declaration when the bot couldn't track it for the first time. This issue is usually included in the pull request message. The history file is updated with every `update` pull request. You can find more information about the history file [here]({{< relref "terms/explanation/declarations-maintenance" >}}). 
- **Declaration:** for `update` pull requests, you only look at the selectors to make sure they are **simple** and also verify the **generated version** is ok. ### Step-by-step Review Guide 1. Click on the inspect the declaration suggestion link to view contribution using the contribution tool. -2. Check both the significant and insignificant parts of the document. Ensure that the suggested selectors abide by the [selectors guidelines]({{< relref "terms/guidelines/choosing-selectors" >}}). +2. Check both the significant and insignificant parts of the document. Ensure that the suggested selectors abide by the [selectors guidelines]({{< relref "terms/guideline/choosing-selectors" >}}). 3. Verify the version of the document in the contribution tool by clicking on the `verify version` button. 4. Open the issue linked with the pull request. Confirm the date when the declaration was last tracked from the bot's comment. 5. Compare it with the `validUntil` property in the history file under the `Files changes` section of the pull request. If the dates are the same, proceed to approve the pull request. 6. Merge the contribution. -You can read more about maintaining declarations from the [official documentation]({{< relref "terms/declarations-maintenance" >}}). +You can read more about maintaining declarations from the [official documentation]({{< relref "terms/explanation/declarations-maintenance" >}}). ## When to Make Changes to a Contribution diff --git a/content/terms/guidelines/targeting.md b/content/terms/guideline/targeting.md similarity index 87% rename from content/terms/guidelines/targeting.md rename to content/terms/guideline/targeting.md index 3793b1e5..545ee7c6 100644 --- a/content/terms/guidelines/targeting.md +++ b/content/terms/guideline/targeting.md @@ -1,6 +1,7 @@ --- title: "Targeting" -aliases: /guidelines/targeting/ +aliases: + - /terms/guidelines/targeting/ --- # What to track? 
diff --git a/content/terms/guidelines/_index.md b/content/terms/guidelines/_index.md deleted file mode 100644 index 07956d32..00000000 --- a/content/terms/guidelines/_index.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: "Guidelines" -weight: 5 ---- diff --git a/content/terms/how-to/_index.md b/content/terms/how-to/_index.md index 6b204c17..d6db9c3c 100644 --- a/content/terms/how-to/_index.md +++ b/content/terms/how-to/_index.md @@ -1,4 +1,4 @@ --- -title: How to +title: How to guides weight: 2 --- diff --git a/content/terms/how-to/add-terms-using-UI.md b/content/terms/how-to/add-terms-using-UI.md index 813541b7..bfafa6e2 100644 --- a/content/terms/how-to/add-terms-using-UI.md +++ b/content/terms/how-to/add-terms-using-UI.md @@ -1,7 +1,8 @@ --- title: Add terms with the graphical contribution interface linkTitle: Add terms with UI -aliases: /terms/how-to-add-terms-using-with-the-graphical-contribution-interface/ +aliases: + - /terms/how-to-add-terms-using-with-the-graphical-contribution-interface/ --- # How to add terms with the graphical contribution interface diff --git a/content/terms/how-to/test-declaration.md b/content/terms/how-to/test-declaration.md new file mode 100644 index 00000000..22589203 --- /dev/null +++ b/content/terms/how-to/test-declaration.md @@ -0,0 +1,49 @@ +--- +title: "Test your declarations" +weight: 4 +--- + +# Test your declarations + +When creating or modifying service declarations, it’s important to verify they work as expected before starting to track in production. The Open Terms Archive engine provides automated testing tools to that end. 
+ +The basic usage to test all declarations in a collection is: + +```sh +npx ota validate declarations +``` + +## Testing specific services + +When iterating over a specific declaration, or to narrow down testing in larger collections, the `--services` and `--terms` options enable testing a single service declaration, or even single terms: + +```sh +npx ota validate declarations --services "<service_id>" +npx ota validate declarations --services "<service_id>" --terms "<terms_type>" +``` + +## Schema validation only + +During initial development, it is useful to check that the JSON structure is correct before even testing the actual document fetching. The `--schema-only` option enables faster validation by only checking the declaration structure without fetching any documents: + +```sh +npx ota validate declarations --schema-only +``` + +The same options for targeting specific services and terms are available. + +## Formatting + +To ensure formatting consistency across all declarations, a linter is provided: + +```sh +npx ota lint +``` + +To automatically apply the default Open Terms Archive formatting options: + +```sh +npx ota lint --fix +``` + +The same options for targeting specific services and terms are available. diff --git a/content/terms/how-to/track-new-terms.md b/content/terms/how-to/track-new-terms.md index 38f1b799..a936fec2 100644 --- a/content/terms/how-to/track-new-terms.md +++ b/content/terms/how-to/track-new-terms.md @@ -1,7 +1,8 @@ --- title: Track new terms weight: 2 -aliases: /contributing-terms/ +aliases: + - /contributing-terms/ --- # How to track new terms @@ -29,7 +30,7 @@ To add a declaration, you need to follow these steps: 6. After you've properly added your selectors and structured your JSON file, you need to test and validate your JSON file to make sure it is ok. To do this, you need to run `npx ota validate --services [service name]` from the root of the repository. This will run a validation on the declaration, highlighting any changes required. 7. 
If all tests are good, make a pull request to the main repository. -> If you have a hard time finding the service name, check out the [practical guidelines to find the service name]({{< relref "/terms/guidelines/declaring" >}}), and feel free to mention your uncertainties in the pull request! We will help you improve the service name if necessary 🙂 +> If you have a hard time finding the service name, check out the [practical guidelines to find the service name]({{< relref "/terms/guideline/declaring" >}}), and feel free to mention your uncertainties in the pull request! We will help you improve the service name if necessary 🙂 ## Service name @@ -51,7 +52,7 @@ The service name is exposed to end users. It should reflect as closely as possib - _Example: `Firebase` (by Google) → `Firebase`_. - _Example: `App Store` (by Apple) → `App Store`_. -> If you have a hard time finding the service name, check out the [practical guidelines to find the service name]({{< relref "/terms/guidelines/declaring#service-name" >}}), and feel free to mention your uncertainties in the pull request! We will help you improve the service name if necessary 🙂 +> If you have a hard time finding the service name, check out the [practical guidelines to find the service name]({{< relref "/terms/guideline/declaring#service-name" >}}), and feel free to mention your uncertainties in the pull request! We will help you improve the service name if necessary 🙂 ## Service ID @@ -71,7 +72,7 @@ The service ID is exposed to developers. It should be easy to handle with script - _Example: `App Store` → `App Store`_. - _Example: `DeviantArt` → `DeviantArt`_. -> If you have a hard time defining the service ID, check out the [practical guidelines to derive the ID from the service name]({{< relref "/terms/guidelines/declaring#service-id" >}}), and feel free to mention your uncertainties in the pull request! 
We will help you improve the service ID if necessary 🙂 +> If you have a hard time defining the service ID, check out the [practical guidelines to derive the ID from the service name]({{< relref "/terms/guideline/declaring#service-id" >}}), and feel free to mention your uncertainties in the pull request! We will help you improve the service ID if necessary 🙂 > More details on the ID and naming constraints and recommendations can be found in the relevant [decision record](https://github.com/OpenTermsArchive/engine/blob/main/decision-records/0001-service-name-and-id.md). diff --git a/content/terms/reference/_index.md b/content/terms/reference/_index.md index 63aae4a6..fd021bb8 100644 --- a/content/terms/reference/_index.md +++ b/content/terms/reference/_index.md @@ -1,4 +1,4 @@ --- -title: Reference +title: References weight: 3 --- diff --git a/content/terms/reference/declaration.md b/content/terms/reference/declaration.md index cc98caa3..b9c1f154 100644 --- a/content/terms/reference/declaration.md +++ b/content/terms/reference/declaration.md @@ -1,355 +1,217 @@ --- -title: "Declaration format" +title: "Service declaration" aliases: "/terms/reference/" --- -# Terms declaration format reference +# Service declaration -Terms are declared in a service declaration file, under the `terms` property. +This reference documentation details all available properties that can be specified in a service's declaration file. -Most of the time, terms are written in only one source document (for example [Facebook Terms of Service](https://www.facebook.com/legal/terms)) but sometimes terms can be spread across multiple online source documents, and their combination constitutes the terms (for example [Facebook Community Guidelines](https://transparency.fb.com/policies/community-standards/)). 
+The examples given throughout this reference can be seen in context in the [declaration files](https://github.com/OpenTermsArchive/demo-declarations/tree/main/declarations) from the [Demo collection](https://github.com/OpenTermsArchive/demo-declarations). +## Properties -## Source document -The way in which a source document is obtained is defined in a JSON object: +{{< refItem + name="name" + type="string" + description="The name of the service." + example="Open Terms Archive" + required=true +/>}} -```json -{ - "fetch": "The URL where the document can be found", - "executeClientScripts": "A boolean to execute client-side JavaScript loaded by the document before accessing the content, in case the DOM modifications are needed to access the content; defaults to false (fetch HTML only)", - "filter": "An array of service specific filter function names", - "remove": "A CSS selector, a range selector or an array of selectors that target the insignificant parts of the document that has to be removed. Useful to remove parts that are inside the selected parts", - "select": "A CSS selector, a range selector or an array of selectors that target the meaningful parts of the document, excluding elements such as headers, footers and navigation" -} -``` - -- For HTML files, `fetch` and `select` are mandatory. -- For PDF files, only `fetch` is mandatory. - -Let’s start by defining these keys! - -## `fetch` - -This property should simply contain the URL at which the terms you want to track can be downloaded. HTML and PDF files are supported. - -When terms coexist in different languages and jurisdictions, please refer to the [scope of the collection]({{< relref "collections/reference/metadata" >}}) to which you are contributing. This scope is usually defined in the README. 
- -## `select` - -_This property is not needed for PDF documents._ - -Most of the time, contractual documents are exposed as web pages, with a header, a footer, navigation menus, possibly ads… We aim at tracking only the significant parts of the document. In order to achieve that, the `select` property allows to extract only those parts in the process of [converting from snapshot to version](https://opentermsarchive.org/#how-it-works). - -The `select` value can be either a [CSS selector](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors), a [range selector](#range-selectors) or an array of those. +{{< refItem + name="terms" + type="object of objects" + description=`Map of terms associated with a service, where keys are standardized term types (e.g., "Privacy Policy", "Terms of Service"), and values are term objects containing the configuration for fetching and processing each document, as detailed in the [Terms declaration]({{< relref \"#terms-declaration\" >}}) section. -### CSS selectors - -CSS selectors should be provided as a string. See the [specification](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors) for how to write CSS selectors. - -> For example, the following selector will select the content in the `
` tag of the HTML document: -> -> ```json -> "select": "main" -> ``` - -### Range selectors - -A range selector is defined with a _start_ and an _end_ CSS selector. It is also necessary to define if the range starts before or after the element targeted by the _start_ CSS selector and to define if it ends before or after the element targeted by the _end_ CSS selector. - -To that end, a range selector is a JSON object containing two keys out of the four that are available: `startBefore`, `startAfter`, `endBefore` and `endAfter`. +To facilitate cross-service comparisons and ensure consistency, a standardized list of term types is maintained in a [dedicated repository](https://github.com/OpenTermsArchive/terms-types). +Please note, the terms type may differ from the exact name provided by the service, but it should align with the underlying commitment. For example, some providers might call “Terms and Conditions” or “Terms of Use” what some others call “Terms of Service”.` + required=true + reference="[Terms Types](https://github.com/OpenTermsArchive/terms-types)." +>}} ```json -{ - "start[Before|After]": "", - "end[Before|After]": "" +"terms": { + "Terms of Service": { + "fetch": "https://opencollective.com/tos", + "select": ".markdown" + }, + "Privacy Policy": { + "fetch": "https://opencollective.com/privacypolicy", + "select": ".markdown" + } } ``` +{{< /refItem >}} -> For example, the following selector will select the content between the element targeted by the CSS selector `#privacy-eea`, including it, and the element targeted by the CSS selector `footer`, excluding it: -> -> ```json -> { -> "startBefore": "#privacy-eea", -> "endBefore": "footer" -> } -> ``` - -## `remove` - -_This property is optional._ - -Beyond [selecting a subset of a web page](#select), some documents will have non-significant parts in the middle of otherwise significant parts. For example, they can have “go to top” links or banner ads. 
These can be removed by listing [CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors), [range selectors](#range-selectors) or an array of them under the `remove` property. - -### Example - -Let's assume a web page contains the following content: - -```html -
-
- -
-

User Agreement

-
…terms…
-
-``` - -If only `main` is used in `select`, the following version will be extracted: - -```md -User Agreement Privacy Policy Content Policy Broadcasting Content Policy Moderator Guidelines Transparency Report 2017 Transparency Report 2018 Guidelines for Law Enforcement Transparency Report 2019 - -User Agreement -============== - -…terms… -``` - -Whereas we want instead: - -```md -User Agreement -============== - -…terms… -``` - -This result can be obtained with the following declaration: +--- +### Terms declaration + +{{< refItem + name="fetch" + type="uri" + description="The URL where the terms document can be downloaded." + example="https://opentermsarchive.org/en/privacy-policy" + required=true +/>}} + +{{< refItem + name="select" + type="string, object or array" + description=` +The way to select the parts of the document to extract. Can be: + +- a CSS selector string. See the [CSS Selectors specification](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors) +- a range selector object. 
See the [range selector]({{< relref \"#range-selector\" >}}) section +- an array of those` + required="required for HTML documents" +>}} +As a direct CSS selector: ```json -{ - "fetch": "https://example.com/user-agreement", - "select": "main", - "remove": ".filter-holder" -} +"select": "#article-contents" ``` -### Complex selectors examples - +As a range selector object: ```json -{ - "fetch": "https://support.google.com/adsense/answer/48182", - "select": ".article-container", - "remove": ".print-button, .go-to-top" +"select": { + "startBefore": "h1", + "endBefore": "#toc-heading" } ``` +As an array of those: ```json -{ - "fetch": "https://www.wechat.com/en/service_terms.html", - "select": "#agreement", - "remove": { - "startBefore": "#wechat-terms-of-service-usa-specific-terms-", - "endBefore": "#wechat-terms-of-service-european-union-specific-terms-" - } -} -``` - +"select": [ + "#article-contents", + { + "startBefore": "h1", + "endBefore": "#toc-heading" + } +] +``` +{{< /refItem >}} + +{{< refItem + name="executeClientScripts" + type="boolean" + description=`Boolean flag to execute client-side JavaScript before accessing content. + +When enabled, this loads the page in a headless browser to execute client-side scripts and load dynamic content, which is necessary when JavaScript modifies or loads content after the initial page load.` + default=false + example="true" +/>}} + +{{< refItem + name="remove" + type="string, object or array" + description=` +The way to remove the parts of the document that are not part of the terms and can be considered as noise. Can be: + +- a CSS selector string. See the [CSS Selectors specification](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors) +- a range selector object. 
See the [range selector]({{< relref \"#range-selector\" >}}) section +- an array of those` +>}} +As a direct CSS selector: ```json -{ - "fetch": "https://fr-fr.facebook.com/legal/terms/plain_text_terms", - "select": "div[role=main]", - "remove": [ - { - "startBefore": "[role=\"separator\"]", - "endAfter": "body" - }, - "[style=\"display:none\"]" - ] -} -``` - -## `executeClientScripts` - -_This property is optional._ - -In some cases, the content of the document is only loaded (or is modified dynamically) by client scripts. -When set to `true`, this boolean property loads the page in a headless browser to load all assets and execute client scripts before trying to get the document contents. - -Since the performance cost of this approach is high, it is set to `false` by default, relying on the HTML content only. - -## `filter` - -_This property is optional._ - -Finally, some documents will need more complex filtering beyond simple element selection and removal, for example to remove noise (changes in textual content that are not meaningful to the terms of services). Such filters are declared as JavaScript functions that modify the downloaded web page through the [DOM API](https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model). - -Filters take the document DOM and the terms declaration as parameters and are: - -- **in-place**: they modify the document structure and content directly; -- **idempotent**: they should return the same document structure and content even if run repeatedly on their own result. - -Filters are loaded automatically from files named after the service they operate on. For example, filters for the Meetup service, which is declared in `declarations/Meetup.json`, are loaded from `declarations/Meetup.filters.js`. 
- -The generic function signature for a filter is: - -```js -export [async] function filterName(document, documentDeclaration) -``` - -Each filter is exposed as a named function export that takes a `document` parameter and behaves like the `document` object in a browser DOM. These functions can be `async`, but they will still run sequentially. The whole document declaration is passed as second parameter. - -> The `document` parameter is actually a [JSDOM](https://github.com/jsdom/jsdom) document instance. - -You can learn more about usual noise and ways to handle it [in the guidelines]({{< relref "/terms/guidelines/declaring#usual-noise" >}}). - -### Example - -Let's assume a service adds a unique `clickId` parameter in the query string of all link destinations. These parameters change on each page load, leading to recording noise in versions. Since links should still be recorded, it is not appropriate to use `remove` to remove the links entirely. Instead, a filter will manipulate the links destinations to remove the always-changing parameter. Concretely, the goal is to apply the following filter: - -```diff -- Read the list of our affiliates. -+ Read the list of our affiliates. 
-``` - -The code below implements this filter: - -```js -function removeTrackingIdsQueryParam(document) { - const QUERY_PARAM_TO_REMOVE = 'clickId'; - - document.querySelectorAll('a').forEach(link => { // iterate over every link in the page - const url = new URL(link.getAttribute('href'), document.location); // URL is part of the DOM API, see https://developer.mozilla.org/en-US/docs/Web/API/URL - const params = new URLSearchParams(url.search); // URLSearchParams is part of the DOM API, see https://developer.mozilla.org/en-US/docs/Web/API/URLSearchParams - - params.delete(QUERY_PARAM_TO_REMOVE); // we use the DOM API instead of RegExp because we can't know in advance in which order parameters will be written - url.search = params.toString(); // store the query string without the parameter - link.setAttribute('href', url.toString()); // write the destination URL without the parameter - }); -} +"remove": ".nav, .breadcrumb" ``` -### Example usage of declaration parameter - -The second parameter can be used to access the defined document URL or selector inside the filter. - -Let's assume a service stores some of its legally-binding terms in images. To track these changes properly, images should be stored as part of the terms. By default, images are not stored since they significantly increase the document size. The filter below will store images inline in the terms, encoded in a [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs). In order to download the images for conversion, the base URL of the web page is needed to resolve relative links. This information is obtained from the declaration. 
- -```js -import fetch from 'isomorphic-fetch'; - -export async function convertImagesToBase64(document, documentDeclaration) { - const { fetch: baseUrl, select: selector } = documentDeclaration; - - const images = Array.from(document.querySelectorAll(`${selector} img`)); - - return Promise.all(images.map(async ({ src }, index) => { - const imageAbsoluteUrl = new URL(src, baseUrl).href; - const response = await fetch(imageAbsoluteUrl); - const mimeType = response.headers.get('content-type'); - const content = await response.arrayBuffer(); - - const base64Image = btoa(String.fromCharCode(...new Uint8Array(content))); - - images[index].src = `data:${mimeType};base64,${base64Image}`; - })); +As a range selector object: +```json +"remove": { + "startBefore": ".nav", + "endBefore": ".breadcrumb" } ``` -## Terms with a single source document - -In the case where terms are extracted from one single source document, they are declared by simply declaring that source document: - +As an array of those: ```json - … - "terms": { - "": { - "fetch": "…", - "executeClientScripts": "…", - "filter": "…", - "remove": "…", - "select": "…" +"remove": [ + ".nav, .breadcrumb", + { + "startBefore": "#contact-us", + "endBefore": "#footer" } - } - … -``` - -## Terms with multiple source documents - -When the terms are spread across multiple source documents, they should be declared by declaring their combination: - +] +``` +{{< /refItem >}} + +{{< refItem + name="filter" + type="array of strings" + description="Array of filter function names to apply. Functions are executed in the order of the array. See the [Filters]({{< relref \"terms/reference/filters\" >}}) section for more information." + example="[\"filterName1\", \"filterName2\"]" +/>}} + +{{< refItem + name="combine" + type="array of objects" + description=` +An array of terms declaration objects that will be combined into a single terms document. 
Each object in the array can contain all the same properties as a regular terms declaration (except "combine"). + +Common properties (can be a combination of "select", "remove", "filter" and "executeClientScripts") that are shared across all source documents can be factorized by declaring them at the root level of the terms declaration. + ` +>}} ```json - … - "terms": { - "": { - "combine": [ - { - "fetch": "…", - "executeClientScripts": "…", - "filter": "…", - "remove": "…", - "select": "…" - }, - { - "fetch": "…", - "executeClientScripts": "…", - "filter": "…", - "remove": "…", - "select": "…" - } - ] +"combine": [ + { + "fetch": "https://example.com/terms/part1", + "select": "#main-content", + "remove": ".ads" + }, + { + "fetch": "https://example.com/terms/part2", + "select": "#main-content", + "remove": ".ads" } - } - … -``` - -If some parts of the source documents are repeated, they can be factorised. For example, it is common for the structure of HTML pages to be similar from page to page, so `select`, `remove` and `filter` would be the same. These elements can be shared instead of being duplicated: - -```json - … - "terms": { - "": - "executeClientScripts": "…", - "filter": "…", - "remove": "…", - "select": "…", - "combine": [ - { - "fetch": "…", - }, - { - "fetch": "…", - } - ] - } - … -``` - -## Terms type - -Great, your terms declaration is now almost complete! You simply need to write it under the appropriate terms type in the `terms` JSON object within the service declaration. - -In order to distinguish between the many terms that can be associated with a service and enable cross-services comparison of similar terms, we maintain a unique list of terms types in a [dedicated repository](https://github.com/OpenTermsArchive/terms-types). - -Please note, the terms type may differ from the exact name provided by the service, but it should align with the underlying commitment. 
For example, some providers might call “Terms and Conditions” or “Terms of Use” what some others call “Terms of Service”. - -If the terms you want to add don't match an existing type, you can [suggest a new one](https://github.com/OpenTermsArchive/terms-types/blob/main/CONTRIBUTING.md). - -## Testing your declaration - -You can test the declarations you created or changed by running the following command: - -```sh -npm test [$service_id [$another_service_id …]] +] ``` +{{< /refItem >}} -Since this operation fetches documents and could be long, you can also validate the declaration structure only: - -```sh -npm run test:schema [$service_id [$another_service_id …]] -``` - -## Linting - -In order to ensure consistency across declarations, all declarations files have to be formatted homogeneously. +--- -In order to achieve this, you can use the following command: +### Range selector + +{{< refItem + name="startBefore" + type="CSS selector" + description="The CSS selector for the element before which the range starts." + example="#privacy-eea" + required="either `startBefore` or `startAfter` is required" +/>}} + +{{< refItem + name="startAfter" + type="CSS selector" + description="The CSS selector for the element after which the range starts." + example="#privacy-eea" + required="either `startBefore` or `startAfter` is required" +/>}} + +{{< refItem + name="endBefore" + type="CSS selector" + description="The CSS selector for the element before which the range ends." + example="footer" + required="either `endBefore` or `endAfter` is required" +/>}} + +{{< refItem + name="endAfter" + type="CSS selector" + description="The CSS selector for the element after which the range ends." 
+ example="footer" + required="either `endBefore` or `endAfter` is required" +/>}} + +#### Example + +To capture content starting from and including a privacy section up until but excluding the footer: -```sh -npm run lint [$service_id [$another_service_id …]] +```json +{ + "startBefore": "#privacy-section", + "endBefore": "footer" +} ``` diff --git a/content/terms/reference/filters.md b/content/terms/reference/filters.md new file mode 100644 index 00000000..6006c8a0 --- /dev/null +++ b/content/terms/reference/filters.md @@ -0,0 +1,54 @@ +--- +title: "Filters" +--- + +# Filters + +Some documents require more complex filtering beyond basic element selection and removal. For example, web pages often contain dynamically generated content like tracking IDs in URLs that change on each page load. While these elements are part of the page, they are not meaningful to the terms content itself. If such dynamic content is included in the archived versions, it creates a lot of insignificant versions and pollutes the archive with noise that makes it harder to identify actual changes to the terms. + +Filters address this need by providing a way to programmatically clean up and normalize the content before archiving. They are implemented as JavaScript functions that can manipulate the downloaded web page using the [DOM API](https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model), allowing for sophisticated content transformations beyond what's possible with simple CSS selectors. + +Filters take the document DOM and the terms declaration as parameters and are: + +- **in-place**: they modify the document structure and content directly; +- **idempotent**: they return the same document structure and content even if run repeatedly on their own result. + +Filters are loaded automatically from files named after the service they operate on. 
For example, filters for the Meetup service, which is declared in `declarations/Meetup.json`, are loaded from `declarations/Meetup.filters.js`. + +The generic function signature for a filter is: + +```js +export [async] function filterName(document, documentDeclaration) +``` + +Each filter is exposed as a named function export that takes a `document` parameter, which behaves like the `document` object in a browser DOM. These functions can be `async`, but they will still run sequentially. The whole document declaration is passed as the second parameter. + +> The `document` parameter is actually a [JSDOM](https://github.com/jsdom/jsdom) document instance. + +You can learn more about usual noise and ways to handle it [in the guidelines]({{< relref "/terms/guideline/declaring#usual-noise" >}}). + +### Example + +Let's assume a service adds a unique `clickId` parameter in the query string of all link destinations. These parameters change on each page load, leading to recording noise in versions. Since links should still be recorded, it is not appropriate to use `remove` to remove the links entirely. Instead, a filter will manipulate the link destinations to remove the always-changing parameter. Concretely, the goal is to apply the following filter: + +```diff +- <p>Read the list of our <a href="https://example.com/example-page?clickId=349A2033B&lang=en">affiliates</a>.</p> ++ <p>Read the list of our <a href="https://example.com/example-page?lang=en">affiliates</a>.</p> 
+``` + +The code below implements this filter: + +```js +function removeTrackingIdsQueryParam(document) { + const QUERY_PARAM_TO_REMOVE = 'clickId'; + + document.querySelectorAll('a').forEach(link => { // iterate over every link in the page + const url = new URL(link.getAttribute('href'), document.location); // URL is part of the DOM API, see https://developer.mozilla.org/en-US/docs/Web/API/URL + const params = new URLSearchParams(url.search); // URLSearchParams is part of the DOM API, see https://developer.mozilla.org/en-US/docs/Web/API/URLSearchParams + + params.delete(QUERY_PARAM_TO_REMOVE); // we use the DOM API instead of RegExp because we can't know in advance in which order parameters will be written + url.search = params.toString(); // store the query string without the parameter + link.setAttribute('href', url.toString()); // write the destination URL without the parameter + }); +} +``` diff --git a/content/terms/tutorials/_index.md b/content/terms/tutorial/_index.md similarity index 100% rename from content/terms/tutorials/_index.md rename to content/terms/tutorial/_index.md diff --git a/content/terms/tutorials/track.md b/content/terms/tutorial/track.md similarity index 98% rename from content/terms/tutorials/track.md rename to content/terms/tutorial/track.md index 36d171cb..6ac25006 100644 --- a/content/terms/tutorials/track.md +++ b/content/terms/tutorial/track.md @@ -1,5 +1,7 @@ --- title: Track your first terms +aliases: + - /terms/tutorials/track/ --- # Track your first terms diff --git a/themes/opentermsarchive/assets/css/components/configOption.css b/themes/opentermsarchive/assets/css/components/configOption.css deleted file mode 100644 index a0276f9b..00000000 --- a/themes/opentermsarchive/assets/css/components/configOption.css +++ /dev/null @@ -1,47 +0,0 @@ -.config-option { - padding: 2rem 0; -} - -.config-option + .config-option { - box-shadow: inset 0 1px var(--colorBlack200); -} - -.config-option .config-option-name code { - font-weight: 600; 
-} - -.config-option-type { - color: var(--colorBlack600); - padding-left: 1rem; - text-transform: lowercase; -} - -.config-option-required { - padding-left: 1rem; - text-transform: lowercase; - color: rgb(154, 103, 0); -} - -.config-option-details { - display: flex; -} - -.config-option-description, -.config-option-default, -.config-option-example, -.config-option-allowed-values { - padding: 1rem 0.4em; -} - -.config-option-example code, -.config-option-default code, -.config-option-allowed-values code { - margin-left: 0.4em; - display: inline-block; -} - -.config-option-example, -.config-option-default, -.config-option-allowed-values { - font-size: 0.85em; -} diff --git a/themes/opentermsarchive/assets/css/components/refItem.css b/themes/opentermsarchive/assets/css/components/refItem.css new file mode 100644 index 00000000..5c84ee57 --- /dev/null +++ b/themes/opentermsarchive/assets/css/components/refItem.css @@ -0,0 +1,71 @@ +.refItem { + padding: 2rem 0; +} + +.refItem + .refItem { + box-shadow: inset 0 1px var(--colorBlack200); +} + +.refItem-name code { + font-weight: 600; + overflow-y: auto; + font-size: 1em; +} + +.refItem-type { + color: var(--colorBlack600); + padding-left: 1rem; + text-transform: lowercase; +} + +.refItem-required { + padding-left: 1rem; + color: rgb(154, 103, 0); +} + +.refItem-required code { + font-weight: 400; +} + +.refItem-details { + display: flex; +} + +.refItem-details__full { + flex-direction: column; +} + +.refItem-description, +.refItem-default, +.refItem-example, +.refItem-allowed-values, +.refItem-reference { + padding: 1rem 0.4em; +} + +.refItem-example code, +.refItem-default code, +.refItem-allowed-values-list { + margin-left: 0.4em; + display: inline-block; +} + +.refItem-example, +.refItem-default, +.refItem-allowed-values, +.refItem-reference { + font-size: 0.85em; +} + +.refItem-details__full .refItem-example { + overflow-y: auto; + width: 100%; +} + +.refItem-details__full .refItem-example .highlight { + margin-top: 
0.4em; +} + +.refItem-reference span{ + margin-right: 0.4em; +} diff --git a/themes/opentermsarchive/assets/css/loader.css b/themes/opentermsarchive/assets/css/loader.css index e5446ce1..49d9e6fe 100644 --- a/themes/opentermsarchive/assets/css/loader.css +++ b/themes/opentermsarchive/assets/css/loader.css @@ -35,4 +35,4 @@ @import "/components/divider.css"; @import "/components/textContent.css"; @import "/components/aside.css"; -@import "/components/configOption.css"; +@import "/components/refItem.css"; diff --git a/themes/opentermsarchive/layouts/shortcodes/configOption.html b/themes/opentermsarchive/layouts/shortcodes/configOption.html deleted file mode 100644 index 99b48f03..00000000 --- a/themes/opentermsarchive/layouts/shortcodes/configOption.html +++ /dev/null @@ -1,28 +0,0 @@ -
-
- {{ .Get "name" }} - {{ .Get "type" }} - {{ if .Get "required" }} - Required - {{ end }} -
-
{{ .Get "description" | markdownify }}
-
- {{ if .Get "default" }} -
Default:{{ .Get "default" | markdownify}}
- {{ end }} - {{ if .Get "example" }} -
Example: - {{ $example := .Get "example" }} - {{ if not (strings.Contains $example "\n") }} - {{ $example | markdownify }} - {{ else }} - {{ $example | markdownify }} - {{ end }} -
- {{ end }} - {{ if .Get "allowedValues" }} -
Allowed values:{{ .Get "allowedValues" | markdownify }}
- {{ end }} -
-
diff --git a/themes/opentermsarchive/layouts/shortcodes/refItem.html b/themes/opentermsarchive/layouts/shortcodes/refItem.html new file mode 100644 index 00000000..8bd96902 --- /dev/null +++ b/themes/opentermsarchive/layouts/shortcodes/refItem.html @@ -0,0 +1,51 @@ +{{ $name := .Get "name" }} +{{ $type := .Get "type" }} +{{ $required := .Get "required" }} +{{ $allowedValues := .Get "allowedValues" }} +{{ $default := .Get "default" }} +{{ $reference := .Get "reference" }} +{{/* Description is read from the "description" attribute only; nested content (.Inner), when present, is rendered as the example */}} +{{ $description := .Get "description" }} +{{ $example := .Get "example" }} + +
+
+ {{ $name }} + {{ $type }} + {{ with $required }} + {{ if eq . true }}required{{ else }}{{ . | markdownify }}{{ end }} + {{ end }} +
+
{{ $description | markdownify | safeHTML }}
+
+ {{ if $default }} +
+ Default: + {{ $default | markdownify | replaceRE "]*>(.*?)" "$1" | safeHTML }} +
+ {{ end }} + {{ if or $example .Inner }} +
Example: + {{ if .Inner }} + {{ .Inner | markdownify }} + {{ else }} + {{ $example | markdownify | replaceRE "]*>(.*?)" "$1" | safeHTML }} + {{ end }} +
+ {{ end }} + {{ if $allowedValues }} + {{ $values := split $allowedValues "," }} +
Allowed values: + + {{ range $values }} + {{ . | markdownify | replaceRE "]*>(.*?)" "$1" | safeHTML }} + {{ end }} + +
+ {{ end }} + {{ if $reference }} +
Reference:{{ $reference | markdownify }}
+ {{ end }} +
+
+