diff --git a/docusaurus.config.ts b/docusaurus.config.ts
index 19ad882e..fe1b68b7 100644
--- a/docusaurus.config.ts
+++ b/docusaurus.config.ts
@@ -78,19 +78,19 @@ const config: Config = {
 				return `https://github.com/HarperDB/documentation/blob/main/docs/${docPath}`;
 			}
 		},
-		lastVersion: '4.6',
-		includeCurrentVersion: true,
+		lastVersion: '4.7',
+		includeCurrentVersion: false,
 		versions: {
-			'current': {
-				label: '4.7-beta',
-				banner: 'unreleased',
+			'4.7': {
+				// No banner for 4.7 as it's the latest stable version
+				banner: 'none',
 			},
 			'4.6': {
-				// No banner for 4.6 as its the latest stable version
+				// No banner for 4.6 as it's still actively maintained
 				banner: 'none',
 			},
 			'4.5': {
-				// No banner for 4.5 as its still actively maintained
+				// No banner for 4.5 as it's still actively maintained
 				banner: 'none',
 			},
 		},
diff --git a/package.json b/package.json
index 63ea2bd6..358176a5 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
 		"start": "npm run prebuild && docusaurus start",
 		"dev": "npm run prebuild && docusaurus start",
 		"build": "npm run prebuild && docusaurus build && node scripts/postbuild.js",
-		"version": "node scripts/cut-version.js",
+		"version": "docusaurus docs:version",
 		"swizzle": "docusaurus swizzle",
 		"deploy": "docusaurus deploy",
 		"clear": "npm run prebuild -- clean && docusaurus clear",
diff --git a/release-notes/v4-tucker/4.7.0.md b/release-notes/v4-tucker/4.7.0.md
index 7744e6a7..be2a7509 100644
--- a/release-notes/v4-tucker/4.7.0.md
+++ b/release-notes/v4-tucker/4.7.0.md
@@ -1,14 +1,10 @@
 ---
-title: 4.7.0 alpha
+title: 4.7.0
 ---
 
-# 4.7.0 alpha
+# 4.7.0
 
-8/15/2025
-
-:::warning Alpha Release
-This is an alpha release available on npm with the 'next' tag. This version should not be considered stable and is intended for testing and evaluation purposes only. Use in production environments is not recommended. See the [current tags on npm](https://www.npmjs.com/package/harperdb?activeTab=versions#current-tags) for details.
-:::
+10/16/2025
 
 - A new component status monitoring collects status from each component from loading and any registered notification of status changes.
 - OCSP is now supported, and can be used to invalidate TLS certificates used for replication and HTTP through an OCSP server.
diff --git a/release-notes/v4-tucker/index.mdx b/release-notes/v4-tucker/index.mdx
index a0577a40..adedc99b 100644
--- a/release-notes/v4-tucker/index.mdx
+++ b/release-notes/v4-tucker/index.mdx
@@ -8,7 +8,7 @@ import LatestPatchLink from '@site/src/components/LatestPatchLink';
 
 HarperDB version 4 ([Tucker release](v4-tucker/tucker)) represents major step forward in database technology.
 This release line has ground-breaking architectural advancements including:
-##
+##
 - Component status monitoring for tracking loading and status changes across all components
 - OCSP support for TLS certificate validation in replication and HTTP connections
diff --git a/versioned_docs/version-4.7/administration/_category_.json b/versioned_docs/version-4.7/administration/_category_.json
new file mode 100644
index 00000000..59c33ea4
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/_category_.json
@@ -0,0 +1,10 @@
+{
+	"label": "Administration",
+	"position": 2,
+	"link": {
+		"type": "generated-index",
+		"title": "Administration Documentation",
+		"description": "Guides for managing and administering HarperDB instances",
+		"keywords": ["administration"]
+	}
+}
diff --git a/versioned_docs/version-4.7/administration/administration.md b/versioned_docs/version-4.7/administration/administration.md
new file mode 100644
index 00000000..e0084bb5
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/administration.md
@@ -0,0 +1,32 @@
+---
+title: Best Practices and Recommendations
+---
+
+# Best Practices and Recommendations
+
+Harper is designed for minimal administrative effort, and with managed services these tasks are handled for you. There are, however, important things to consider when managing your own Harper servers.
+
+### Data Protection, Backup, and Recovery
+
+As a distributed database, Harper can benefit from different data protection strategies than a traditional single-server database. Multiple aspects of data protection and recovery should be considered:
+
+- Availability: As a distributed database, Harper is intrinsically built for high availability, and a cluster will continue to run even if one or more servers fail completely. This is the first and primary defense against any downtime or data loss. Harper provides fast horizontal scaling functionality with node cloning, which makes it easy to establish high-availability clusters.
+- [Audit log](administration/logging/audit-logging): Harper defaults to tracking data changes so malicious data changes can be found, attributed, and reverted. This provides security-level defense against data loss, allowing for fine-grained isolation and reversion of individual data without the large-scale reversion/loss of data associated with point-in-time recovery approaches.
+- Snapshots: When used as a source-of-truth database for crucial data, we recommend using snapshot tools to regularly snapshot databases as a final backup/defense against data loss (this should only be used as a last resort in recovery). Harper has a [`get_backup`](./developers/operations-api/databases-and-tables#get-backup) operation, which provides direct support for making and retrieving database snapshots; an HTTP request can be used to get a snapshot. Alternatively, volume snapshot tools can be used to snapshot data at the OS/VM level. Harper can also provide scripts for replaying transaction logs from snapshots to facilitate point-in-time recovery when necessary (customization may often be preferred in certain recovery situations to minimize data loss).
+
+### Horizontal Scaling with Node Cloning
+
+Harper provides rapid horizontal scaling capabilities through [node cloning functionality described here](administration/cloning), as the sketch below illustrates.
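+
+As a quick illustration, here is a minimal sketch of launching a clone from the command line (the hostnames and credentials are placeholders; these variables are covered in detail in the Clone Node guide):
+
+```bash
+# Point a fresh node at an existing leader node; clone node installs Harper,
+# copies config and databases, and REPLICATION_HOSTNAME enables replication.
+HDB_LEADER_URL=https://node-1.my-domain.com:9925 \
+HDB_LEADER_USERNAME=admin \
+HDB_LEADER_PASSWORD=secret \
+REPLICATION_HOSTNAME=node-2.my-domain.com \
+harperdb
+```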
+
+### Monitoring
+
+Harper provides robust capabilities for analytics and observability to facilitate effective and informative monitoring:
+
+- Analytics provides statistics on usage, request counts, load, and memory usage, with historical tracking. The analytics data can be [accessed through querying](./reference/analytics).
+- A large variety of real-time statistics about load, system information, database metrics, and thread usage can be retrieved through the [`system_information` API](./developers/operations-api/system-operations).
+- Information about the current cluster configuration and status can be found in the [cluster APIs](./developers/operations-api/clustering).
+- Analytics and system information can easily be exported to Prometheus with our [Prometheus exporter component](https://github.com/HarperDB-Add-Ons/prometheus_exporter), making it easy to visualize and monitor Harper with Grafana.
+
+### Replication Transaction Logging
+
+Harper utilizes NATS for replication, which maintains a transaction log. See the [transaction log documentation for information on how to query this log](administration/logging/transaction-logging).
diff --git a/versioned_docs/version-4.7/administration/cloning.md b/versioned_docs/version-4.7/administration/cloning.md
new file mode 100644
index 00000000..b3698092
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/cloning.md
@@ -0,0 +1,153 @@
+---
+title: Clone Node
+---
+
+# Clone Node
+
+Clone node is a configurable node script that, when pointed at another instance of Harper, will create a clone of that instance's config and databases and set up full replication. If it is run in a location where there is no existing Harper install, it will install Harper as part of cloning. If it is run in a location where there is another Harper instance, it will only clone config, databases, and replication that do not already exist.
+
+Clone node is triggered when Harper is installed or started with certain environment or command line (CLI) variables set (see below).
+
+**Leader node** - the instance of Harper you are cloning.\
+**Clone node** - the new node which will be a clone of the leader node.
+
+To start a clone, run `harperdb` in the CLI with either of the following sets of variables:
+
+#### Environment variables
+
+- `HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925).
+- `HDB_LEADER_USERNAME` - The leader node admin username.
+- `HDB_LEADER_PASSWORD` - The leader node admin password.
+- `REPLICATION_HOSTNAME` - _(optional)_ The clone's replication hostname. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone.
+
+For example:
+
+```
+HDB_LEADER_URL=https://node-1.my-domain.com:9925 REPLICATION_HOSTNAME=node-2.my-domain.com HDB_LEADER_USERNAME=... HDB_LEADER_PASSWORD=... harperdb
+```
+
+#### Command line variables
+
+- `--HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925).
+- `--HDB_LEADER_USERNAME` - The leader node admin username.
+- `--HDB_LEADER_PASSWORD` - The leader node admin password.
+- `--REPLICATION_HOSTNAME` - _(optional)_ The clone's replication hostname. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone.
+
+For example:
+
+```
+harperdb --HDB_LEADER_URL https://node-1.my-domain.com:9925 --REPLICATION_HOSTNAME node-2.my-domain.com --HDB_LEADER_USERNAME ... --HDB_LEADER_PASSWORD ...
+``` + +Each time clone is run it will set a value `cloned: true` in `harperdb-config.yaml`. This value will prevent clone from +running again. If you want to run clone again set this value to `false`. If Harper is started with the clone variables +still present and `cloned` is true, Harper will just start as normal. + +Clone node does not require any additional configuration apart from the variables referenced above. +However, if you wish to set any configuration during clone this can be done by passing the config as environment/CLI +variables or cloning overtop of an existing `harperdb-config.yaml` file. + +More can be found in the Harper config documentation [here](../deployments/configuration). + +### Excluding database and components + +To set any specific (optional) clone config, including the exclusion of any database and/or replication, there is a file +called `clone-node-config.yaml` that can be used. + +The file must be located in the `ROOTPATH` directory of your clone (the `hdb` directory where you clone will be installed. +If the directory does not exist, create one and add the file to it). + +The config available in `clone-node-config.yaml` is: + +```yaml +databaseConfig: + excludeDatabases: + - database: null + excludeTables: + - database: null + table: null +componentConfig: + exclude: + - name: null +``` + +_Note: only include the configuration that you are using. If no clone config file is provided nothing will be excluded, +unless it already exists on the clone._ + +`databaseConfig` - Set any databases or tables that you wish to exclude from cloning. + +`componentConfig` - Set any components that you do not want cloned. Clone node will not clone the component code, +it will only clone the component reference that exists in the leader harperdb-config file. + +### Cloning configuration + +Clone node will not clone any configuration that is classed as unique to the leader node. This includes `replication.hostname`, `replication.url`,`clustering.nodeName`, +`rootPath` and any other path related values, for example `storage.path`, `logging.root`, `componentsRoot`, +any authentication certificate/key paths. + +### Cloning system database + +Harper uses a database called `system` to store operational information. Clone node will only clone the user and role +tables from this database. It will also set up replication on this table, which means that any existing and future user and roles +that are added will be replicated throughout the cluster. + +Cloning the user and role tables means that once clone node is complete, the clone will share the same login credentials with +the leader. + +### Replication + +If clone is run with the `REPLICATION_HOSTNAME` variable set, a fully replicating clone will be created. + +If any databases are excluded from the clone, replication will not be set up on these databases. + +### JWT Keys + +If cloning with replication, the leader's JWT private and public keys will be cloned. To disable this, include `CLONE_KEYS=false` in your clone variables. + +### Cloning overtop of an existing Harper instance + +Clone node will not overwrite any existing config, database or replication. It will write/clone any config database or replication +that does not exist on the node it is running on. + +An example of how this can be useful is if you want to set Harper config before the clone is created. To do this you +would create a harperdb-config.yaml file in your local `hdb` root directory with the config you wish to set. 
+Then when clone is run, it will append the missing config to the file and install Harper with the desired config.
+
+Another useful example could be retroactively adding another database to an existing instance. Running clone on an existing instance could create a full clone of another database and set up replication between the database on the leader and the clone.
+
+### Cloning steps
+
+Clone node will execute the following steps when run:
+
+1. Look for an existing Harper install. It does this by using the default (or user-provided) `ROOTPATH`.
+1. If an existing instance is found, it will check for a `harperdb-config.yaml` file and search for the `cloned` value. If the value exists and is `true`, clone will skip the clone logic and start Harper.
+1. Clone `harperdb-config.yaml` values that don't already exist (excluding values unique to the leader node).
+1. Fully clone any databases that don't already exist.
+1. If classed as a "fresh clone", install Harper. An instance is classed as a fresh clone if there is no system database.
+1. If `REPLICATION_HOSTNAME` is set, set up replication between the leader and clone.
+1. Clone is complete; start Harper.
+
+### Cloning with Docker
+
+To run clone inside a container, add the environment variables to your run command.
+
+For example:
+
+```
+docker run -d \
+  -v <host directory>:/home/harperdb/hdb \
+  -e HDB_LEADER_PASSWORD=password \
+  -e HDB_LEADER_USERNAME=admin \
+  -e HDB_LEADER_URL=https://1.123.45.6:9925 \
+  -e REPLICATION_HOSTNAME=1.123.45.7 \
+  -p 9925:9925 \
+  -p 9926:9926 \
+  harperdb/harperdb
+```
+
+Clone will only run once, when you first start the container. If the container restarts, the environment variables will be ignored.
diff --git a/versioned_docs/version-4.7/administration/compact.md b/versioned_docs/version-4.7/administration/compact.md
new file mode 100644
index 00000000..1a71db14
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/compact.md
@@ -0,0 +1,60 @@
+---
+title: Compact
+---
+
+# Compact
+
+Database files can grow quickly as you use them, sometimes impeding performance. Harper has multiple compact features that can be used to reduce database file size and potentially improve performance. The compact process does not compress your data; instead, it makes your database file smaller by eliminating free space and fragmentation.
+
+There are two options that Harper offers for compacting a database.
+
+_Note: Some of the storage configuration (such as compression) cannot be updated on existing databases; this is where the following options are useful. They will create a new compressed copy of the database with any updated configuration._
+
+More information on the storage configuration options can be [found here](../deployments/configuration#storage).
+
+### Copy compaction
+
+To prevent any record loss, it is recommended that Harper not be running when performing this operation.
+
+This will copy a Harper database with compaction. If you wish to use this new database in place of the original, you will need to move/rename it to the path of the original database.
+
+This command should be run in the [CLI](../deployments/harper-cli):
+
+```bash
+harperdb copy-db <database> <destination path>
+```
+
+For example, to copy the default database:
+
+```bash
+harperdb copy-db data /home/user/hdb/database/copy.mdb
+```
+
+### Compact on start
+
+Compact on start is a more automated option that will compact **all** databases when Harper is started. Harper will not start until compact is complete.
+Under the hood, it loops through all non-system databases, creates a backup of each one, and calls `copy-db`. After the copy/compaction is complete, it will move the new database to where the original one is located and remove any backups.
+
+Compact on start is initiated by config in `harperdb-config.yaml`.
+
+_Note: Compact on start will switch `compactOnStart` to `false` after it has run._
+
+`compactOnStart` - _Type_: boolean; _Default_: false
+
+`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false
+
+```yaml
+storage:
+  compactOnStart: true
+  compactOnStartKeepBackup: false
+```
+
+Using CLI variables:
+
+```bash
+--STORAGE_COMPACTONSTART true --STORAGE_COMPACTONSTARTKEEPBACKUP true
+```
+
+Or using environment variables:
+
+```bash
+STORAGE_COMPACTONSTART=true
+STORAGE_COMPACTONSTARTKEEPBACKUP=true
+```
diff --git a/versioned_docs/version-4.7/administration/harper-studio/create-account.md b/versioned_docs/version-4.7/administration/harper-studio/create-account.md
new file mode 100644
index 00000000..e1ffbb87
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/create-account.md
@@ -0,0 +1,27 @@
+---
+title: Create a Studio Account
+---
+
+# Create a Studio Account
+
+Start at the [Harper Studio sign up page](https://fabric.harper.fast/#/sign-up).
+
+1. Provide the following information:
+   - First Name
+   - Last Name
+   - Email Address
+   - Subdomain
+
+     _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._
+
+   - Coupon Code (optional)
+
+1. Review the Privacy Policy and Terms of Service.
+1. Click the **sign up for free** button.
+1. You will be taken to a new screen to add an account password. Enter your password.
+   _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._
+1. Click the **add account password** button.
+
+You will receive a Studio welcome email confirming your registration.
+
+Note: Your email address will be used as your username and cannot be changed.
diff --git a/versioned_docs/version-4.7/administration/harper-studio/enable-mixed-content.md b/versioned_docs/version-4.7/administration/harper-studio/enable-mixed-content.md
new file mode 100644
index 00000000..2530fef0
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/enable-mixed-content.md
@@ -0,0 +1,9 @@
+---
+title: Enable Mixed Content
+---
+
+# Enable Mixed Content
+
+Enabling mixed content is required if you want to connect the Harper Studio, which is served over HTTPS, to Harper instances that are only reachable over insecure HTTP. Doing so allows your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. This should not be used for production systems (a better option is to add HTTPS/SSL termination in front of your instances), but it may be convenient for development and testing if you understand the risks.
+
+A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html).
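+
+If you would rather avoid mixed content entirely, a lightweight way to add HTTPS/SSL termination in front of an HTTP-only instance is a reverse proxy. A minimal sketch using Caddy (Caddy is not part of Harper, and the hostname and port here are illustrative assumptions):
+
+```bash
+# Terminate TLS at the proxy and forward traffic to the HTTP-only Harper
+# instance, so the HTTPS Studio can connect without mixed content.
+caddy reverse-proxy --from harper.example.com --to localhost:9925
+```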
diff --git a/versioned_docs/version-4.7/administration/harper-studio/index.md b/versioned_docs/version-4.7/administration/harper-studio/index.md
new file mode 100644
index 00000000..7d7192fe
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/index.md
@@ -0,0 +1,21 @@
+---
+title: Harper Studio
+---
+
+# Harper Studio
+
+Harper Studio is the web-based GUI for Harper. Studio enables you to administer, navigate, and monitor all of your Harper instances in a simple, user-friendly interface without any knowledge of the underlying Harper API. It’s free to sign up, so get started today!
+
+[Sign up for free!](https://studio.harperdb.io/sign-up)
+
+Harper now includes a simplified local Studio that is packaged with all Harper installations and served directly from the instance. It can be enabled in the [configuration file](../deployments/configuration#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://studio.harperdb.io).
+
+---
+
+## How does Studio work?
+
+While Harper Studio is web-based and hosted by us, all database interactions are performed on the Harper instance the Studio is connected to. The Harper Studio loads in your browser, at which point you log in to your Harper instances. Credentials are stored in your browser cache and are not transmitted back to Harper. All database interactions are made via the Harper Operations API directly from your browser to your instance.
+
+## What type of instances can I manage?
+
+Harper Studio enables users to manage both Harper Cloud instances and privately hosted instances, all from a single UI. All Harper instances feature identical behavior whether they are hosted by us or by you.
diff --git a/versioned_docs/version-4.7/administration/harper-studio/instance-configuration.md b/versioned_docs/version-4.7/administration/harper-studio/instance-configuration.md
new file mode 100644
index 00000000..06a6eb89
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/instance-configuration.md
@@ -0,0 +1,108 @@
+---
+title: Instance Configuration
+---
+
+# Instance Configuration
+
+Harper instance configuration can be viewed and managed directly through the Harper Studio. Harper Cloud instances can be resized in two different ways via this page: either by modifying machine RAM or by increasing drive storage. Enterprise instances can have their licenses updated by modifying licensed RAM.
+
+All instance configuration is handled through the **config** page of the Harper Studio, accessed with the following instructions:
+
+1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page.
+
+1. Click the appropriate organization that the instance belongs to.
+
+1. Select your desired instance.
+
+1. Click **config** in the instance control bar.
+
+_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._
+
+## Instance Overview
+
+The **instance overview** panel displays the following instance specifications:
+
+- Instance URL
+
+- Applications URL
+
+- Instance Node Name (for clustering)
+
+- Instance API Auth Header (this user)
+
+  _The Basic authentication header used for the logged-in Harper database user._
+
+- Created Date (Harper Cloud only)
+
+- Region (Harper Cloud only)
+
+  _The geographic region where the instance is hosted._
+
+- Total Price
+
+- RAM
+
+- Storage (Harper Cloud only)
+
+- Disk IOPS (Harper Cloud only)
+
+## Update Instance RAM
+
+Harper Cloud instance size and Enterprise instance licenses can be modified with the following instructions. This option is only available to Studio organization owners.
+
+Note: For Harper Cloud instances, upgrading RAM may add additional CPUs to your instance as well.
+
+1. In the **update ram** panel at the bottom left:
+   - Select the new instance size.
+   - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade.
+   - If you do have a credit card associated, you will be presented with the updated billing information.
+   - Click **Upgrade**.
+
+1. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE.
+
+1. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size.
+
+_Note, if Harper Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._
+
+## Update Instance Storage
+
+The Harper Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners.
+
+Note: Instance storage can only be upgraded once every 6 hours.
+
+1. In the **update storage** panel at the bottom left:
+   - Select the new instance storage size.
+   - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade.
+   - If you do have a credit card associated, you will be presented with the updated billing information.
+   - Click **Upgrade**.
+
+1. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE.
+1. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size.
+
+_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._
+
+## Remove Instance
+
+The Harper instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. This option is only available to Studio organization owners.
+
+1. In the **remove instance** panel at the bottom left:
+   - Enter the instance name in the text box.
+   - The Studio will present you with a warning.
+   - Click **Remove**.
+
+1. The instance will begin deleting immediately.
+
+## Restart Instance
+
+The Harper Cloud instance can be restarted with the following instructions.
+
+1. In the **restart instance** panel at the bottom right:
+   - Enter the instance name in the text box.
+   - The Studio will present you with a warning.
+   - Click **Restart**.
+
+1. The instance will begin restarting immediately.
+
+## Instance Config (Read Only)
+
+A JSON preview of the instance config is available for reference at the bottom of the page. This is a read-only view and is not editable via the Studio. To make changes to the instance config, review the [configuration file documentation](../../deployments/configuration#using-the-configuration-file-and-naming-conventions).
diff --git a/versioned_docs/version-4.7/administration/harper-studio/instance-metrics.md b/versioned_docs/version-4.7/administration/harper-studio/instance-metrics.md
new file mode 100644
index 00000000..e9b48939
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/instance-metrics.md
@@ -0,0 +1,16 @@
+---
+title: Instance Metrics
+---
+
+# Instance Metrics
+
+The Harper Studio displays instance status and metrics on the instance status page, which can be accessed with the following instructions:
+
+1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page.
+1. Click the appropriate organization that the instance belongs to.
+1. Select your desired instance.
+1. Click **status** in the instance control bar.
+
+Once on the instance status page you can view host system information, [Harper logs](../logging/standard-logging), and Harper Cloud alarms (if it is a cloud instance).
+
+_Note, the **status** page will only be available to super users._
diff --git a/versioned_docs/version-4.7/administration/harper-studio/instances.md b/versioned_docs/version-4.7/administration/harper-studio/instances.md
new file mode 100644
index 00000000..b367ed96
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/instances.md
@@ -0,0 +1,146 @@
+---
+title: Instances
+---
+
+# Instances
+
+The Harper Studio allows you to administer all of your Harper instances in one place. Harper currently offers the following instance types:
+
+- **Harper Cloud Instance** Managed installations of Harper, what we call [Harper Cloud](../../deployments/harper-cloud/).
+- **5G Wavelength Instance** Managed installations of Harper running on the Verizon network through AWS Wavelength, what we call 5G Wavelength Instances. _Note, these instances are only accessible via the Verizon network._
+- **Enterprise Instance** Any Harper installation that is managed by you. These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally.
+
+All interactions between the Studio and your instances take place directly from your browser. Harper stores metadata about your instances, which enables the Studio to display these instances when you log in. Beyond that, all traffic is routed from your browser to the Harper instances using the standard [Harper API](../../developers/operations-api/).
+
+## Organization Instance List
+
+A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets its own card. Harper Cloud and Enterprise instances are listed together.
+
+## Create a New Instance
+
+1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page.
+1. Click the appropriate organization for the instance to be created under.
+1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card.
+1. Select your desired Instance Type.
+1. For a Harper Cloud Instance or a Harper 5G Wavelength Instance, click **Create Harper Cloud Instance**.
+   1. Fill out Instance Info.
+      1. Enter Instance Name
+
+         _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._
+
+      1. Enter Instance Username
+
+         _This is the username of the initial Harper instance super user._
+
+      1. Enter Instance Password
+
+         _This is the password of the initial Harper instance super user._
+
+      1. Click **Instance Details** to move to the next page.
+   1. Select Instance Specs
+      1. Select Instance RAM
+
+         _Harper Cloud Instances are billed based on Instance RAM; this selects the size of your provisioned instance._
+
+      1. Select Storage Size
+
+         _Each instance has a mounted storage volume where your Harper data will reside. Storage is provisioned based on space and IOPS._
+
+      1. Select Instance Region
+
+         _The geographic area where your instance will be provisioned._
+
+      1. Click **Confirm Instance Details** to move to the next page.
+   1. Review your Instance Details; if there is an error, use the back button to correct it.
+   1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), and if you agree, click the **I agree** radio button to confirm.
+   1. Click **Add Instance**.
+   1. Your Harper Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready.
+
+## Register Enterprise Instance
+
+1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page.
+1. Click the appropriate organization for the instance to be created under.
+1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card.
+1. Select **Register Enterprise Instance**.
+   1. Fill out Instance Info.
+      1. Enter Instance Name
+
+         _This is used for descriptive purposes only._
+
+      1. Enter Instance Username
+
+         _The username of a Harper super user that is already configured in your Harper installation._
+
+      1. Enter Instance Password
+
+         _The password of a Harper super user that is already configured in your Harper installation._
+
+      1. Enter Host
+
+         _The host to access the Harper instance. For example, `harperdb.myhost.com` or `localhost`._
+
+      1. Enter Port
+
+         _The port to access the Harper instance. Harper defaults to `9925` for HTTP and `31283` for HTTPS._
+
+      1. Select SSL
+
+         _If your instance is running over SSL, select the SSL checkbox. If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._
+
+      1. Click **Instance Details** to move to the next page.
+   1. Select Instance Specs
+      1. Select Instance RAM
+
+         _Harper instances are billed based on Instance RAM. Selecting additional RAM enables faster and more complex queries._
+
+      1. Click **Confirm Instance Details** to move to the next page.
+   1. Review your Instance Details; if there is an error, use the back button to correct it.
+   1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), and if you agree, click the **I agree** radio button to confirm.
+   1. Click **Add Instance**.
+   1. The Harper Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete.
+
+## Delete an Instance
+
+Instance deletion has two different behaviors depending on the instance type.
+
+- **Harper Cloud Instance** This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone.
+- **Enterprise Instance** The instance will be removed from the Harper Studio only. This does not uninstall Harper from your system, and your data will remain intact.
+
+An instance can be deleted as follows:
+
+1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page.
+1. Click the appropriate organization that the instance belongs to.
+1. Identify the proper instance card and click the trash can icon.
+1. Enter the instance name into the text box.
+
+   _This is done for confirmation purposes to ensure you do not accidentally delete an instance._
+
+1. Click the **Do It** button.
+
+## Upgrade an Instance
+
+Harper instances can be resized on the [Instance Configuration](instance-configuration) page.
+
+## Instance Log In/Log Out
+
+The Studio enables users to log in to and out of instances as different database users from the instance control panel. To log out of an instance:
+
+1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page.
+1. Click the appropriate organization that the instance belongs to.
+1. Identify the proper instance card and click the lock icon.
+1. You will immediately be logged out of the instance.
+
+To log in to an instance:
+
+1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page.
+1. Click the appropriate organization that the instance belongs to.
+1. Identify the proper instance card (it will have an unlocked icon and a status reading PLEASE LOG IN) and click the center of the card.
+1. Enter the database username.
+
+   _The username of a Harper user that is already configured in your Harper instance._
+
+1. Enter the database password.
+
+   _The password of a Harper user that is already configured in your Harper instance._
+
+1. Click **Log In**.
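+
+Because the Studio talks to your instance with the same credentials the operations API accepts, a failing Studio login can be sanity-checked directly against the instance. A minimal sketch (host, port, and credentials are placeholders), assuming the operations API's `user_info` operation, which returns the user record for the supplied credentials:
+
+```bash
+# Succeeds with the user's record if the credentials are valid,
+# and returns an authentication error otherwise.
+curl -s https://localhost:9925 \
+  -u admin:password \
+  -H 'Content-Type: application/json' \
+  -d '{"operation": "user_info"}'
+```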
diff --git a/versioned_docs/version-4.7/administration/harper-studio/login-password-reset.md b/versioned_docs/version-4.7/administration/harper-studio/login-password-reset.md
new file mode 100644
index 00000000..199d38ce
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/login-password-reset.md
@@ -0,0 +1,42 @@
+---
+title: Login and Password Reset
+---
+
+# Login and Password Reset
+
+## Log In to Your Harper Studio Account
+
+To log into your existing Harper Studio account:
+
+1. Navigate to the [Harper Studio](https://studio.harperdb.io/).
+1. Enter your email address.
+1. Enter your password.
+1. Click **sign in**.
+
+## Reset a Forgotten Password
+
+To reset a forgotten password:
+
+1. Navigate to the Harper Studio password reset page.
+1. Enter your email address.
+1. Click **send password reset email**.
+1. If the account exists, you will receive an email with a temporary password.
+1. Navigate back to the Harper Studio login page.
+1. Enter your email address.
+1. Enter your temporary password.
+1. Click **sign in**.
+1. You will be taken to a new screen to reset your account password. Enter your new password.
+   _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._
+1. Click the **add account password** button.
+
+## Change Your Password
+
+If you are already logged into the Studio, you can change your password through the user interface.
+
+1. Navigate to the Harper Studio profile page.
+1. In the **password** section, enter:
+   - Current password.
+   - New password.
+   - New password again _(for verification)_.
+
+1. Click the **Update Password** button.
diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-applications.md b/versioned_docs/version-4.7/administration/harper-studio/manage-applications.md
new file mode 100644
index 00000000..52e8cc64
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/manage-applications.md
@@ -0,0 +1,59 @@
+---
+title: Manage Applications
+---
+
+# Manage Applications
+
+[Harper Applications](../../developers/applications/) are enabled by default and can be configured further through the Harper Studio. It is recommended to read through the [Applications](../../developers/applications/) documentation first to gain a strong understanding of Harper Applications behavior.
+
+All Applications configuration and development is handled through the **applications** page of the Harper Studio, accessed with the following instructions:
+
+1. Navigate to the Harper Studio Organizations page.
+1. Click the appropriate organization that the instance belongs to.
+1. Select your desired instance.
+1. Click **applications** in the instance control bar.
+
+_Note, the **applications** page will only be available to super users._
+
+## Manage Applications
+
+The Applications editor is not required for development and deployment, though it is a useful tool to maintain and manage your Harper Applications. The editor provides the ability to create new applications or import/deploy remote application packages.
+
+The left bar is the applications file navigator, allowing you to select files to edit and add/remove files and folders. By default, this view is empty because there are no existing applications. To get started, either create a new application or import/deploy a remote application.
+
+The right side of the screen is the file editor.
+Here you can edit individual files of your application directly in the Harper Studio.
+
+## Things to Keep in Mind
+
+To learn more about developing Harper Applications, make sure to read through the [Applications](../../developers/applications/) documentation.
+
+When working with Applications in the Harper Studio, by default the editor will restart the Harper Applications server every time a file is saved. Note, this behavior can be turned off by toggling the `auto` toggle at the top right of the applications page. If you are constantly editing your application, it may result in errors causing the application not to run. These errors will not be visible on the application page; however, they will be available in the Harper logs, which can be found on the [status page](instance-metrics).
+
+The Applications editor stores unsaved changes in cache. This means that occasionally your editor will show a discrepancy from the code that is stored and running on your Harper instance. You can identify if the code in your Studio differs if the "save" and "revert" buttons are active. To revert the cached version in your editor to the version of the file stored on your Harper instance, click the "revert" button.
+
+## Accessing Your Application Endpoints
+
+Accessing your application endpoints varies with which type of endpoint you're creating. All endpoints, regardless of type, will be accessed via the [Harper HTTP port found in the Harper configuration file](../../deployments/configuration#http). The default port is `9926`, but you can verify what your instance is set to by navigating to the [instance config page](instance-configuration) and examining the read-only JSON version of your instance's config file, looking specifically for either the `http: port: 9926` or `http: securePort: 9926` configs. If `port` is set, you will access your endpoints via `http`, and if `securePort` is set, you will access your endpoints via `https`.
+
+Below is a breakdown of how to access each type of endpoint. In these examples, we will use a locally hosted instance with `securePort` set to `9926`: `https://localhost:9926`.
+
+- **Standard REST Endpoints**\
+  Standard REST endpoints are defined via the `@export` directive to tables in your schema definition. You can read more about these in the [Adding an Endpoint section of the Applications documentation](../../developers/applications/#adding-an-endpoint). Here, if we are looking to access a record with ID `1` from table `Dog` on our instance, [per the REST documentation](../../developers/rest), we could send a `GET` (or, since this is a GET, we could enter the URL in our browser) to `https://localhost:9926/Dog/1`.
+- **Augmented REST Endpoints**\
+  Harper Applications enable you to write [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) for your resources. Accessing these endpoints is identical to accessing the standard REST endpoints above, though you may have defined custom behavior in each function. Taking the example from the [Applications documentation](../../developers/applications/#custom-functionality-with-javascript), if we are looking to access the `DogWithHumanAge` example, we could send the GET to `https://localhost:9926/DogWithHumanAge/1`.
+- **Fastify Routes**\
+  If you need more functionality than the REST endpoints can provide, you can define your own custom endpoints using [Fastify Routes](../../developers/applications/#define-fastify-routes).
+  The paths to these routes are defined via the application `config.yaml` file. You can read more about how you can customize the configuration options in the [Define Fastify Routes documentation](../../developers/applications/define-routes). By default, routes are accessed via the following pattern: `[Instance URL]:[HTTP Port]/[Project Name]/[Route URL]`. Using the example from the [Harper Application Template](https://github.com/HarperDB/application-template/), where we've named our project `application-template`, we would access the `getAll` route at `https://localhost:9926/application-template/getAll`.
+
+## Creating a New Application
+
+1. From the applications page, click the "+ app" button at the top right.
+1. Click "+ Create A New Application Using The Default Template".
+1. Enter a name for your project. Note, project names must contain only alphanumeric characters, dashes, and underscores.
+1. Click OK.
+1. Your project will be available in the applications file navigator on the left. Click a file to select it for editing.
+
+## Editing an Application
+
+1. From the applications page, click the file you would like to edit from the file navigator on the left.
+1. Edit the file with any changes you'd like.
+1. Click "save" at the top right. Note, as mentioned above, when you save a file, the Harper Applications server will be restarted immediately.
diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-databases-browse-data.md b/versioned_docs/version-4.7/administration/harper-studio/manage-databases-browse-data.md
new file mode 100644
index 00000000..33482198
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/manage-databases-browse-data.md
@@ -0,0 +1,123 @@
+---
+title: Manage Databases / Browse Data
+---
+
+# Manage Databases / Browse Data
+
+Manage instance databases/tables and browse data in tabular format with the following instructions:
+
+1. Navigate to the Harper Studio Organizations page.
+1. Click the appropriate organization that the instance belongs to.
+1. Select your desired instance.
+1. Click **browse** in the instance control bar.
+
+Once on the instance browse page you can view data, manage databases and tables, add new data, and more.
+
+## Manage Databases and Tables
+
+#### Create a Database
+
+1. Click the plus icon at the top right of the databases section.
+1. Enter the database name.
+1. Click the green check mark.
+
+#### Delete a Database
+
+Deleting a database is permanent and irreversible. Deleting a database removes all tables and data within it.
+
+1. Click the minus icon at the top right of the databases section.
+1. Identify the appropriate database to delete and click the red minus sign in the same row.
+1. Click the red check mark to confirm deletion.
+
+#### Create a Table
+
+1. Select the desired database from the databases section.
+1. Click the plus icon at the top right of the tables section.
+1. Enter the table name.
+1. Enter the primary key.
+
+   _The primary key is also often referred to as the hash attribute in the Studio, and it defines the unique identifier for each row in your table._
+
+1. Click the green check mark.
+
+#### Delete a Table
+
+Deleting a table is permanent and irreversible. Deleting a table removes all data within it.
+
+1. Select the desired database from the databases section.
+1. Click the minus icon at the top right of the tables section.
+1. Identify the appropriate table to delete and click the red minus sign in the same row.
+1. Click the red check mark to confirm deletion.
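+
+Because the Studio performs these steps through the instance's standard operations API, the same table management can be scripted. A minimal sketch of the Create a Table step above (host, credentials, and names are placeholders):
+
+```bash
+# Creates table "dog" with primary key "id" in the default "data" database.
+curl -s https://localhost:9925 \
+  -u admin:password \
+  -H 'Content-Type: application/json' \
+  -d '{"operation": "create_table", "database": "data", "table": "dog", "primary_key": "id"}'
+```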
+
+## Manage Table Data
+
+The following section assumes you have selected the appropriate table from the database/table browser.
+
+#### Filter Table Data
+
+1. Click the magnifying glass icon at the top right of the table browser. This expands the search filters.
+1. Enter your desired filter criteria.
+1. The results will be filtered appropriately.
+
+#### Load CSV Data
+
+1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file.
+1. To import a CSV by URL:
+   1. Enter the URL in the **CSV file URL** textbox.
+   1. Click **Import From URL**.
+   1. The CSV will load, and you will be redirected back to browse table data.
+1. To upload a CSV file:
+   1. Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser).
+   1. Navigate to your desired CSV file and select it.
+   1. Click **Insert X Records**, where X is the number of records in your CSV.
+   1. The CSV will load, and you will be redirected back to browse table data.
+
+#### Add a Record
+
+1. Click the plus icon at the top right of the table browser.
+1. The Studio will pre-populate existing table attributes in JSON format.
+
+   _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._
+
+1. Enter values to be added to the record.
+
+   _You may add new attributes to the JSON; they will be reflexively added to the table._
+
+1. Click the **Add New** button.
+
+#### Edit a Record
+
+1. Click the record/row you would like to edit.
+1. Modify the desired values.
+
+   _You may add new attributes to the JSON; they will be reflexively added to the table._
+
+1. Click the **save icon**.
+
+#### Delete a Record
+
+Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded, as well as the data that was deleted.
+
+1. Click the record/row you would like to delete.
+1. Click the **delete icon**.
+1. Confirm deletion by clicking the **check icon**.
+
+## Browse Table Data
+
+The following section assumes you have selected the appropriate table from the database/table browser.
+
+#### Browse Table Data
+
+The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can:
+
+- Page left and right using the arrows.
+- Type in the desired page.
+- Change the page size (the number of records displayed in the table).
+
+#### Refresh Table Data
+
+Click the refresh icon at the top right of the table browser.
+
+#### Automatically Refresh Table Data
+
+Toggle the auto switch at the top right of the table browser. The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data.
diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-instance-roles.md b/versioned_docs/version-4.7/administration/harper-studio/manage-instance-roles.md
new file mode 100644
index 00000000..3662013c
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/manage-instance-roles.md
@@ -0,0 +1,77 @@
+---
+title: Manage Instance Roles
+---
+
+# Manage Instance Roles
+
+Harper users and roles can be managed directly through the Harper Studio.
It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. + +Instance role configuration is handled through the **roles** page of the Harper Studio, accessed with the following instructions: + +1. Navigate to the Harper Studio Organizations page. + +1. Click the appropriate organization that the instance belongs to. + +1. Select your desired instance. + +1. Click **roles** in the instance control bar. + +_Note, the **roles** page will only be available to super users._ + +The _roles management_ screen consists of the following panels: + +- **super users** + + Displays all super user roles for this instance. + +- **cluster users** + + Displays all cluster user roles for this instance. + +- **standard roles** + + Displays all standard roles for this instance. + +- **role permission editing** + + Once a role is selected for editing, permissions will be displayed here in JSON format. + +_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ + +## Role Management + +#### Create a Role + +1. Click the plus icon at the top right of the appropriate role section. + +1. Enter the role name. + +1. Click the green check mark. + +1. Optionally toggle the **manage databases/tables** switch to specify the `structure_user` config. + +1. Configure the role permissions in the role permission editing panel. + + _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ + +1. Click **Update Role Permissions**. + +#### Modify a Role + +1. Click the appropriate role from the appropriate role section. + +1. Modify the role permissions in the role permission editing panel. + + _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ + +1. Click **Update Role Permissions**. + +#### Delete a Role + +Deleting a role is permanent and irreversible. A role cannot be remove if users are associated with it. + +1. Click the minus icon at the top right of the roles section. + +1. Identify the appropriate role to delete and click the red minus sign in the same row. + +1. Click the red check mark to confirm deletion. diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-instance-users.md b/versioned_docs/version-4.7/administration/harper-studio/manage-instance-users.md new file mode 100644 index 00000000..fb91fbbb --- /dev/null +++ b/versioned_docs/version-4.7/administration/harper-studio/manage-instance-users.md @@ -0,0 +1,53 @@ +--- +title: Manage Instance Users +--- + +# Manage Instance Users + +Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. + +Instance user configuration is handled through the **users** page of the Harper Studio, accessed with the following instructions: + +1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. + +1. Click the appropriate organization that the instance belongs to. + +1. Select your desired instance. + +1. Click **users** in the instance control bar. 
+ +_Note, the **users** page will only be available to super users._ + +## Add a User + +Harper instance users can be added with the following instructions. + +1. In the **add user** panel on the left enter: + - New user username. + - New user password. + - Select a role. + + _Learn more about role management here: [Manage Instance Roles](manage-instance-roles)._ + +1. Click **Add User**. + +## Edit a User + +Harper instance users can be modified with the following instructions. + +1. In the **existing users** panel, click the row of the user you would like to edit. + +1. To change a user’s password: + 1. In the **Change user password** section, enter the new password. + 1. Click **Update Password**. + +1. To change a user’s role: + 1. In the **Change user role** section, select the new role. + 1. Click **Update Role**. + +1. To delete a user: + 1. In the **Delete User** section, type the username into the textbox. + + _This is done for confirmation purposes._ + + 1. Click **Delete User**. diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-replication.md b/versioned_docs/version-4.7/administration/harper-studio/manage-replication.md new file mode 100644 index 00000000..619799a5 --- /dev/null +++ b/versioned_docs/version-4.7/administration/harper-studio/manage-replication.md @@ -0,0 +1,90 @@ +--- +title: Manage Replication +--- + +# Manage Replication + +Harper instance clustering and replication can be configured directly through the Harper Studio. It is recommended to read through the [clustering documentation](../../developers/clustering/) first to gain a strong understanding of Harper clustering behavior. + +All clustering configuration is handled through the **replication** page of the Harper Studio, accessed with the following instructions: + +1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. + +1. Click the appropriate organization that the instance belongs to. + +1. Select your desired instance. + +1. Click **replication** in the instance control bar. + +Note, the **replication** page will only be available to super users. + +--- + +## Initial Configuration + +Harper instances do not have clustering configured by default. The Harper Studio will walk you through the initial configuration. Upon entering the **replication** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. It is worth reviewing the [Creating a Cluster User](../../developers/clustering/creating-a-cluster-user) document before proceeding. + +1. Enter Cluster User username. (Defaults to `cluster_user`). +1. Enter Cluster Password. +1. Review and/or Set Cluster Node Name. +1. Click **Enable Clustering**. + +At this point the Studio will restart your Harper Instance, required for the configuration changes to take effect. + +--- + +## Manage Clustering + +Once initial clustering configuration is completed you a presented with a clustering management screen with the following properties: + +- **connected instances** + + Displays all instances within the Studio Organization that this instance manages a connection with. + +- **unconnected instances** + + Displays all instances within the Studio Organization that this instance does not manage a connection with. + +- **unregistered instances** + + Displays all instances outside the Studio Organization that this instance manages a connection with. 
- **manage clustering**

  Once instances are connected, this will display clustering management options for all connected instances and all databases and tables.

---

## Connect an Instance

Harper instances can be clustered together with the following instructions.

1. Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both.

1. Identify the instance you would like to connect from the **unconnected instances** panel.

1. Click the plus icon next to the appropriate instance.

1. If configurations are correct, all databases will sync across the cluster and then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear; click it to learn more about what could be causing the issue.

---

## Disconnect an Instance

Harper instances can be disconnected with the following instructions.

1. Identify the instance you would like to disconnect from the **connected instances** panel.

1. Click the minus icon next to the appropriate instance.

---

## Manage Replication

Subscriptions must be configured in order to move data between connected instances. Read more about subscriptions in the [clustering documentation](../../developers/clustering/). The **manage clustering** panel displays a table with each row representing a channel per instance. Cells are bolded to indicate a change in the column. Publish and subscribe replication can be configured per table with the following instructions:

1. Identify the instance, database, and table for replication to be configured.

1. For publish, click the toggle switch in the **publish** column.

1. For subscribe, click the toggle switch in the **subscribe** column.

diff --git a/versioned_docs/version-4.7/administration/harper-studio/organizations.md b/versioned_docs/version-4.7/administration/harper-studio/organizations.md
new file mode 100644
index 00000000..f93eeff0
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/organizations.md
@@ -0,0 +1,109 @@
---
title: Organizations
---

# Organizations

Harper Studio organizations provide the ability to group Harper Cloud Instances. Organization behavior is as follows:

- Billing occurs at the organization level to a single credit card.
- Organizations retain their own unique Harper Cloud subdomain.
- Cloud instances reside within an organization.
- Studio users can be invited to organizations to share instances.

An organization is automatically created for you when you sign up for Harper Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page.

---

## List Organizations

A summary view of all organizations your user belongs to can be viewed on the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the Harper Studio.

## Create a New Organization

A new organization can be created as follows:

1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page.
1. Click the **Create a New Organization** card.
1. Fill out the new organization details:
   - Enter Organization Name
     _This is used for descriptive purposes only._
   - Enter Organization Subdomain
     _Part of the URL that will be used to identify your Harper Cloud Instances.
For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._
1. Click **Create Organization**.

## Delete an Organization

An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows:

1. Navigate to the Harper Studio Organizations page.
1. Identify the proper organization card and click the trash can icon.
1. Enter the organization name into the text box.

   _This is done for confirmation purposes to ensure you do not accidentally delete an organization._

1. Click the **Do It** button.

## Manage Users

Harper Studio organization owners can manage users, including inviting new users, removing users, and toggling ownership.

#### Inviting a User

A new user can be invited to an organization as follows:

1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page.
1. Click the appropriate organization card.
1. Click **users** at the top of the screen.
1. In the **add user** box, enter the new user’s email address.
1. Click **Add User**.

Users may or may not already be Harper Studio users when they are added to an organization. If the Harper Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a Harper Studio account, they will receive an email welcoming them to Harper Studio.

---

#### Toggle a User’s Organization Owner Status

Organization owners have full access to the organization, including the ability to manage organization users; create, modify, and delete instances; and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled as follows:

1. Navigate to the Harper Studio Organizations page.
1. Click the appropriate organization card.
1. Click **users** at the top of the screen.
1. Click the appropriate user from the **existing users** section.
1. Toggle the **Is Owner** switch to the desired status.

---

#### Remove a User from an Organization

Users may be removed from an organization at any time. Removing a user from an organization will not delete their Harper Studio account; it will only remove their access to the specified organization. A user can be removed from an organization as follows:

1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page.
1. Click the appropriate organization card.
1. Click **users** at the top of the screen.
1. Click the appropriate user from the **existing users** section.
1. Type **DELETE** in the text box in the **Delete User** row.

   _This is done for confirmation purposes to ensure you do not accidentally delete a user._

1. Click **Delete User**.

## Manage Billing

Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually, depending on the registered instance). Billing settings can be configured as follows:

1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page.
1. Click the appropriate organization card.
1. Click **billing** at the top of the screen.

Here organization owners can view invoices, manage coupons, and manage the associated credit card.
_Harper billing and payments are managed via Stripe._

### Add a Coupon

Coupons are applicable towards any paid tier or enterprise instance, and you can change your subscription at any time. Coupons can be added to your organization as follows:

1. In the coupons panel of the **billing** page, enter your coupon code.
1. Click **Add Coupon**.
1. The coupon will then be available and displayed in the coupons panel.

diff --git a/versioned_docs/version-4.7/administration/harper-studio/query-instance-data.md b/versioned_docs/version-4.7/administration/harper-studio/query-instance-data.md
new file mode 100644
index 00000000..e85f5e15
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/harper-studio/query-instance-data.md
@@ -0,0 +1,52 @@
---
title: Query Instance Data
---

# Query Instance Data

SQL queries can be executed directly through the Harper Studio with the following instructions:

1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page.
1. Click the appropriate organization that the instance belongs to.
1. Select your desired instance.
1. Click **query** in the instance control bar.
1. Enter your SQL query in the SQL query window.
1. Click **Execute**.

_Please note, the Studio will execute the query exactly as entered. For example, if you attempt to `SELECT *` from a table with millions of rows, you will most likely crash your browser._

## Browse Query Results Set

#### Browse Results Set Data

The first page of results set data is automatically loaded on query execution. Paging controls are at the bottom of the table. Here you can:

- Page left and right using the arrows.
- Type in the desired page.
- Change the page size (the number of records displayed in the table).

#### Refresh Results Set

Click the refresh icon at the top right of the results set table.

#### Automatically Refresh Results Set

Toggle the auto switch at the top right of the results set table. The results set will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data.

## Query History

Query history is stored in your local browser cache. Executed queries are listed with the most recent at the top in the **query history** section.

#### Rerun Previous Query

- Identify the query from the **query history** list.
- Click the appropriate query. It will be loaded into the **sql query** input box.
- Click **Execute**.

#### Clear Query History

Click the trash can icon at the top right of the **query history** section.

## Create Charts

The Harper Studio includes a charting feature where you can build charts based on your specified queries. Visit the Charts documentation for more information.

diff --git a/versioned_docs/version-4.7/administration/jobs.md b/versioned_docs/version-4.7/administration/jobs.md
new file mode 100644
index 00000000..c487f424
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/jobs.md
@@ -0,0 +1,112 @@
---
title: Jobs
---

# Jobs

Harper Jobs are asynchronous tasks performed by the Operations API.

## Job Summary

Jobs use an asynchronous methodology to account for the potential of long-running operations. For example, exporting millions of records to S3 could take some time, so the job is started and its id is provided so that its status can be checked.

The job status can be **COMPLETE** or **IN_PROGRESS**.
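For quick checks, a job's status can also be fetched with a single HTTP call to the Operations API. A minimal sketch, assuming a local instance on the default operations port 9925; the host, credentials, and job id are placeholders:

```bash
# Sketch: check a job's status via the Operations API
# (host, credentials, and job id are placeholders)
curl -s -X POST http://localhost:9925 \
  -u HDB_ADMIN:password \
  -H 'Content-Type: application/json' \
  -d '{"operation": "get_job", "id": "4a982782-929a-4507-8794-26dae1132def"}'
```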
## Example Job Operations

Example job operations include:

[csv data load](../developers/operations-api/bulk-operations#csv-data-load)

[csv file load](../developers/operations-api/bulk-operations#csv-file-load)

[csv url load](../developers/operations-api/bulk-operations#csv-url-load)

[import from s3](../developers/operations-api/bulk-operations#import-from-s3)

[delete_records_before](../developers/operations-api/bulk-operations#delete-records-before)

[export_local](../developers/operations-api/bulk-operations#export-local)

[export_to_s3](../developers/operations-api/bulk-operations#export-to-s3)

Example Response from a Job Operation

```json
{
  "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16"
}
```

Whenever one of these operations is initiated, an asynchronous job is created, and the response contains the ID of that job, which can be used to check on its status.

## Managing Jobs

To check on a job's status, use the [get_job](../developers/operations-api/jobs#get-job) operation.

Get Job Request

```json
{
  "operation": "get_job",
  "id": "4a982782-929a-4507-8794-26dae1132def"
}
```

Get Job Response

```json
[
  {
    "__createdtime__": 1611615798782,
    "__updatedtime__": 1611615801207,
    "created_datetime": 1611615798774,
    "end_datetime": 1611615801206,
    "id": "4a982782-929a-4507-8794-26dae1132def",
    "job_body": null,
    "message": "successfully loaded 350 of 350 records",
    "start_datetime": 1611615798805,
    "status": "COMPLETE",
    "type": "csv_url_load",
    "user": "HDB_ADMIN",
    "start_datetime_converted": "2021-01-25T23:03:18.805Z",
    "end_datetime_converted": "2021-01-25T23:03:21.206Z"
  }
]
```

## Finding Jobs

To find jobs (if the ID is not known), use the [search_jobs_by_start_date](../developers/operations-api/jobs#search-jobs-by-start-date) operation.

Search Jobs Request

```json
{
  "operation": "search_jobs_by_start_date",
  "from_date": "2021-01-25T22:05:27.464+0000",
  "to_date": "2021-01-25T23:05:27.464+0000"
}
```

Search Jobs Response

```json
[
  {
    "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1",
    "user": "HDB_ADMIN",
    "type": "csv_url_load",
    "status": "COMPLETE",
    "start_datetime": 1611613284781,
    "end_datetime": 1611613287204,
    "job_body": null,
    "message": "successfully loaded 350 of 350 records",
    "created_datetime": 1611613284764,
    "__createdtime__": 1611613284767,
    "__updatedtime__": 1611613287207,
    "start_datetime_converted": "2021-01-25T22:21:24.781Z",
    "end_datetime_converted": "2021-01-25T22:21:27.204Z"
  }
]
```

diff --git a/versioned_docs/version-4.7/administration/logging/audit-logging.md b/versioned_docs/version-4.7/administration/logging/audit-logging.md
new file mode 100644
index 00000000..209b4981
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/logging/audit-logging.md
@@ -0,0 +1,126 @@
---
title: Audit Logging
---

# Audit Logging

### Audit log

The audit log uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table.

The audit log is enabled by default. To disable it, set `logging.auditLog` to false in the config file, `harperdb-config.yaml`, then restart Harper for the change to take effect. Note, the audit log must be enabled for real-time messaging.

### Audit Log Operations

#### read_audit_log

The `read_audit_log` operation is flexible, enabling users to query with many parameters.
All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples can be found in the [Harper API documentation](../../developers/operations-api/logs).

**Search by Timestamp**

```json
{
  "operation": "read_audit_log",
  "schema": "dev",
  "table": "dog",
  "search_type": "timestamp",
  "search_values": [1660585740558]
}
```

There are three possible outcomes when searching by timestamp:

- `"search_values": []` - All records returned for the specified table
- `"search_values": [1660585740558]` - All records after the provided timestamp
- `"search_values": [1660585740558, 1760585759710]` - Records between the provided "from" and "to" timestamps

---

**Search by Username**

```json
{
  "operation": "read_audit_log",
  "schema": "dev",
  "table": "dog",
  "search_type": "username",
  "search_values": ["admin"]
}
```

The above example will return all records whose `username` is "admin."

---

**Search by Primary Key**

```json
{
  "operation": "read_audit_log",
  "schema": "dev",
  "table": "dog",
  "search_type": "hash_value",
  "search_values": [318]
}
```

The above example will return all records whose primary key (`hash_value`) is 318.

---

#### read_audit_log Response

The example that follows provides records of operations performed on a table. One thing of note is that the `read_audit_log` operation also gives you the `original_records`.

```json
{
  "operation": "update",
  "user_name": "HDB_ADMIN",
  "timestamp": 1607035559122.277,
  "hash_values": [1, 2],
  "records": [
    {
      "id": 1,
      "breed": "Muttzilla",
      "age": 6,
      "__updatedtime__": 1607035559122
    },
    {
      "id": 2,
      "age": 7,
      "__updatedtime__": 1607035559121
    }
  ],
  "original_records": [
    {
      "__createdtime__": 1607035556801,
      "__updatedtime__": 1607035556801,
      "age": 5,
      "breed": "Mutt",
      "id": 2,
      "name": "Penny"
    },
    {
      "__createdtime__": 1607035556801,
      "__updatedtime__": 1607035556801,
      "age": 5,
      "breed": "Mutt",
      "id": 1,
      "name": "Harper"
    }
  ]
}
```

#### delete_audit_logs_before

Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided.

```json
{
  "operation": "delete_audit_logs_before",
  "schema": "dev",
  "table": "cat",
  "timestamp": 1598290282817
}
```

diff --git a/versioned_docs/version-4.7/administration/logging/index.md b/versioned_docs/version-4.7/administration/logging/index.md
new file mode 100644
index 00000000..bde1870a
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/logging/index.md
@@ -0,0 +1,11 @@
---
title: Logging
---

# Logging

Harper provides many different logging options for various features and functionality.

- [Standard Logging](logging/standard-logging): Harper maintains a log of events that take place throughout operation.
- [Audit Logging](logging/audit-logging): Harper uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table.
- [Transaction Logging](logging/transaction-logging): Harper stores a verbose history of all transactions logged for specified database tables, including original data records.
diff --git a/versioned_docs/version-4.7/administration/logging/standard-logging.md b/versioned_docs/version-4.7/administration/logging/standard-logging.md
new file mode 100644
index 00000000..044c2260
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/logging/standard-logging.md
@@ -0,0 +1,65 @@
---
title: Standard Logging
---

# Standard Logging

Harper maintains a log of events that take place throughout operation. Log messages can be used for diagnostic purposes as well as monitoring.

All logs (except for the install log) are stored in the main log file in the hdb directory, `<ROOTPATH>/log/hdb.log`. The install log is located in the Harper application directory, most likely located in your npm directory, `npm/harperdb/logs`.

Each log message has several key components for consistent reporting of events. A log message has a format of:

```
<timestamp> [<level>] [<thread/id>]...[<tags>]: <message>
```

For example, a typical log entry looks like:

```
2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started.
```

The components of a log entry are:

- `timestamp` - The date/time stamp when the event occurred.
- `level` - An associated log level that gives a rough guide to the importance and urgency of the message. The available log levels, in order from least urgent (and most verbose) to most urgent, are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`.
- `thread/ID` - The name of the thread and the thread ID that the event was reported on. Note that NATS logs are recorded by their process name, and there is no thread id for them since they run as a separate process. Key threads are:
  - `main` - The thread that is responsible for managing all other threads and routes incoming requests to the other threads.
  - `http` - The worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions.
  - `Clustering` - The threads and processes that handle replication.
  - `job` - The job threads that have been started to handle operations that are executed in a separate job thread.
- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags.
- `message` - The main message that was reported.

We try to keep logging to a minimum by default; to do this, the default log level is `error`. If you require more information from the logs, lowering the log level (for example, to `info` or `debug`) will provide that.

The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`.

## Clustering Logging

Harper clustering utilizes two [NATS](https://nats.io/) servers, named Hub and Leaf. The Hub server is responsible for establishing the mesh network that connects instances of Harper, and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers, there is a separate log level configuration for them. To adjust their log verbosity, set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels, from least verbose, are `error`, `warn`, `info`, `debug` and `trace`.

## Log File vs Standard Streams

Harper logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. To log to standard streams effectively, make sure to run `harperdb` directly rather than starting it as a separate process (that is, don't use `harperdb start`), and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup.
## Logging Rotation

Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This allows for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](../../deployments/configuration).

## Read Logs via the API

To access specific logs you may query the Harper API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria.

```json
{
  "operation": "read_log",
  "start": 0,
  "limit": 1000,
  "level": "error",
  "from": "2021-01-25T22:05:27.464+0000",
  "until": "2021-01-25T23:05:27.464+0000",
  "order": "desc"
}
```

diff --git a/versioned_docs/version-4.7/administration/logging/transaction-logging.md b/versioned_docs/version-4.7/administration/logging/transaction-logging.md
new file mode 100644
index 00000000..9003ff04
--- /dev/null
+++ b/versioned_docs/version-4.7/administration/logging/transaction-logging.md
@@ -0,0 +1,87 @@
---
title: Transaction Logging
---

# Transaction Logging

Harper offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers.

## Transaction log

The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. Harper leverages streams for use with the transaction log. When clustering is enabled, all transactions that occur against a table are pushed to its stream, and thus make up the transaction log.

If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](../../developers/clustering/).

## Transaction Log Operations

### read_transaction_log

The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will return a maximum of 2 records within the timestamps provided.

```json
{
  "operation": "read_transaction_log",
  "schema": "dev",
  "table": "dog",
  "from": 1598290235769,
  "to": 1660249020865,
  "limit": 2
}
```

_See example response below._

### read_transaction_log Response

```json
[
  {
    "operation": "insert",
    "user": "admin",
    "timestamp": 1660165619736,
    "records": [
      {
        "id": 1,
        "dog_name": "Penny",
        "owner_name": "Kyle",
        "breed_id": 154,
        "age": 7,
        "weight_lbs": 38,
        "__updatedtime__": 1660165619688,
        "__createdtime__": 1660165619688
      }
    ]
  },
  {
    "operation": "update",
    "user": "admin",
    "timestamp": 1660165620040,
    "records": [
      {
        "id": 1,
        "dog_name": "Penny B",
        "__updatedtime__": 1660165620036
      }
    ]
  }
]
```

_See example request above._

### delete_transaction_logs_before

The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided.
```json
{
  "operation": "delete_transaction_logs_before",
  "schema": "dev",
  "table": "dog",
  "timestamp": 1598290282817
}
```

_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream, there is a chance catchup won't work._

Read on for `read_audit_log`, the second option for logging transactions executed against a table.

diff --git a/versioned_docs/version-4.7/deployments/_category_.json b/versioned_docs/version-4.7/deployments/_category_.json
new file mode 100644
index 00000000..95644c6b
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/_category_.json
@@ -0,0 +1,10 @@
{
  "label": "Deployments",
  "position": 3,
  "link": {
    "type": "generated-index",
    "title": "Deployments Documentation",
    "description": "Installation and deployment guides for HarperDB",
    "keywords": ["deployments"]
  }
}

diff --git a/versioned_docs/version-4.7/deployments/configuration.md b/versioned_docs/version-4.7/deployments/configuration.md
new file mode 100644
index 00000000..57c98a3a
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/configuration.md
@@ -0,0 +1,1381 @@
---
title: Configuration File
---

# Configuration File

Harper is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the Harper root directory (by default this is a directory named `hdb` located in the home directory of the current user).

Some configuration will be populated by default in the config file on install, regardless of whether it is used.

---

## Using the Configuration File and Naming Conventions

The configuration elements in `harperdb-config.yaml` use camel case, such as `operationsApi`.

To change a configuration value, edit the `harperdb-config.yaml` file and save any changes. **HarperDB must be restarted for changes to take effect.**

Alternatively, all configuration values can also be modified using environment variables, command line arguments, or the operations API via the [`set_configuration` operation](../developers/operations-api/configuration#set-configuration).

For nested configuration elements, use underscores to represent parent-child relationships. When accessed this way, elements are case-insensitive.

For example, to disable logging rotation in the `logging` section:

```yaml
logging:
  rotation:
    enabled: false
```

You could apply this change using:

- Environment variable: `LOGGING_ROTATION_ENABLED=false`
- Command line variable: `--LOGGING_ROTATION_ENABLED false`
- Operations API (`set_configuration`): `logging_rotation_enabled: false`

To change the `port` in the `http` section, use:

- Environment variable: `HTTP_PORT=<port>`
- Command line variable: `--HTTP_PORT <port>`
- Operations API (`set_configuration`): `http_port: <port>`

To set the `operationsApi.network.port` to `9925`, use:

- Environment variable: `OPERATIONSAPI_NETWORK_PORT=9925`
- Command line variable: `--OPERATIONSAPI_NETWORK_PORT 9925`
- Operations API (`set_configuration`): `operationsApi_network_port: 9925`

_Note: Component configuration cannot be added or updated via CLI or ENV variables._

## Importing installation configuration

To use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your custom configuration file.
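For example, a hypothetical install that seeds its settings from a custom file might look like the following sketch, assuming the standard `harperdb install` CLI entry point (the file path is illustrative):

```bash
# Sketch: install Harper using values from a custom configuration file
HDB_CONFIG=/path/to/custom-harperdb-config.yaml harperdb install
```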
To install Harper on top of an existing configuration file, set `HDB_CONFIG` to the configuration file in the root path of your install, `<ROOTPATH>/harperdb-config.yaml`.

---

## Configuration Options

### `http`

`sessionAffinity` - _Type_: string; _Default_: null

Harper is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread.

To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. If you are using Harper to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using Harper behind a proxy server or application server, all the remote ip addresses will be the same and Harper will effectively only run on a single thread. Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores.

```yaml
http:
  sessionAffinity: ip
```

`compressionThreshold` - _Type_: number; _Default_: 1200 (bytes)

For HTTP clients that support (Brotli) compression encoding, responses that are larger than this threshold will be compressed (also note that for clients that accept compression, any streaming responses from queries are compressed as well, since the size is not known beforehand).

```yaml
http:
  compressionThreshold: 1200
```

`cors` - _Type_: boolean; _Default_: true

Enable Cross Origin Resource Sharing, which allows requests across a domain.

`corsAccessList` - _Type_: array; _Default_: null

An array of allowable domains with CORS.

`corsAccessControlAllowHeaders` - _Type_: string; _Default_: 'Accept, Content-Type, Authorization'

A string representation of a comma-separated list of header keys for the [Access-Control-Allow-Headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Headers) header for OPTIONS requests.

`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute)

Limits the amount of time the parser will wait to receive the complete HTTP headers.

`maxHeaderSize` - _Type_: integer; _Default_: 16394

The maximum allowed size of HTTP headers in bytes.

`requestQueueLimit` - _Type_: integer; _Default_: 20000

The maximum estimated request queue time, in milliseconds. When the queue is above this limit, requests will be rejected with a 503.

`keepAliveTimeout` - _Type_: integer; _Default_: 30,000 milliseconds (30 seconds)

Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response.
`port` - _Type_: integer; _Default_: 9926

The port used to access the component server.

`securePort` - _Type_: integer; _Default_: null

The port the Harper component server uses for HTTPS connections. This requires a valid certificate and key.

`http2` - _Type_: boolean; _Default_: false

Enables HTTP/2 for the HTTP server.

`timeout` - _Type_: integer; _Default_: 120,000 milliseconds (2 minutes)

The length of time in milliseconds after which a request will timeout.

```yaml
http:
  cors: true
  corsAccessList:
    - null
  headersTimeout: 60000
  maxHeaderSize: 8192
  https: false
  keepAliveTimeout: 30000
  port: 9926
  securePort: null
  timeout: 120000
```

`mtls` - _Type_: boolean | object; _Default_: false

This can be configured to enable mTLS-based authentication for incoming connections. If enabled with default options (by setting it to `true`), the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`, and if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default.

You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties:

`user` - _Type_: string; _Default_: Common Name

This configures a specific username to authenticate as for HTTP mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication.

**Note:** MQTT has its own `mqtt.network.mtls.user` setting (see [MQTT configuration](#mqtt)).

`required` - _Type_: boolean; _Default_: false

This can be enabled to require client certificates (mTLS) for all incoming HTTP connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication.

**Note:** MQTT has its own `mqtt.network.mtls.required` setting (see [MQTT configuration](#mqtt)). Replication uses node-based authentication via certificates or IP addresses, with credential-based fallback (see [Securing Replication Connections](../developers/replication/#securing-connections)).

`certificateVerification` - _Type_: boolean | object; _Default_: false (disabled)

When mTLS is enabled, Harper can verify the revocation status of client certificates using CRL (Certificate Revocation List) and/or OCSP (Online Certificate Status Protocol). This ensures that revoked certificates cannot be used for authentication.

**Certificate verification is disabled by default** and must be explicitly enabled for production environments where certificate revocation checking is required.
Set to `true` to enable with defaults, `false` to disable, or configure with an object:

**Global Settings:**

- `failureMode` - _Type_: string; _Default_: 'fail-closed' - Global behavior when verification fails:
  - `'fail-open'`: Allow connection on verification failure (logs warning)
  - `'fail-closed'`: Reject connection on verification failure (recommended)

**CRL Configuration:** (enabled by default when certificateVerification is enabled)

- `crl.enabled` - _Type_: boolean; _Default_: true - Enable/disable CRL checking
- `crl.timeout` - _Type_: number; _Default_: 10000 - Maximum milliseconds to wait for CRL download
- `crl.cacheTtl` - _Type_: number; _Default_: 86400000 - Milliseconds to cache CRL (24 hours)
- `crl.gracePeriod` - _Type_: number; _Default_: 86400000 - Grace period after CRL nextUpdate (24 hours)
- `crl.failureMode` - _Type_: string; _Default_: 'fail-closed' - CRL-specific failure mode

**OCSP Configuration:** (enabled by default as fallback when certificateVerification is enabled)

- `ocsp.enabled` - _Type_: boolean; _Default_: true - Enable/disable OCSP checking
- `ocsp.timeout` - _Type_: number; _Default_: 5000 - Maximum milliseconds to wait for OCSP response
- `ocsp.cacheTtl` - _Type_: number; _Default_: 3600000 - Milliseconds to cache successful OCSP responses (1 hour)
- `ocsp.errorCacheTtl` - _Type_: number; _Default_: 300000 - Milliseconds to cache OCSP errors (5 minutes)
- `ocsp.failureMode` - _Type_: string; _Default_: 'fail-closed' - OCSP-specific failure mode

**Verification Strategy:**
Harper uses a CRL-first strategy with OCSP fallback. When a client certificate is presented:

1. Check CRL if available (fast, cached locally)
2. Fall back to OCSP if CRL is not available or fails
3. Apply the configured failure mode if both methods fail

Example configurations:

```yaml
# Basic mTLS without certificate verification (certificate revocation not checked)
http:
  mtls: true
```

```yaml
# mTLS with certificate verification enabled (recommended for production)
http:
  mtls:
    certificateVerification: true # Uses all defaults (CRL + OCSP, fail-closed)
```

```yaml
# Require mTLS for all connections + certificate verification
http:
  mtls:
    required: true # Reject connections without valid client certificate
    certificateVerification: true
```

```yaml
# mTLS with custom verification settings for high-security environments
http:
  mtls:
    certificateVerification:
      failureMode: fail-closed # Global setting
      crl:
        timeout: 15000 # 15 seconds for CRL download
        cacheTtl: 43200000 # Cache CRLs for 12 hours
        gracePeriod: 86400000 # 24 hour grace period
      ocsp:
        timeout: 8000 # 8 seconds for OCSP response
        cacheTtl: 7200000 # Cache results for 2 hours
```

```yaml
# mTLS with CRL only (no OCSP fallback)
http:
  mtls:
    certificateVerification:
      ocsp: false # Disable OCSP, CRL remains enabled
```

---

### `threads`

The `threads` section provides control over how many threads are used, how much heap memory they may use, and debugging of the threads:

`count` - _Type_: number; _Default_: One less than the number of logical cores/processors

The `threads.count` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because Harper does have other threads at work), assuming Harper is the main service on a server.
```yaml
threads:
  count: 11
```

`debug` - _Type_: boolean | object; _Default_: false

This enables debugging. If simply set to true, this will enable debugging on the main thread on port 9229 with the 127.0.0.1 host interface. This can also be an object for more debugging control:

- `debug.port` - The port to use for debugging the main thread.
- `debug.startingPort` - This will set up a separate port for debugging each thread. This is necessary for debugging individual threads with devtools.
- `debug.host` - Specify the host interface to listen on.
- `debug.waitForDebugger` - Wait for the debugger before starting.

```yaml
threads:
  debug:
    port: 9249
```

`maxHeapMemory` - _Type_: number;

```yaml
threads:
  maxHeapMemory: 300
```

This specifies the heap memory limit for each thread, in megabytes. The default heap limit is a heuristic based on available memory and thread count.

---

### `replication`

The `replication` section configures [Harper replication](../developers/replication/), which is used to create Harper clusters and replicate data between the instances.

```yaml
replication:
  hostname: server-one
  url: wss://server-one:9925
  databases: '*'
  routes:
    - wss://server-two:9925
  port: null
  securePort: 9933
  enableRootCAs: true
```

`hostname` - _Type_: string;

The hostname of the current Harper instance.

`url` - _Type_: string;

The URL of the current Harper instance.

`databases` - _Type_: string/array; _Default_: "\*" (all databases)

Configure which databases to replicate. This can be a string for all databases or an array for specific databases.

```yaml
replication:
  databases:
    - db1
    - db2
```

`routes` - _Type_: array;

An array of routes to connect to other nodes. Each element in the array can be either a string or an object with `hostname`, `port` and optionally `startTime` properties.

`startTime` - _Type_: string; ISO formatted UTC date string.

Replication will attempt to catch up on all remote data upon setup. To start replication from a specific date, set this property.

`revokedCertificates` - _Type_: array;

An array of serial numbers of revoked certificates. If a connection is attempted with a certificate that is in this list, the connection will be rejected.

```yaml
replication:
  hostname: server-one
  routes:
    - wss://server-two:9925 # URL based route
    - hostname: server-three # define a hostname and port
      port: 9930
      startTime: 2024-02-06T15:30:00Z
  revokedCertificates:
    - 1769F7D6A
    - QA69C7E2S
```

`port` - _Type_: integer;

The port to use for replication connections.

`securePort` - _Type_: integer; _Default_: 9933

The port to use for secure replication connections.

`enableRootCAs` - _Type_: boolean; _Default_: true

When true, Harper will verify certificates against the Node.js bundled CA store. The bundled CA store is a snapshot of the Mozilla CA store that is fixed at release time.

`mtls` - _Type_: object;

Configures mTLS settings for replication connections. **mTLS is always required for replication** and cannot be disabled (for security reasons). You can configure certificate verification settings:

```yaml
replication:
  mtls:
    certificateVerification: true # Enable certificate revocation checking
```

`certificateVerification` - _Type_: boolean | object; _Default_: false (disabled)

When enabled, Harper will verify the revocation status of replication peer certificates using CRL and/or OCSP.
This follows the same configuration structure as [HTTP certificate verification](#http) documented above.

**Important:** mTLS itself is always enabled for replication connections and cannot be disabled. This setting only controls whether certificate revocation checking (CRL/OCSP) is performed.

Example configurations:

```yaml
# Replication with mTLS but no certificate verification (default)
replication:
  hostname: server-one
  routes:
    - server-two
  # mTLS is always enabled, certificate verification is optional
```

```yaml
# Replication with certificate verification enabled (recommended for production)
replication:
  hostname: server-one
  routes:
    - server-two
  mtls:
    certificateVerification: true # Uses CRL and OCSP with defaults
```

```yaml
# Replication with custom certificate verification settings
replication:
  hostname: server-one
  routes:
    - server-two
  mtls:
    certificateVerification:
      crl:
        timeout: 15000
        cacheTtl: 43200000
      ocsp:
        timeout: 8000
```

Certificate verification can also be configured via environment variables:

```bash
REPLICATION_MTLS_CERTIFICATEVERIFICATION=true
REPLICATION_MTLS_CERTIFICATEVERIFICATION_FAILUREMODE=fail-closed
REPLICATION_MTLS_CERTIFICATEVERIFICATION_CRL=true
REPLICATION_MTLS_CERTIFICATEVERIFICATION_CRL_TIMEOUT=15000
REPLICATION_MTLS_CERTIFICATEVERIFICATION_OCSP=true
```

`blobTimeout` - _Type_: number; _Default_: 120000

The amount of time to wait for a blob to be transferred before timing out, measured in milliseconds.

`failOver` - _Type_: boolean; _Default_: true

When true, Harper will attempt to fail over and subscribe to a different node if the current node is unreachable, in order to maintain consistency.

`shard` - _Type_: integer;

This defines the shard id of this instance and is used in conjunction with the [Table Resource functions](../developers/replication/sharding#custom-sharding) `setResidency` & `setResidencyById` to programmatically route traffic to the proper shard.

---

### `clustering` using NATS

The `clustering` section configures the NATS clustering engine, which is used to replicate data between instances of Harper.

_Note: There exist two ways to create clusters and replicate data in Harper. One option is to use native Harper replication over WebSockets. The other option is to use_ [_NATS_](https://nats.io/about/) _to facilitate the cluster._

Clustering offers a lot of different configurations; however, in a majority of cases the only options you will need to pay attention to are:

- `clustering.enabled` Enable the clustering processes.
- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes.
- `clustering.hubServer.cluster.network.routes` The connections to other instances.
- `clustering.nodeName` The name of your node, which must be unique within the cluster.
- `clustering.user` The name of the user credentials used for inter-node authentication.

`enabled` - _Type_: boolean; _Default_: false

Enable clustering.

_Note: If you enable clustering but do not create and add a cluster user, you will get a validation error. See the `user` description below on how to add a cluster user._

```yaml
clustering:
  enabled: true
```

`clustering.hubServer.cluster`

Clustering’s `hubServer` facilitates the Harper mesh network and discovery service.
```yaml
clustering:
  hubServer:
    cluster:
      name: harperdb
      network:
        port: 9932
        routes:
          - host: 3.62.184.22
            port: 9932
          - host: 3.735.184.8
            port: 9932
```

`name` - _Type_: string, _Default_: harperdb

The name of your cluster. This name needs to be consistent for all other nodes intended to be meshed in the same network.

`port` - _Type_: integer, _Default_: 9932

The port the hub server uses to accept cluster connections.

`routes` - _Type_: array, _Default_: null

An object array that represents the host and port this server will cluster to. Each object must have two properties, `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints.

`host` - _Type_: string

The host of the remote instance you are creating the connection with.

`port` - _Type_: integer

The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance.

`clustering.hubServer.leafNodes`

```yaml
clustering:
  hubServer:
    leafNodes:
      network:
        port: 9931
```

`port` - _Type_: integer; _Default_: 9931

The port the hub server uses to accept leaf server connections.

`clustering.hubServer.network`

```yaml
clustering:
  hubServer:
    network:
      port: 9930
```

`port` - _Type_: integer; _Default_: 9930

Use this port to connect a client to the hub server, for example using the NATS SDK to interact with the server.

`clustering.leafServer`

Manages streams; streams are ‘message stores’ that store table transactions.

```yaml
clustering:
  leafServer:
    network:
      port: 9940
      routes:
        - host: 3.62.184.22
          port: 9931
        - host: node3.example.com
          port: 9931
    streams:
      maxAge: 3600
      maxBytes: 10000000
      maxMsgs: 500
      path: /user/hdb/clustering/leaf
```

`port` - _Type_: integer; _Default_: 9940

Use this port to connect a client to the leaf server, for example using the NATS SDK to interact with the server.

`routes` - _Type_: array; _Default_: null

An object array that represents the host and port the leaf node will directly connect with. Each object must have two properties, `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints.

`host` - _Type_: string

The host of the remote instance you are creating the connection with.

`port` - _Type_: integer

The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance.

`clustering.leafServer.streams`

`maxAge` - _Type_: integer; _Default_: null

The maximum age of any messages in the stream, expressed in seconds.

`maxBytes` - _Type_: integer; _Default_: null

The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size.

`maxMsgs` - _Type_: integer; _Default_: null

How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number.

`path` - _Type_: string; _Default_: \<ROOTPATH\>/clustering/leaf

The directory where all the streams are kept.
```yaml
clustering:
  leafServer:
    streams:
      maxConsumeMsgs: 100
      maxIngestThreads: 2
```

`maxConsumeMsgs` - _Type_: integer; _Default_: 100

The maximum number of messages a consumer can process in one go.

`maxIngestThreads` - _Type_: integer; _Default_: 2

The number of Harper threads that are delegated to ingesting messages.

---

`logLevel` - _Type_: string; _Default_: error

Control the verbosity of clustering logs.

```yaml
clustering:
  logLevel: error
```

There exists a log level hierarchy in the order `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace`, logs will be created for all possible levels, whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`.

`nodeName` - _Type_: string; _Default_: null

The name of this node in your Harper cluster topology. This must be a value unique from the rest of the cluster node names.

_Note: If you want to change the node name, make sure there are no subscriptions in place before doing so. After the name has been changed, a full restart is required._

```yaml
clustering:
  nodeName: great_node
```

`tls`

Transport Layer Security default values are automatically generated on install.

```yaml
clustering:
  tls:
    certificate: ~/hdb/keys/certificate.pem
    certificateAuthority: ~/hdb/keys/ca.pem
    privateKey: ~/hdb/keys/privateKey.pem
    insecure: true
    verify: true
```

`certificate` - _Type_: string; _Default_: \<ROOTPATH\>/keys/certificate.pem

Path to the certificate file.

`certificateAuthority` - _Type_: string; _Default_: \<ROOTPATH\>/keys/ca.pem

Path to the certificate authority file.

`privateKey` - _Type_: string; _Default_: \<ROOTPATH\>/keys/privateKey.pem

Path to the private key file.

`insecure` - _Type_: boolean; _Default_: true

When true, will skip certificate verification. For use only with self-signed certs.

`republishMessages` - _Type_: boolean; _Default_: false

When true, all transactions that are received from other nodes are republished to this node's stream. When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. However, there is additional overhead with republishing, and setting this to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected between every node and every other node, and be aware that the NATS transaction log will only consist of local writes.

`verify` - _Type_: boolean; _Default_: true

When true, the hub server will verify the client certificate using the CA certificate.

---

`user` - _Type_: string; _Default_: null

The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password).

Inter-node authentication takes place via a special Harper user role type called `cluster_user`. The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using the environment variables `CLUSTERING_USER=cluster_person` and `CLUSTERING_PASSWORD=pass123!`, or the CLI variables `harperdb --CLUSTERING_USER cluster_person --CLUSTERING_PASSWORD pass123!`.
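For example, a minimal sketch of an `add_user` request that creates a cluster user (the username and password shown are placeholders):

```json
{
  "operation": "add_user",
  "role": "cluster_user",
  "username": "cluster_person",
  "password": "pass123!",
  "active": true
}
```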
```yaml
clustering:
  user: cluster_person
```

---

### `localStudio`

The `localStudio` section configures the local Harper Studio, a GUI for Harper hosted on the server. A hosted version of the Harper Studio with licensing and provisioning options is available at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or Harper Studio is made directly from your browser to the instance.

`enabled` - _Type_: boolean; _Default_: false

Enables the local Studio.

```yaml
localStudio:
  enabled: false
```

---

### `logging`

The `logging` section configures Harper logging across all Harper functionality. This includes standard text logging of application and database events as well as structured data logs of record changes. Application and database events are logged in text format to the `~/hdb/log/hdb.log` file (or the location specified by `logging.root` or `logging.path`). Many of the logging configuration properties can be set and applied without a restart (they are dynamically applied).

In addition, structured logging of data changes is also available:

`auditLog` - _Type_: boolean; _Default_: false

Enables table transaction logging.

```yaml
logging:
  auditLog: false
```

To access the audit logs, use the API operation `read_audit_log`. It will provide a history of the data, including original records and changes made, in a specified table.

```json
{
  "operation": "read_audit_log",
  "schema": "dev",
  "table": "dog"
}
```

`file` - _Type_: boolean; _Default_: true

Defines whether to log to a file.

```yaml
logging:
  file: true
```

`auditRetention` - _Type_: string|number; _Default_: 3d

This specifies how long audit logs should be retained.

`level` - _Type_: string; _Default_: warn

Control the verbosity of text event logs.

```yaml
logging:
  level: warn
```

There exists a log level hierarchy in the order `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace`, logs will be created for all possible levels, whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `warn`.

`console` - _Type_: boolean; _Default_: true

Controls whether console.log and other console.\* calls (as well as any other JS components that write to `process.stdout` and `process.stderr`) are logged to the log file. By default, these are not logged to the log file, but this can be enabled:

```yaml
logging:
  console: true
```

`root` - _Type_: string; _Default_: \<ROOTPATH\>/log

The directory path where the log files will be written.

```yaml
logging:
  root: ~/hdb/log
```

`path` - _Type_: string; _Default_: \<ROOTPATH\>/log/hdb.log

The file path where the log file will be written.

```yaml
logging:
  path: ~/hdb/log/hdb.log
```

`rotation`

Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable rotation, `interval` and/or `maxSize` must be set.

_**Note:**_ `interval` and `maxSize` are approximations only. It is possible that the log file will exceed these values slightly before it is rotated.
```yaml
logging:
  rotation:
    enabled: true
    compress: false
    interval: 1D
    maxSize: 100K
    path: /user/hdb/log
```

`enabled` - _Type_: boolean; _Default_: true

Enables logging rotation.

`compress` - _Type_: boolean; _Default_: false

Enables compression via gzip when logs are rotated.

`interval` - _Type_: string; _Default_: null

The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes).

`maxSize` - _Type_: string; _Default_: null

The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte).

`path` - _Type_: string; _Default_: \<ROOTPATH\>/log

Where to store the rotated log file. The file naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`.

`stdStreams` - _Type_: boolean; _Default_: false

Log Harper logs to the standard output and error streams.

```yaml
logging:
  stdStreams: false
```

`auditAuthEvents`

`logFailed` - _Type_: boolean; _Default_: false

Log all failed authentication events.

_Example:_ `[error] [auth-event]: {"username":"admin","status":"failure","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}`

`logSuccessful` - _Type_: boolean; _Default_: false

Log all successful authentication events.

_Example:_ `[notify] [auth-event]: {"username":"admin","status":"success","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}`

```yaml
logging:
  auditAuthEvents:
    logFailed: false
    logSuccessful: false
```

#### Defining Separate Logging Configurations

Harper's logger supports defining multiple logging configurations for different components in the system. Each logging configuration can be assigned its own `path` (or `root`), `level`, `tag`, and a flag to enable/disable logging to `stdStreams`. All logging defaults to the configuration of the "main" logger as configured above, but when logging is configured for different loggers, they will use their own configuration. Separate loggers can be defined:

`logging.external`

The `logging.external` section can be used to define logging for all external components that use the [`logger` API](../reference/globals). For example:

```yaml
logging:
  external:
    level: warn
    path: ~/hdb/log/apps.log
```

`http.logging`

This section defines log configuration for HTTP logging. By default, HTTP requests are not logged, but defining this section will enable HTTP logging. Note that there can be substantial overhead to logging all HTTP requests. In addition to the standard logging configuration, the `http.logging` section also allows the following configuration properties to be set:

- `timing` - This will log timing information.
- `headers` - This will log the headers in each request (which can be very verbose).
- `id` - This will assign a unique id to each request and log it in the entry for each request. This is assigned as the `request.requestId` property and can be used by other logging to track a request.

Note that the `level` will determine which HTTP requests are logged:

- `info` (or more verbose) - All HTTP requests
- `warn` - HTTP requests with a status code of 400 or above
- `error` - HTTP requests with a status code of 500

For example:

```yaml
http:
  logging:
    timing: true
    level: info
    path: ~/hdb/log/http.log
    ... rest of http config
```

`authentication.logging`

This section defines log configuration for authentication. It takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and a flag to enable/disable logging to `stdStreams`.
This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and a flag to enable/disable logging to `stdStreams`.
+
+`mqtt.logging`
+
+This section defines log configuration for MQTT. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and a flag to enable/disable logging to `stdStreams`.
+
+`replication.logging`
+
+This section defines log configuration for replication. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and a flag to enable/disable logging to `stdStreams`.
+
+`tls.logging`
+
+This section defines log configuration for TLS. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and a flag to enable/disable logging to `stdStreams`.
+
+`storage.logging`
+
+This section defines log configuration for setting up and reading the database files. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and a flag to enable/disable logging to `stdStreams`.
+
+`analytics.logging`
+
+This section defines log configuration for analytics. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and a flag to enable/disable logging to `stdStreams`.
+
+---
+
+### `authentication`
+
+The `authentication` section defines the configuration for the default authentication mechanism in Harper.
+
+```yaml
+authentication:
+  authorizeLocal: true
+  cacheTTL: 30000
+  enableSessions: true
+  operationTokenTimeout: 1d
+  refreshTokenTimeout: 30d
+```
+
+`authorizeLocal` - _Type_: boolean; _Default_: true
+
+This will automatically authorize any requests from the loopback IP address as the superuser. This should be disabled for any Harper servers that may be accessed by untrusted users from the same instance. For example, this should be disabled if you are using a local proxy, or for general server hardening.
+
+`cacheTTL` - _Type_: number; _Default_: 30000
+
+This defines the length of time (in milliseconds) that an authentication (a particular Authorization header or token) can be cached.
+
+`enableSessions` - _Type_: boolean; _Default_: true
+
+This will enable cookie-based sessions to maintain an authenticated session. This is generally the preferred mechanism for maintaining authentication in web browsers, as it allows cookies to hold an authentication token securely without giving JavaScript code access to tokens/credentials that may open up XSS vulnerabilities.
+
+`operationTokenTimeout` - _Type_: string; _Default_: 1d
+
+Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms).
+
+`refreshTokenTimeout` - _Type_: string; _Default_: 30d
+
+Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms).
+
+### `operationsApi`
+
+The `operationsApi` section configures the Harper Operations API.\
+All the `operationsApi` configuration is optional. Any configuration that is not provided under this section will default to the `http` configuration section.
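+
+For example, a minimal sketch of this fallback behavior (the values here are illustrative): if only the port is overridden, the remaining network settings are inherited from the `http` section.
+
+```yaml
+# Hypothetical override: only the port is set for the operations API;
+# timeouts and other network settings fall back to the `http` section.
+operationsApi:
+  network:
+    port: 9925
+```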
+
+`network`
+
+```yaml
+operationsApi:
+  network:
+    cors: true
+    corsAccessList:
+      - null
+    domainSocket: /user/hdb/operations-server
+    headersTimeout: 60000
+    keepAliveTimeout: 5000
+    port: 9925
+    securePort: null
+    timeout: 120000
+```
+
+`cors` - _Type_: boolean; _Default_: true
+
+Enable Cross Origin Resource Sharing, which allows requests across a domain.
+
+`corsAccessList` - _Type_: array; _Default_: null
+
+An array of domains allowed when CORS is enabled.
+
+`domainSocket` - _Type_: string; _Default_: `<ROOTPATH>/hdb/operations-server`
+
+The path to the Unix domain socket used to provide the Operations API through the CLI.
+
+`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute)
+
+Limit the amount of time the parser will wait to receive the complete HTTP headers.
+
+`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds)
+
+Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response.
+
+`port` - _Type_: integer; _Default_: 9925
+
+The port the Harper operations API interface will listen on.
+
+`securePort` - _Type_: integer; _Default_: null
+
+The port the Harper operations API uses for HTTPS connections. This requires a valid certificate and key.
+
+`timeout` - _Type_: integer; _Default_: 120,000 milliseconds (2 minutes)
+
+The length of time in milliseconds after which a request will time out.
+
+`tls`
+
+This configures the Transport Layer Security for HTTPS support.
+
+```yaml
+operationsApi:
+  tls:
+    certificate: ~/hdb/keys/certificate.pem
+    certificateAuthority: ~/hdb/keys/ca.pem
+    privateKey: ~/hdb/keys/privateKey.pem
+```
+
+`certificate` - _Type_: string; _Default_: `<ROOTPATH>/keys/certificate.pem`
+
+Path to the certificate file.
+
+`certificateAuthority` - _Type_: string; _Default_: `<ROOTPATH>/keys/ca.pem`
+
+Path to the certificate authority file.
+
+`privateKey` - _Type_: string; _Default_: `<ROOTPATH>/keys/privateKey.pem`
+
+Path to the private key file.
+
+---
+
+### `componentsRoot`
+
+`componentsRoot` - _Type_: string; _Default_: `<ROOTPATH>/components`
+
+The path to the folder containing the local component files.
+
+```yaml
+componentsRoot: ~/hdb/components
+```
+
+---
+
+### `rootPath`
+
+`rootPath` - _Type_: string; _Default_: home directory of the current user
+
+The Harper database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the Harper application persists data, config, logs, and Custom Functions.
+
+```yaml
+rootPath: /Users/jonsnow/hdb
+```
+
+---
+
+### `storage`
+
+`writeAsync` - _Type_: boolean; _Default_: false
+
+The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring a restore from a backup or another node.
+
+```yaml
+storage:
+  writeAsync: false
+```
+
+`caching` - _Type_: boolean; _Default_: true
+
+The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching.
+
+```yaml
+storage:
+  caching: true
+```
+
+`compression` - _Type_: boolean; _Default_: true
+
+The `compression` option enables compression of records in the database.
This can be helpful for very large records in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but it still incurs extra costs for compressing and decompressing.
+
+```yaml
+storage:
+  compression: false
+```
+
+`compression.dictionary` - _Type_: string; _Default_: null
+
+Path to a compression dictionary file.
+
+`compression.threshold` - _Type_: number; _Default_: `4036`, or `storage.pageSize - 60` if `storage.pageSize` is provided
+
+Only entries that are larger than this value (in bytes) will be compressed.
+
+```yaml
+storage:
+  compression:
+    dictionary: /users/harperdb/dict.txt
+    threshold: 1000
+```
+
+`compactOnStart` - _Type_: boolean; _Default_: false
+
+When `true`, all non-system databases will be compacted when starting Harper; read more [here](../administration/compact).
+
+`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false
+
+Keep the backups made by `compactOnStart`.
+
+```yaml
+storage:
+  compactOnStart: true
+  compactOnStartKeepBackup: false
+```
+
+`maxTransactionQueueTime` - _Type_: time; _Default_: 45s
+
+The `maxTransactionQueueTime` option specifies how long the write queue can get before write requests are rejected (with a 503).
+
+```yaml
+storage:
+  maxTransactionQueueTime: 2m
+```
+
+`noReadAhead` - _Type_: boolean; _Default_: false
+
+The `noReadAhead` option advises the operating system not to read ahead when reading from the database. This provides better memory utilization for databases with small records (less than one page), but can degrade performance in situations where large records are used or frequent range queries are used.
+
+```yaml
+storage:
+  noReadAhead: true
+```
+
+`prefetchWrites` - _Type_: boolean; _Default_: true
+
+The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases).
+
+```yaml
+storage:
+  prefetchWrites: true
+```
+
+`path` - _Type_: string; _Default_: `<ROOTPATH>/database`
+
+The `path` configuration sets where all database files should reside.
+
+```yaml
+storage:
+  path: /users/harperdb/storage
+```
+
+_**Note:**_ This configuration applies to all database files, which includes system tables that are used internally by Harper. For this reason, if you wish to use a non-default `path` value, you must move any existing schemas into your `path` location. Existing schemas likely include the system schema, which can be found at `<ROOTPATH>/schema/system`.
+
+`blobPaths` - _Type_: string | array; _Default_: `<ROOTPATH>/blobs`
+
+The `blobPaths` configuration sets where all the blob files should reside. This can be an array of paths, and if there are multiple, the blobs will be distributed across the paths.
+
+```yaml
+storage:
+  blobPaths:
+    - /users/harperdb/big-storage
+```
+
+`pageSize` - _Type_: number; _Default_: the default page size of the OS
+
+Defines the page size of the database.
+
+```yaml
+storage:
+  pageSize: 4096
+```
+
+`reclamation`
+
+The reclamation section provides configuration for the reclamation process, which is responsible for reclaiming space when free space is low.
For example:
+
+```yaml
+storage:
+  reclamation:
+    threshold: 0.4 # Start storage reclamation efforts when free space has reached 40% of the volume space (default)
+    interval: 1h # Reclamation will run every hour (default)
+    evictionFactor: 100000 # A factor used to determine how aggressively to evict cached entries (default)
+```
+
+---
+
+### `tls`
+
+This section defines the certificates, keys, and settings for Transport Layer Security (TLS) for HTTPS and TLS socket support. This is used for both the HTTP and MQTT protocols. The `tls` section can be a single object with the settings below, or it can be an array of objects, where each object is a separate TLS configuration. By using an array, the TLS configuration can be used to define multiple certificates for different domains/hosts (negotiated through SNI).
+
+```yaml
+tls:
+  certificate: ~/hdb/keys/certificate.pem
+  certificateAuthority: ~/hdb/keys/ca.pem
+  privateKey: ~/hdb/keys/privateKey.pem
+```
+
+`certificate` - _Type_: string; _Default_: `<ROOTPATH>/keys/certificate.pem`
+
+Path to the certificate file.
+
+`certificateAuthority` - _Type_: string; _Default_: `<ROOTPATH>/keys/ca.pem`
+
+Path to the certificate authority file.
+
+`privateKey` - _Type_: string; _Default_: `<ROOTPATH>/keys/privateKey.pem`
+
+Path to the private key file.
+
+`ciphers` - _Type_: string;
+
+Allows specific ciphers to be set.
+
+If you want to define multiple certificates that are applied based on the domain/host requested via SNI, you can define an array of TLS configurations. Each configuration can have the same properties as the root TLS configuration, but can (optionally) also have an additional `host` property to specify the domain/host that the certificate should be used for:
+
+```yaml
+tls:
+  - certificate: ~/hdb/keys/certificate1.pem
+    certificateAuthority: ~/hdb/keys/ca1.pem
+    privateKey: ~/hdb/keys/privateKey1.pem
+    host: example.com # the host is optional, and if not provided, this certificate's common name will be used as the host name.
+  - certificate: ~/hdb/keys/certificate2.pem
+    certificateAuthority: ~/hdb/keys/ca2.pem
+    privateKey: ~/hdb/keys/privateKey2.pem
+```
+
+Note that a `tls` section can also be defined in the `operationsApi` section, which will override the root `tls` section for the operations API.
+
+---
+
+### `mqtt`
+
+The MQTT protocol can be configured in this section.
+
+```yaml
+mqtt:
+  network:
+    port: 1883
+    securePort: 8883
+    mtls: false
+  webSocket: true
+  requireAuthentication: true
+```
+
+`port` - _Type_: number; _Default_: 1883
+
+This is the port to use for listening for insecure MQTT connections.
+
+`securePort` - _Type_: number; _Default_: 8883
+
+This is the port to use for listening for secure MQTT connections. This will use the `tls` configuration for certificates.
+
+`webSocket` - _Type_: boolean; _Default_: true
+
+This enables access to MQTT through WebSockets. This will handle WebSocket connections on the HTTP port (defaults to 9926) that have specified a (sub)protocol of `mqtt`.
+
+`requireAuthentication` - _Type_: boolean; _Default_: true
+
+This indicates if authentication should be required for establishing an MQTT connection (whether through MQTT connection credentials or mTLS). Disabling this allows unauthenticated connections, which are then subject to authorization for publishing and subscribing (by default, tables/resources do not authorize such access, but that can be enabled at the resource level).
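+
+To exercise a configuration like the one above, a client can connect with credentials. Below is a minimal sketch using the third-party `mqtt` npm package; the host, port, topic, and credentials are illustrative assumptions, not values Harper requires:
+
+```javascript
+// Minimal connection sketch using the `mqtt` package (npm install mqtt).
+// Host, port, topic, and credentials below are illustrative assumptions.
+const mqtt = require('mqtt');
+
+const client = mqtt.connect('mqtt://localhost:1883', {
+	username: 'HDB_ADMIN', // credentials are required when requireAuthentication is true
+	password: 'password',
+});
+
+client.on('connect', () => {
+	// topics generally correspond to resource paths, e.g. table/record-id
+	client.subscribe('dev/dog/1', (err) => {
+		if (err) console.error('subscribe failed', err);
+	});
+});
+
+client.on('message', (topic, message) => {
+	console.log(`received on ${topic}:`, message.toString());
+});
+```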
+
+`mtls` - _Type_: boolean | object; _Default_: false
+
+This can be configured to enable mTLS-based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified in the `tls` section. If the certificate can be properly verified, the connection will be authenticated as the user whose id/username is given by the `CN` (common name) from the client certificate's `subject`, by default.
+
+You can also define specific mTLS options by specifying an object for mtls with the following optional properties:
+
+`user` - _Type_: string; _Default_: Common Name
+
+This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication.
+
+`required` - _Type_: boolean; _Default_: false
+
+This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication.
+
+`certificateAuthority` - _Type_: string; _Default_: Path from `tls.certificateAuthority`
+
+This can define a specific path to use for the certificate authority. By default, certificate authorization checks against the CA specified at `tls.certificateAuthority`, but if you need a specific/distinct CA for MQTT, you can set this.
+
+`certificateVerification` - _Type_: boolean | object; _Default_: true
+
+When mTLS is enabled, Harper verifies the revocation status of client certificates using OCSP (Online Certificate Status Protocol). This ensures that revoked certificates cannot be used for authentication.
+
+Set to `false` to disable certificate verification, or configure with an object:
+
+- `timeout` - _Type_: number; _Default_: 5000 - Maximum milliseconds to wait for OCSP response
+- `cacheTtl` - _Type_: number; _Default_: 3600000 - Milliseconds to cache verification results (default: 1 hour)
+- `failureMode` - _Type_: string; _Default_: 'fail-open' - Behavior when OCSP verification fails:
+  - `'fail-open'`: Allow connection on verification failure (logs warning)
+  - `'fail-closed'`: Reject connection on verification failure
+
+For example, you could specify that mTLS is required and will authenticate as "user-name":
+
+```yaml
+mqtt:
+  network:
+    mtls:
+      user: user-name
+      required: true
+```
+
+---
+
+### `databases`
+
+The `databases` section is an optional configuration that can be used to define where database files should reside down to the table level. This configuration should be set before the database and table have been created. The configuration will not create the directories in the path; that must be done by the user.
+
+To define where a database and all its tables should reside, use the name of your database and the `path` parameter.
+
+```yaml
+databases:
+  nameOfDatabase:
+    path: /path/to/database
+```
+
+To define where specific tables within a database should reside, use the name of your database, the `tables` parameter, the name of your table, and the `path` parameter.
+
+```yaml
+databases:
+  nameOfDatabase:
+    tables:
+      nameOfTable:
+        path: /path/to/table
+```
+
+This same pattern can be used to define where the audit log database files should reside. To do this, use the `auditPath` parameter.
+
+```yaml
+databases:
+  nameOfDatabase:
+    auditPath: /path/to/database
+```
+
+**Setting the database section through the command line, environment variables, or API**
+
+When using command line variables, environment variables, or the API to configure the `databases` section, a slightly different convention from the regular one should be used. To add one or more configurations, use a JSON object array.
+
+Using command line variables:
+
+```bash
+--DATABASES [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}]
+```
+
+Using environment variables:
+
+```bash
+DATABASES=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}]
+```
+
+Using the API:
+
+```json
+{
+  "operation": "set_configuration",
+  "databases": [
+    {
+      "nameOfDatabase": {
+        "tables": {
+          "nameOfTable": {
+            "path": "/path/to/table"
+          }
+        }
+      }
+    }
+  ]
+}
+```
+
+### `analytics`
+
+`aggregatePeriod` - _Type_: number; _Default_: 60 (seconds)
+
+This defines how often recorded metrics in the `system.hdb_raw_analytics` table are aggregated into the `system.hdb_analytics` table. The analytics operations in the operations API exclusively use the aggregated analytics.
+
+```yaml
+analytics:
+  aggregatePeriod: 60
+```
+
+`replicate` - _Type_: boolean; _Default_: false
+
+This defines whether or not the aggregated analytics data in `system.hdb_analytics` should be replicated to the rest of the cluster.
+
+```yaml
+analytics:
+  replicate: true
+```
+
+---
+
+### Components
+
+`<name>` - _Type_: string
+
+The name of the component. This will be used to name the folder where the component is installed and must be unique.
+
+`package` - _Type_: string
+
+A reference to your [component](../reference/components/applications#adding-components-to-root) package. This could be a remote git repo, a local folder/file, or an NPM package. Harper will add this package to a package.json file and call `npm install` on it, so any reference that works with that paradigm will work here.
+
+Read more about npm install [here](https://docs.npmjs.com/cli/v8/commands/npm-install).
+
+`port` - _Type_: number; _Default_: the value of `http.port`
+
+The port that your component should listen on. If no port is provided, it will default to `http.port`.
+
+```yaml
+<name>:
+  package: 'HarperDB-Add-Ons/package-name'
+  port: 4321
+```
diff --git a/versioned_docs/version-4.7/deployments/harper-cli.md b/versioned_docs/version-4.7/deployments/harper-cli.md
new file mode 100644
index 00000000..d447e892
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/harper-cli.md
@@ -0,0 +1,194 @@
+---
+title: Harper CLI
+---
+
+# Harper CLI
+
+The Harper command line interface (CLI) is used to administer [self-installed Harper instances](install-harper/).
+
+### Installing Harper
+
+To install Harper with CLI prompts, run the following command:
+
+```bash
+harperdb install
+```
+
+Alternatively, Harper installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](configuration#using-the-configuration-file-and-naming-conventions). Note, when used in conjunction, command line arguments will override environment variables.
+
+**Environment Variables**
+
+```bash
+#minimum required parameters for no additional CLI prompts
+export TC_AGREEMENT=yes
+export HDB_ADMIN_USERNAME=HDB_ADMIN
+export HDB_ADMIN_PASSWORD=password
+export ROOTPATH=/tmp/hdb/
+export OPERATIONSAPI_NETWORK_PORT=9925
+harperdb install
+```
+
+**Command Line Arguments**
+
+```bash
+#minimum required parameters for no additional CLI prompts
+harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925
+```
+
+---
+
+### Starting Harper
+
+To start Harper after it is installed, run the following command:
+
+```bash
+harperdb start
+```
+
+---
+
+### Stopping Harper
+
+To stop Harper once it is running, run the following command:
+
+```bash
+harperdb stop
+```
+
+---
+
+### Restarting Harper
+
+To restart Harper once it is running, run the following command:
+
+```bash
+harperdb restart
+```
+
+---
+
+### Getting the Harper Version
+
+To check the version of Harper that is installed, run the following command:
+
+```bash
+harperdb version
+```
+
+---
+
+### Renew self-signed certificates
+
+To renew the Harper-generated self-signed certificates, run:
+
+```bash
+harperdb renew-certs
+```
+
+---
+
+### Copy a database with compaction
+
+To copy a Harper database with compaction (to eliminate free space and fragmentation), use:
+
+```bash
+harperdb copy-db <database> <target-path>
+```
+
+For example, to copy the default database:
+
+```bash
+harperdb copy-db data /home/user/hdb/database/copy.mdb
+```
+
+---
+
+### Get all available CLI commands
+
+To display all available Harper CLI commands along with a brief description, run:
+
+```bash
+harperdb help
+```
+
+---
+
+### Get the status of Harper and clustering
+
+To display the status of the Harper process, the clustering hub and leaf processes, the clustering network, and replication statuses, run:
+
+```bash
+harperdb status
+```
+
+---
+
+### Backups
+
+Harper uses a transactional commit process that ensures that data on disk is always transactionally consistent with storage. This means that Harper maintains database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a Harper database. Database files are stored in the `hdb/database` directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with Harper shut down), and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields an unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use.
+
+---
+
+## Operations API through the CLI
+
+Some of the API operations are available through the CLI; this includes most operations that do not require nested parameters. To call an operation, use the following convention: `harperdb <operation> <parameter>=<value>`.
By default, the result will be formatted as YAML; if you would like the result in JSON, pass `json=true`.
+
+Some examples are:
+
+```bash
+$ harperdb describe_table database=dev table=dog
+
+schema: dev
+name: dog
+hash_attribute: id
+audit: true
+schema_defined: false
+attributes:
+  - attribute: id
+    is_primary_key: true
+  - attribute: name
+    indexed: true
+clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b
+record_count: 10
+last_updated_record: 1724483231970.9949
+```
+
+`harperdb set_configuration logging_level=error`
+
+`harperdb deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template`
+
+`harperdb get_components`
+
+`harperdb search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true`
+
+`harperdb search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]'`
+
+`harperdb sql sql='select * from dev.dog where id="1"'`
+
+### Remote Operations
+
+The CLI can also be used to run operations on remote Harper instances. To do this, pass the `target` parameter with the HTTP address of the remote instance. You generally will also need to provide credentials and specify the `username` and `password` parameters, or you can set environment variables `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD`, for example:
+
+```bash
+export CLI_TARGET_USERNAME=HDB_ADMIN
+export CLI_TARGET_PASSWORD=password
+harperdb describe_database database=dev target=https://server.com:9925
+```
+
+The same set of API operations is available for remote operations as well.
+
+#### Remote Component Deployment
+
+When using remote operations, you can deploy a local component to the remote instance. If you omit the `package` parameter, you can deploy the current directory. This will package the current directory and send it to the target server (also, `deploy` is allowed as an alias for `deploy_component`):
+
+```bash
+harperdb deploy target=https://server.com:9925
+```
+
+If you are interacting with a cluster, you may wish to include the `replicated=true` parameter to ensure that the deployment operation is replicated to all nodes in the cluster. You will also need to restart afterwards to apply the changes (here seen with the replicated parameter):
+
+```bash
+harperdb restart target=https://server.com:9925 replicated=true
+```
diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/alarms.md b/versioned_docs/version-4.7/deployments/harper-cloud/alarms.md
new file mode 100644
index 00000000..372807e5
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/harper-cloud/alarms.md
@@ -0,0 +1,20 @@
+---
+title: Alarms
+---
+
+# Alarms
+
+Harper Cloud instance alarms are triggered when certain conditions are met. Once alarms are triggered, organization owners will immediately receive an email alert, and the alert will be available on the [Instance Configuration](../../administration/harper-studio/instance-configuration) page. The table below describes each alert and its evaluation metrics.
+
+### Heading Definitions
+
+- **Alarm**: Title of the alarm.
+- **Threshold**: Definition of the alarm threshold.
+- **Intervals**: The number of occurrences before an alarm is triggered and the period that the metric is evaluated over.
+- **Proposed Remedy**: Recommended solution to avoid the alert in the future.
+
+| Alarm   | Threshold  | Intervals | Proposed Remedy                                                                                                              |
+| ------- | ---------- | --------- | ---------------------------------------------------------------------------------------------------------------------------- |
+| Storage | > 90% Disk | 1 x 5min  | [Increase storage volume](../../administration/harper-studio/instance-configuration#update-instance-storage)                  |
+| CPU     | > 90% Avg  | 2 x 5min  | [Increase instance size for additional CPUs](../../administration/harper-studio/instance-configuration#update-instance-ram)   |
+| Memory  | > 90% RAM  | 2 x 5min  | [Increase instance size](../../administration/harper-studio/instance-configuration#update-instance-ram)                       |
diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/index.md b/versioned_docs/version-4.7/deployments/harper-cloud/index.md
new file mode 100644
index 00000000..c0785d0d
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/harper-cloud/index.md
@@ -0,0 +1,9 @@
+---
+title: Harper Cloud
+---
+
+# Harper Cloud
+
+[Harper Cloud](https://studio.harperdb.io/) is the easiest way to test drive Harper: it’s Harper-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. Harper Cloud is currently powered by AWS, with additional cloud providers on our roadmap for the future.
+
+You can create a new Harper Cloud instance in the Harper Studio.
diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/instance-size-hardware-specs.md b/versioned_docs/version-4.7/deployments/harper-cloud/instance-size-hardware-specs.md
new file mode 100644
index 00000000..6ea4c7d2
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/harper-cloud/instance-size-hardware-specs.md
@@ -0,0 +1,23 @@
+---
+title: Instance Size Hardware Specs
+---
+
+# Instance Size Hardware Specs
+
+While Harper Cloud bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*.
+
+| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor                              |
+| --------------------- | --------- | ------- | -------------- | -------------------------------------- |
+| t3.micro              | 1         | 2       | Up to 5        | 2.5 GHz Intel Xeon Platinum 8000       |
+| t3.small              | 2         | 2       | Up to 5        | 2.5 GHz Intel Xeon Platinum 8000       |
+| t3.medium             | 4         | 2       | Up to 5        | 2.5 GHz Intel Xeon Platinum 8000       |
+| m5.large              | 8         | 2       | Up to 10       | Up to 3.1 GHz Intel Xeon Platinum 8000 |
+| m5.xlarge             | 16        | 4       | Up to 10       | Up to 3.1 GHz Intel Xeon Platinum 8000 |
+| m5.2xlarge            | 32        | 8       | Up to 10       | Up to 3.1 GHz Intel Xeon Platinum 8000 |
+| m5.4xlarge            | 64        | 16      | Up to 10       | Up to 3.1 GHz Intel Xeon Platinum 8000 |
+| m5.8xlarge            | 128       | 32      | 10             | Up to 3.1 GHz Intel Xeon Platinum 8000 |
+| m5.12xlarge           | 192       | 48      | 10             | Up to 3.1 GHz Intel Xeon Platinum 8000 |
+| m5.16xlarge           | 256       | 64      | 20             | Up to 3.1 GHz Intel Xeon Platinum 8000 |
+| m5.24xlarge           | 384       | 96      | 25             | Up to 3.1 GHz Intel Xeon Platinum 8000 |
+
+\*Specifications are subject to change. For the most up-to-date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/).
diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/iops-impact.md b/versioned_docs/version-4.7/deployments/harper-cloud/iops-impact.md
new file mode 100644
index 00000000..0b32df8e
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/harper-cloud/iops-impact.md
@@ -0,0 +1,45 @@
+---
+title: IOPS Impact on Performance
+---
+
+# IOPS Impact on Performance
+
+Harper, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of a server, virtual machine, or container running Harper. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that Harper performs as expected.
+
+## IOPS Overview
+
+The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS.
+
+Cloud providers like AWS, which powers Harper Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage are then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS.
+
+## Harper Cloud Storage
+
+Harper Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price.
+
+AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS; as a result, all Harper Cloud storage options offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future.
+
+You can read more about [AWS EBS volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html).
+
+## Estimating IOPS for a Harper Instance
+
+The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. A reliable method is to estimate about two IOPS for every index, including the primary key itself. So if a table has two indices besides the primary key, estimate that an insert or update will require about six IOPS. Note that this can often be closer to one IOPS per index under load, due to internal batching of writes, and sometimes even better when doing sequential inserts. Again, it is best to test and verify this with application-specific data and write patterns.
+
+For assistance in estimating IOPS requirements, feel free to contact Harper Support or join our Community Slack Channel.
+
+## Example Use Case IOPS Requirements
+
+- **Sensor Data Collection**
+
+  In the case of IoT sensors where data collection will be sustained, high IOPS are required. While there are not typically large queries going on in this case, there is a high volume of data being ingested. This implies that IOPS will be sustained at a high level.
For example, if you are collecting 100 records per second, you would expect to need roughly 3,000 IOPS just to handle the data inserts.
+
+- **Data Analytics/BI Server**
+
+  Providing a server for analytics purposes typically requires a larger machine. Typically, these cases involve large-scale SQL joins and aggregations, which put a large strain on reads. Harper utilizes an in-memory cache, which provides a significant performance boost on machines with large amounts of memory. However, if disparate datasets are constantly being queried and/or new data is frequently being loaded, you will find that the system still needs to have high IOPS to meet performance demand.
+
+- **Web Services**
+
+  Typical web service implementations with discrete reads and writes often do not need high IOPS to perform as expected. This is often the case in more transactional systems without the requirement for high performance load. A good rule to follow is that any Harper operation that requires a data scan will be IOPS-intensive, but if these are not frequent, then the EBS boost will suffice. Queries utilizing equals operations in either SQL or NoSQL do not require a scan, due to Harper’s native indexing.
+
+- **High Performance Database**
+
+  Ultimately, if performance is your top priority, Harper should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements.
diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/verizon-5g-wavelength-instances.md b/versioned_docs/version-4.7/deployments/harper-cloud/verizon-5g-wavelength-instances.md
new file mode 100644
index 00000000..aae57f67
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/harper-cloud/verizon-5g-wavelength-instances.md
@@ -0,0 +1,31 @@
+---
+title: Verizon 5G Wavelength
+---
+
+# Verizon 5G Wavelength
+
+These instances are only accessible from the Verizon network. When accessing your Harper instance, please ensure you are connected to the Verizon network; examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices.
+
+Harper on Verizon 5G Wavelength brings Harper closer to the end user, exclusively on the Verizon network, resulting in as little as single-digit millisecond response time from Harper to the client.
+
+Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/).
+
+## Harper 5G Wavelength Instance Specs
+
+While Harper 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*.
+
+| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor                                   |
+| --------------------- | --------- | ------- | -------------- | ------------------------------------------- |
+| t3.medium             | 4         | 2       | Up to 5        | Up to 3.1 GHz Intel Xeon Platinum Processor |
+| t3.xlarge             | 16        | 4       | Up to 5        | Up to 3.1 GHz Intel Xeon Platinum Processor |
+| r5.2xlarge            | 64        | 8       | Up to 10       | Up to 3.1 GHz Intel Xeon Platinum Processor |
+
+\*Specifications are subject to change. For the most up-to-date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/).
+
+## Harper 5G Wavelength Storage
+
+Harper 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price.
+
+AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS a volume can perform indefinitely. The larger the volume, the higher its baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time.
+
+Smaller gp2 volumes are perfect for trying out the functionality of Harper, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger Harper volume. Learn more about the [impact of IOPS on performance here](iops-impact).
+
+You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2).
diff --git a/versioned_docs/version-4.7/deployments/install-harper/index.md b/versioned_docs/version-4.7/deployments/install-harper/index.md
new file mode 100644
index 00000000..45849cc0
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/install-harper/index.md
@@ -0,0 +1,94 @@
+---
+title: Install Harper
+---
+
+# Install Harper
+
+This documentation contains information for installing Harper locally. Note that if you’d like to get up and running quickly, you can try a [managed instance with Harper Cloud](https://studio.harperdb.io/sign-up). Harper is a cross-platform database; we recommend Linux for production use, but Harper can run on Windows and Mac as well, for development purposes. Installation is usually very simple and just takes a few steps, but there are a few different options documented here.
+
+Harper runs on Node.js, so if you do not have it installed, you need to do that first (if you already have it installed, you can skip to installing Harper itself). Node.js can be downloaded and installed from [their site](https://nodejs.org/). For Linux and Mac, we recommend installing and managing Node versions with [NVM, which has instructions for installation](https://github.com/nvm-sh/nvm). Generally, NVM can be installed with the following command:
+
+```bash
+curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash
+```
+
+Then log out and log back in, and install Node.js using nvm. We recommend using LTS, but we support all currently maintained Node versions (currently version 14 and newer; make sure to always use the latest minor/patch for the major version):
+
+```bash
+nvm install --lts
+```
+
+#### Install and Start Harper
+
+Then you can install Harper with NPM and start it:
+
+```bash
+npm install -g harperdb
+harperdb
+```
+
+Harper will automatically start after installation. Harper's installation can be configured with numerous options via CLI arguments; for more information, visit the [Harper Command Line Interface](./harper-cli) guide.
+
+If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, and configure your operating system to use as a database server in our linux installation guide](install-harper/linux).
+
+## With Docker
+
+If you would like to run Harper in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server.
You can then pull the image:
+
+```bash
+docker pull harperdb/harperdb
+```
+
+Start a container, mount a volume, and pass environment variables:
+
+```bash
+docker run -d \
+  -v <host_directory_path>:/home/harperdb/hdb \
+  -e HDB_ADMIN_USERNAME=HDB_ADMIN \
+  -e HDB_ADMIN_PASSWORD=password \
+  -e THREADS=4 \
+  -e OPERATIONSAPI_NETWORK_PORT=null \
+  -e OPERATIONSAPI_NETWORK_SECUREPORT=9925 \
+  -e HTTP_SECUREPORT=9926 \
+  -p 9925:9925 \
+  -p 9926:9926 \
+  -p 9933:9933 \
+  harperdb/harperdb
+```
+
+Here, `<host_directory_path>` should be replaced with an actual directory path on your system where you want to store the persistent data. This command also exposes both the Harper Operations API (port 9925) and an additional HTTP port (9926).
+
+✅ Quick check:
+
+```bash
+curl http://localhost:9925/health
+```
+
+:::info
+💡 Why choose Docker: Great for consistent team environments, CI/CD pipelines, or deploying Harper alongside other services.
+:::
+
+Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a Harper container.
+
+## Offline Install
+
+If you need to install Harper on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you’ll still need Node.js and NPM):
+
+[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html)
+
+Once you’ve downloaded the .tgz file, run the following commands from the directory where you’ve placed it:
+
+```bash
+npm install -g harperdb-X.X.X.tgz
+harperdb install
+```
+
+## Installation on Less Common Platforms
+
+Harper comes with binaries for standard AMD64/x64 or ARM64 CPU architectures on Linux, Windows (x64 only), and Mac (including Apple Silicon). However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including:
+
+- [Go](https://go.dev/dl/): version 1.19.1
+- GCC
+- Make
+- Python v3.7, v3.8, v3.9, or v3.10
diff --git a/versioned_docs/version-4.7/deployments/install-harper/linux.md b/versioned_docs/version-4.7/deployments/install-harper/linux.md
new file mode 100644
index 00000000..cae27c9d
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/install-harper/linux.md
@@ -0,0 +1,225 @@
+---
+title: On Linux
+---
+
+# On Linux
+
+If you wish to install locally or already have a configured server, see the basic [Installation Guide](./).
+
+The following is a recommended way to configure Linux and install Harper. These instructions should work reasonably well for any public cloud or on-premises Linux instance.
+
+---
+
+These instructions assume that the following has already been completed:
+
+1. Linux is installed
+1. Basic networking is configured
+1. A non-root user account dedicated to Harper with sudo privileges exists
+1. An additional volume for storing Harper files is attached to the Linux instance
+1. Traffic to ports 9925 (Harper Operations API), 9926 (Harper Application Interface), and 9932 (Harper Clustering) is permitted
+
+While you will need to access Harper through port 9925 for administration through the operations API, and port 9932 for clustering, for a higher level of security you may want to consider keeping both of these ports restricted to a VPN or VPC, and only exposing the application interface (9926 by default) to the public Internet.
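+
+For example, on an Ubuntu server, a firewall configuration along these lines could implement that recommendation. This is only a sketch; the use of `ufw` and the `10.0.0.0/8` private range are assumptions to adapt to your own VPN/VPC setup:
+
+```bash
+# Allow the application interface from anywhere
+sudo ufw allow 9926/tcp
+# Restrict the operations API and clustering ports to a private network range
+sudo ufw allow from 10.0.0.0/8 to any port 9925 proto tcp
+sudo ufw allow from 10.0.0.0/8 to any port 9932 proto tcp
+sudo ufw enable
+```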
+
+For this example, we will use an AWS Ubuntu Server 22.04 LTS m5.large EC2 Instance with an additional General Purpose SSD EBS volume and the default "ubuntu" user account.
+
+---
+
+### (Optional) LVM Configuration
+
+Logical Volume Manager (LVM) can be used to stripe multiple disks together to form a single logical volume. If striping disks together is not a requirement, skip these steps.
+
+Find the disk that already has a partition
+
+```bash
+used_disk=$(lsblk -P -I 259 | grep "nvme.n1.*part" | grep -o "nvme.n1")
+```
+
+Create an array of free disks
+
+```bash
+declare -a free_disks
+mapfile -t free_disks < <(lsblk -P -I 259 | grep "nvme.n1.*disk" | grep -o "nvme.n1" | grep -v "$used_disk")
+```
+
+Get the quantity of free disks
+
+```bash
+free_disks_qty=${#free_disks[@]}
+```
+
+Construct the pvcreate command
+
+```bash
+cmd_string=""
+for i in "${free_disks[@]}"
+do
+cmd_string="$cmd_string /dev/$i"
+done
+```
+
+Initialize the disks for use by LVM
+
+```bash
+pvcreate_cmd="pvcreate $cmd_string"
+sudo $pvcreate_cmd
+```
+
+Create the volume group
+
+```bash
+vgcreate_cmd="vgcreate hdb_vg $cmd_string"
+sudo $vgcreate_cmd
+```
+
+Create the logical volume
+
+```bash
+sudo lvcreate -n hdb_lv -i $free_disks_qty -l 100%FREE hdb_vg
+```
+
+### Configure Data Volume
+
+Run `lsblk` and note the device name of the additional volume
+
+```bash
+lsblk
+```
+
+Create an ext4 filesystem on the volume (the below commands assume the device name is nvme1n1; if you used LVM to create a logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv)
+
+```bash
+sudo mkfs.ext4 -L hdb_data /dev/nvme1n1
+```
+
+Mount the file system and set the correct permissions for the directory
+
+```bash
+mkdir /home/ubuntu/hdb
+sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb
+sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb
+sudo chmod 775 /home/ubuntu/hdb
+```
+
+Create an fstab entry to mount the filesystem on boot
+
+```bash
+echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab
+```
+
+### Configure Linux and Install Prerequisites
+
+If a swap file or partition does not already exist, create and enable a 2GB swap file
+
+```bash
+sudo dd if=/dev/zero of=/swapfile bs=128M count=16
+sudo chmod 600 /swapfile
+sudo mkswap /swapfile
+sudo swapon /swapfile
+echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab
+```
+
+Increase the open file limits for the ubuntu user
+
+```bash
+echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf
+echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf
+```
+
+Install Node Version Manager (nvm)
+
+```bash
+curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash
+```
+
+Load nvm (or log out and then log back in)
+
+```bash
+. ~/.nvm/nvm.sh
+```
+
+Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites))
+
+```bash
+nvm install <version>
+```
+
+### Install and Start Harper
+
+Here is an example of installing Harper with minimal configuration.
+
+```bash
+npm install -g harperdb
+harperdb start \
+  --TC_AGREEMENT "yes" \
+  --ROOTPATH "/home/ubuntu/hdb" \
+  --OPERATIONSAPI_NETWORK_PORT "9925" \
+  --HDB_ADMIN_USERNAME "HDB_ADMIN" \
+  --HDB_ADMIN_PASSWORD "password"
+```
+
+Here is an example of installing Harper with commonly used additional configuration.
+
+```bash
+npm install -g harperdb
+harperdb start \
+  --TC_AGREEMENT "yes" \
+  --ROOTPATH "/home/ubuntu/hdb" \
+  --OPERATIONSAPI_NETWORK_PORT "9925" \
+  --HDB_ADMIN_USERNAME "HDB_ADMIN" \
+  --HDB_ADMIN_PASSWORD "password" \
+  --HTTP_SECUREPORT "9926" \
+  --CLUSTERING_ENABLED "true" \
+  --CLUSTERING_USER "cluster_user" \
+  --CLUSTERING_PASSWORD "password" \
+  --CLUSTERING_NODENAME "hdb1"
+```
+
+You can also use a custom configuration file to set values on install; use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your [custom configuration file](../configuration):
+
+```bash
+npm install -g harperdb
+harperdb start \
+  --TC_AGREEMENT "yes" \
+  --HDB_ADMIN_USERNAME "HDB_ADMIN" \
+  --HDB_ADMIN_PASSWORD "password" \
+  --HDB_CONFIG "/path/to/your/custom/harperdb-config.yaml"
+```
+
+#### Start Harper on Boot
+
+Harper will automatically start after installation. If you wish Harper to start when the OS boots, you have two options:
+
+You can set up a crontab:
+
+```bash
+(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab -
+```
+
+Or you can create a systemd unit at `/etc/systemd/system/harperdb.service`, pasting the following contents into the file:
+
+```ini
+[Unit]
+Description=Harper
+
+[Service]
+Type=simple
+Restart=always
+User=ubuntu
+Group=ubuntu
+WorkingDirectory=/home/ubuntu
+ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb'
+
+[Install]
+WantedBy=multi-user.target
+```
+
+And then run the following:
+
+```bash
+systemctl daemon-reload
+systemctl enable harperdb
+```
+
+For more information, visit the [Harper Command Line Interface guide](../harper-cli) and the [Harper Configuration File guide](../configuration).
diff --git a/versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md b/versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md
new file mode 100644
index 00000000..da1c885f
--- /dev/null
+++ b/versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md
@@ -0,0 +1,140 @@
+---
+title: Upgrade a Harper Instance
+---
+
+# Upgrade a Harper Instance
+
+This document describes best practices for upgrading self-hosted Harper instances. Harper can be upgraded using a combination of npm and built-in Harper upgrade scripts. Whenever upgrading your Harper installation, it is recommended you make a backup of your data first. Note: This document applies to self-hosted Harper instances only. All [Harper Cloud instances](harper-cloud/) will be upgraded by the Harper Cloud team.
+
+## Upgrading
+
+Upgrading Harper is a two-step process. First, the latest version of Harper must be downloaded from npm; then the Harper upgrade scripts will be utilized to ensure the newest features are available on the system.
+
+1. Install the latest version of Harper using `npm install -g harperdb`.
+
+   Note `-g` should only be used if you installed Harper globally (which is recommended).
+
+1. Run `harperdb` to initiate the upgrade process.
+
+   Harper will then prompt you for all appropriate inputs and then run the upgrade directives.
+
+## Node Version Manager (nvm)
+
+[Node Version Manager (nvm)](https://nvm.sh/) is an easy way to install, remove, and switch between different versions of Node.js as required by various applications. More information, including directions on installing nvm, can be found here: [https://nvm.sh/](https://nvm.sh/).
+
+Harper supports Node.js versions 14.0.0 and higher; however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command:
+
+```bash
+nvm install <version>
+```
+
+To switch to a version of Node, run:
+
+```bash
+nvm use <version>
+```
+
+To see the current running version of Node, run:
+
+```bash
+node --version
+```
+
+With a handful of different versions of Node.js installed, run nvm with the `ls` argument to list out all installed versions:
+
+```bash
+nvm ls
+```
+
+When upgrading Harper, we recommend also upgrading your Node version. Here we assume you're running on an older version of Node; the execution may look like this:
+
+Switch to the older version of Node that Harper is running on (if it is not the current version):
+
+```bash
+nvm use 14.19.0
+```
+
+Make sure Harper is not running:
+
+```bash
+harperdb stop
+```
+
+Uninstall Harper. Note, this step is not required, but will clean up old artifacts of Harper. We recommend removing all other Harper installations to ensure the most recent version is always running.
+
+```bash
+npm uninstall -g harperdb
+```
+
+Switch to the newer version of Node:
+
+```bash
+nvm use <version>
+```
+
+Install Harper globally:
+
+```bash
+npm install -g harperdb
+```
+
+Run the upgrade script:
+
+```bash
+harperdb
+```
+
+Start Harper:
+
+```bash
+harperdb start
+```
+
+---
+
+## Upgrading NATS to Plexus 4.4
+
+To upgrade from NATS clustering to Plexus replication, follow these manual steps. They are designed for a fully replicating cluster to ensure minimal disruption during the upgrade process.
+
+The core of this upgrade is the _bridge node_. This node will run both NATS and Plexus simultaneously, ensuring that transactions are relayed between the two systems during the transition. The bridge node is crucial in preventing any replication downtime, as it will handle transactions from NATS nodes to Plexus nodes and vice versa.
+
+### Enabling Plexus
+
+To enable Plexus on a node that is already running NATS, you will need to update [two values](configuration) in the `harperdb-config.yaml` file:
+
+```yaml
+replication:
+  url: wss://my-cluster-node-1:9925
+  hostname: node-1
+```
+
+`replication.url` – This should be set to the URL of the current Harper instance.
+
+`replication.hostname` – Since we are upgrading from NATS, this value should match the `clustering.nodeName` of the current instance.
+
+### Upgrade Steps
+
+1. Set up the bridge node:
+   - Choose one node to be the bridge node.
+   - On this node, follow the "Enabling Plexus" steps from the previous section, but **do not disable NATS clustering on this instance.**
+   - Stop the instance and perform the upgrade.
+   - Start the instance. This node should now be running both Plexus and NATS.
+1. Upgrade a node:
+   - Choose a node that needs upgrading and enable Plexus by following the "Enabling Plexus" steps.
+   - Disable NATS by setting `clustering.enabled` to `false`.
+   - Stop the instance and upgrade it.
+   - Start the instance.
+   - Call [`add_node`](../developers/operations-api/clustering#add-node) on the upgraded instance. In this call, omit `subscriptions` so that a fully replicating cluster is built. The target node for this call should be the bridge node.
_Note: depending on your setup, you may need to expand this `add_node` call to include_ [_authorization and/or tls information_](../developers/operations-api/clustering#add-node)_._
+
+```json
+{
+  "operation": "add_node",
+  "hostname": "node-1",
+  "url": "wss://my-cluster-node-1:9925"
+}
+```
+
+1. Repeat Step 2 on all remaining nodes that need to be upgraded.
+1. Disable NATS on the bridge node by setting `clustering.enabled` to `false` and restart the instance.
+
+Your cluster upgrade should now be complete, with no NATS processes running on any of the nodes.
diff --git a/versioned_docs/version-4.7/developers/_category_.json b/versioned_docs/version-4.7/developers/_category_.json
new file mode 100644
index 00000000..fdc723e5
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/_category_.json
@@ -0,0 +1,10 @@
+{
+  "label": "Developers",
+  "position": 1,
+  "link": {
+    "type": "generated-index",
+    "title": "Developers Documentation",
+    "description": "Comprehensive guides and references for building applications with HarperDB",
+    "keywords": ["developers"]
+  }
+}
diff --git a/versioned_docs/version-4.7/developers/applications/caching.md b/versioned_docs/version-4.7/developers/applications/caching.md
new file mode 100644
index 00000000..34cf778c
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/applications/caching.md
@@ -0,0 +1,292 @@
+---
+title: Caching
+---
+
+# Caching
+
+Harper has integrated support for caching data from external sources. With built-in caching capabilities and distributed high-performance low-latency responsiveness, Harper makes an ideal data caching server. Harper can store cached data in standard tables, as queryable structured data, so data can easily be consumed in one format (for example JSON or CSV) and provided to end users in different formats with different selected properties (for example MessagePack, with a subset of selected properties), or even with customized querying capabilities. Harper also manages and provides timestamps/tags for proper caching control, facilitating further downstream caching. With these combined capabilities, Harper is an extremely fast, interoperable, flexible, and customizable caching server.
+
+## Configuring Caching
+
+To set up caching, first you will need to define a table that you will use as your cache (to store the cached data). You can review the [introduction to building applications](./) for more information on setting up the application (and the [defining schemas documentation](defining-schemas)), but once you have defined an application folder with a schema, you can add a table for caching to your `schema.graphql`:
+
+```graphql
+type MyCache @table(expiration: 3600) @export {
+	id: ID @primaryKey
+}
+```
+
+You may also note that we can define a time-to-live (TTL) expiration on the table, indicating when table records/entries should expire and be evicted from this table. This is generally necessary for "passive" caches where there is no active notification of when entries expire. However, this is not needed if you provide a means of notifying when data is invalidated and changed. The units for expiration, and other duration-based properties, are in seconds.
+
+While you can provide a single expiration time, there are actually several expiration timings that are potentially relevant, and can be independently configured.
These settings are available as directive properties on the table configuration (like `expiration` above):

- **Stale expiration**: The point when a request for a record should trigger a request to origin (but might possibly return the current stale record depending on policy).
- **Must-revalidate expiration**: The point when a request for a record must make a request to origin first and return the latest value from origin.
- **Eviction expiration**: The point when a record is actually removed from the caching table.

You can provide a single expiration and it defines the behavior for all three. You can also provide three settings for expiration, through table directives:

- `expiration` - The amount of time until a record goes stale.
- `eviction` - The amount of time after expiration before a record can be evicted (defaults to zero).
- `scanInterval` - The interval for scanning for expired records (defaults to one quarter of the total of expiration and eviction).

## Define External Data Source

Next, you need to define the source for your cache. External data sources could be HTTP APIs, other databases, microservices, or any other source of data. This can be defined as a resource class in your application's `resources.js` module. You can extend the `Resource` class (which is available as a global variable in the Harper environment) as your base class. The first method to implement is a `get()` method to define how to retrieve the source data. For example, if we were caching an external HTTP API, we might define it as such:

```javascript
class ThirdPartyAPI extends Resource {
	async get() {
		return (await fetch(`https://some-api.com/${this.getId()}`)).json();
	}
}
```

Next, we define this external data resource as the "source" for the caching table we defined above:

```javascript
const { MyCache } = tables;
MyCache.sourcedFrom(ThirdPartyAPI);
```

Now we have a fully configured and connected caching table. If you access data from `MyCache` (for example, through the REST API, like `/MyCache/some-id`), Harper will check to see if the requested entry is in the table and return it if it is available (and hasn't expired). If there is no entry, or it has expired (it is older than one hour in this case), it will go to the source, calling the `get()` method, which will then retrieve the requested entry. Once the entry is retrieved, it will be saved/cached in the caching table (for one hour based on our expiration time).

```mermaid
flowchart TD
    Client1(Client 1)-->Cache(Caching Table)
    Client2(Client 2)-->Cache
    Cache-->Resource(Data Source Connector)
    Resource-->API(Remote Data Source API)
```

Harper handles waiting for an existing cache resolution to finish and uses its result. This prevents a "cache stampede" when entries expire, ensuring that multiple requests to a cache entry will all wait on a single request to the data source.

Cache tables with an expiration are periodically pruned for expired entries. Because this is done periodically, there is usually some amount of time between when a record has expired and when the record is actually evicted (the cached data is removed). But when a record is checked for availability, the expiration time is used to determine if the record is fresh (and the cache entry can be used).
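Putting the expiration settings together, a caching table that tunes staleness, eviction, and scanning might be declared like this (a sketch; all durations are in seconds, and the specific values are arbitrary examples):

```graphql
type MyCache @table(expiration: 3600, eviction: 600, scanInterval: 1800) @export {
	id: ID @primaryKey
}
```

With these settings, a record goes stale one hour after it is written, may be evicted ten minutes after that, and the table is scanned for expired records every thirty minutes.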
### Eviction with Indexing

Eviction is the removal of a locally cached copy of data, but it does not imply the deletion of the actual data from the canonical or origin data source. Because evicted records still exist (just not in the local cache), if a caching table uses expiration (and eviction), and has indexing on certain attributes, the data is not removed from the indexes. The indexes that reference the evicted record are preserved, along with the attribute data necessary to maintain these indexes. Therefore, eviction means the removal of non-indexed data (in this case evictions are stored as "partial" records). Eviction only removes the data that can be safely removed from a cache without affecting the integrity or behavior of the indexes. If a search query is performed that matches this evicted record, the record will be requested on-demand to fulfill the search query.

### Specifying a Timestamp

In the example above, we simply retrieved data to fulfill a cache request. We may also want to supply the timestamp of the record we are fulfilling. This can be set on the context for the request:

```javascript
class ThirdPartyAPI extends Resource {
	async get() {
		let response = await fetch(`https://some-api.com/${this.getId()}`);
		this.getContext().lastModified = response.headers.get('Last-Modified');
		return response.json();
	}
}
```

#### Specifying an Expiration

In addition, we can also specify when a cached record "expires". When a cached record expires, a request for that record will trigger a request to the data source again. This does not necessarily mean that the cached record has been evicted (removed), although expired records will be periodically evicted. If the cached record still exists, the data source can revalidate it and return it. For example:

```javascript
class ThirdPartyAPI extends Resource {
	async get() {
		const context = this.getContext();
		let headers = new Headers();
		if (context.replacingVersion) // this is the existing cached record
			headers.set('If-Modified-Since', new Date(context.replacingVersion).toUTCString());
		let response = await fetch(`https://some-api.com/${this.getId()}`, { headers });
		let cacheInfo = response.headers.get('Cache-Control');
		let maxAge = cacheInfo?.match(/max-age=(\d+)/)?.[1]; // capture the full max-age value, not just a single digit
		if (maxAge) // we can set a specific expiration time by setting context.expiresAt
			context.expiresAt = Date.now() + maxAge * 1000; // convert from seconds to milliseconds and add to current time
		// we can just revalidate and return the record if the origin has confirmed that it has the same version:
		if (response.status === 304) return context.replacingRecord;
		...
```

## Active Caching and Invalidation

The cache we have created above is a "passive" cache; it only pulls data from the data source as needed, and has no knowledge of whether or when data from the data source has actually changed, so it must rely on timer-based expiration to periodically retrieve possibly updated data. This means that the cache may hold stale data for a while (if the underlying data has changed, but the cached data hasn't expired), and the cache may refresh more often than necessary (if the source data hasn't changed). Consequently, it can be significantly more effective to implement an "active" cache, in which the data source is monitored and notifies the cache when any data changes. This ensures that when data changes, the cache can immediately load the updated data, and unchanged data can remain cached much longer (or indefinitely).

### Invalidate

One way to provide more active caching is to specifically invalidate individual records.
Invalidation is useful when you know the source data has changed, and the cache needs to re-retrieve data from the source the next time that record is accessed. This can be done by executing the `invalidate()` method on a resource. For example, you could extend a table (in your resources.js) and provide a custom POST handler that does invalidation:

```javascript
const { MyTable } = tables;
export class MyTableEndpoint extends MyTable {
	async post(data) {
		if (data.invalidate)
			// use this flag as a marker
			this.invalidate();
	}
}
```

(Note that if you are now exporting this endpoint through resources.js, you don't necessarily need to directly export the table separately in your schema.graphql.)

### Subscriptions

We can provide more control of an active cache with subscriptions. If there is a way to receive notifications from the external data source of data changes, we can implement this data source as an "active" data source for our cache by implementing a `subscribe` method. A `subscribe` method should return an asynchronous iterable that iterates and returns events indicating the updates. One straightforward way of creating an asynchronous iterable is by defining the `subscribe` method as an asynchronous generator. If we had an endpoint that we could poll for changes every second, we could implement this like:

```javascript
class ThirdPartyAPI extends Resource {
	async *subscribe() {
		while (true) { // poll for more data every second
			// get the next data change event from the source
			let update = await (await fetch(`https://some-api.com/latest-update`)).json();
			const event = { // define the change event (which will update the cache)
				type: 'put', // this would indicate that the event includes the new data value
				id: update.id, // the primary key of the record that updated (assuming the source payload includes it)
				value: update.value, // the new value of the record that updated
				timestamp: update.timestamp, // the timestamp of when the data change occurred
			};
			yield event; // this returns this event, notifying the cache of the change
			await new Promise((resolve) => setTimeout(resolve, 1000)); // wait a second before polling again
		}
	}
	async get() {
...
```

Notification events should always include an `id` property to indicate the primary key of the updated record. The event should have a `value` property for `put` and `message` event types. The `timestamp` is optional and can be used to indicate the exact timestamp of the change. The following event `type`s are supported:

- `put` - This indicates that the record has been updated and provides the new value of the record.
- `invalidate` - Alternately, you can notify with an event type of `invalidate` to indicate that the data has changed, but without the overhead of actually sending the data (the `value` property is not needed), so the data only needs to be sent if and when the data is requested through the cache. An `invalidate` will evict the entry and update the timestamp to indicate that there is new data that should be requested (if needed).
- `delete` - This indicates that the record has been deleted.
- `message` - This indicates a message is being passed through the record. The record value has not changed, but this is used for [publish/subscribe messaging](../real-time).
- `transaction` - This indicates that there are multiple writes that should be treated as a single atomic transaction. These writes should be included as an array of data notification events in the `writes` property.
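For illustration, here is a sketch of what concrete notification events might look like (the ids, values, and table names are hypothetical; the `writes` and `table` properties used in the transaction event are described below):

```javascript
// a simple put event for a single record
const putEvent = {
	type: 'put',
	id: 'product-1',
	value: { id: 'product-1', name: 'Widget', price: 9.99 },
	timestamp: Date.now(),
};

// a transaction event grouping multiple writes into one atomic update
const transactionEvent = {
	type: 'transaction',
	timestamp: Date.now(),
	writes: [
		{ type: 'put', table: 'Product', id: 'product-1', value: { id: 'product-1', name: 'Widget' } },
		{ type: 'delete', table: 'Review', id: 'review-42' },
	],
};
```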
And the following properties can be defined on event objects:

- `type`: The event type as described above.
- `id`: The primary key of the record that updated.
- `value`: The new value of the record that updated (for `put` and `message`).
- `writes`: An array of event properties that are part of a transaction (used in conjunction with the `transaction` event type).
- `table`: The name of the table with the record that was updated. This can be used with events within a transaction to specify events across multiple tables.
- `timestamp`: The timestamp of when the data change occurred.

With an active external data source with a `subscribe` method, the data source will proactively notify the cache, ensuring a fresh and efficient active cache. Note that with an active data source, we still use the `sourcedFrom` method to register the source for a caching table, and the table will automatically detect and call the subscribe method on the data source.

By default, Harper will only run the subscribe method on one thread. Harper is multi-threaded and normally runs many concurrent worker threads, but running a subscription on multiple threads can introduce overlapping notifications and race conditions, so running a subscription on a single thread is preferable. However, if you want to enable subscribe on multiple threads, you can define a `static subscribeOnThisThread` method to specify if the subscription should run on the current thread:

```javascript
class ThirdPartyAPI extends Resource {
	static subscribeOnThisThread(threadIndex) {
		return threadIndex < 2; // run on two threads (the first two threads)
	}
	async *subscribe() {
		....
```

An alternative to using asynchronous generators is to use a subscription stream and send events to it. A default subscription stream (that doesn't generate its own events) is available from the Resource's default subscribe method:

```javascript
class ThirdPartyAPI extends Resource {
	subscribe() {
		const subscription = super.subscribe();
		setupListeningToRemoteService().on('update', (event) => {
			subscription.send(event);
		});
		return subscription;
	}
}
```

## Downstream Caching

It is highly recommended that you utilize the [REST interface](../rest) for accessing caching tables, as it facilitates downstream caching for clients. Timestamps are recorded with all cached entries. Timestamps are then used for incoming [REST requests to specify the `ETag` in the response](../rest#cachingconditional-requests). Clients can cache data themselves and send requests using the `If-None-Match` header to conditionally get a 304 and preserve their cached data based on the timestamp/`ETag` of the entries that are cached in Harper. Caching tables also have [subscription capabilities](caching#subscribing-to-caching-tables), which means that downstream caches can be fully "layered" on top of Harper, both as passive or active caches.
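For example, a downstream client could revalidate its cached copy with a conditional request (a sketch; the `ETag` value here is hypothetical and would come from a previous response's `ETag` header):

```javascript
// revalidate a locally cached entry against Harper's REST interface
const response = await fetch('http://localhost:9926/MyCache/some-id', {
	headers: { 'If-None-Match': '"1693526400000"' }, // ETag from a prior response
});
if (response.status === 304) {
	// not modified: the client's cached copy is still fresh and can be reused
}
```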
## Write-Through Caching

The cache we have defined so far only has data flowing from the data source to the cache. However, you may wish to support write methods, so that writes to the cache table can flow through to the underlying canonical data source, as well as populate the cache. This can be accomplished by implementing the standard write methods, like `put` and `delete`. If you are using an API with standard RESTful methods, you can pass writes through to the data source like this:

```javascript
class ThirdPartyAPI extends Resource {
	async put(data) {
		await fetch(`https://some-api.com/${this.getId()}`, {
			method: 'PUT',
			body: JSON.stringify(data),
		});
	}
	async delete() {
		await fetch(`https://some-api.com/${this.getId()}`, {
			method: 'DELETE',
		});
	}
	...
```

When doing an insert or update to the MyCache table, the data will be sent to the underlying data source through the `put` method, and the new record value will be stored in the cache as well.

### Loading from Source in Methods

When you are using a caching table, it is important to remember that any resource method besides `get()` will not automatically load data from the source. If you have defined a `put()`, `post()`, or `delete()` method and you need the source data, you can ensure it is loaded by calling the `ensureLoaded()` method. For example, if you want to modify the existing record from the source, adding a property to it:

```javascript
class MyCache extends tables.MyCache {
	async post(data) {
		// if the data is not cached locally, retrieves from source:
		await this.ensureLoaded();
		// now we can be sure that the data is loaded, and can access properties
		this.quantity = this.quantity - data.purchases;
	}
}
```

### Subscribing to Caching Tables

You can subscribe to a caching table just like any other table. The one difference is that normal tables do not usually have `invalidate` events, but an active caching table may have `invalidate` events. Again, this event type gives listeners an opportunity to choose whether or not to actually retrieve the value that changed.

### Passive-Active Updates

With our passive update examples, we have provided a data source handler with a `get()` method that returns the specific requested record as the response. However, we can also actively update other records in our response handler (if our data source provides data that should be propagated to other related records). This can be done transactionally, to ensure that all updates occur atomically. The context that is provided to the data source holds the transaction information, so we can simply pass the context to any update/write methods that we call. For example, let's say we are loading a blog post, which also includes comment records:

```javascript
const { Post, Comment } = tables;
class BlogSource extends Resource {
	async get() {
		const post = await (await fetch(`https://my-blog-server/${this.getId()}`)).json();
		for (let comment of post.comments) {
			await Comment.put(comment, this); // save this comment as part of our current context and transaction
		}
		return post;
	}
}
Post.sourcedFrom(BlogSource);
```

Here both the update to the post and the update to the comments will be atomically/transactionally committed together with the same timestamp.

## Cache-Control Header

When interacting with cached data, you can also use the `Cache-Control` request header to specify certain caching behaviors. When performing a PUT (or POST) method, you can use the `max-age` directive to indicate how long the resource should be cached (until stale):

```http
PUT /my-resource/id
Cache-Control: max-age=86400
```

You can use the `only-if-cached` directive on GET requests to only return a resource if it is cached (otherwise a 504 is returned).
Note that if the entry is not cached, this will still trigger a request for the source data from the data source. If you do not want source data retrieved, you can add the `no-store` directive. You can also use the `no-cache` directive if you do not want to use the cached resource. If you want to check whether there is a cached resource without triggering a request to the data source:

```http
GET /my-resource/id
Cache-Control: only-if-cached, no-store
```

You may also use the `stale-if-error` directive to indicate that it is acceptable to return a stale cached resource when the data source returns an error (network connection error, 500, 502, 503, or 504). The `must-revalidate` directive indicates that a stale cached resource cannot be returned, even when the data source has an error (by default a stale cached resource is returned when there is a network connection error).

## Caching Flow

It may be helpful to understand the flow of a cache request. When a request is made to a caching table:

- Harper will first create a resource instance to handle the process, and ensure that the data is loaded for the resource instance. To do this, it will first check if the record is in the table/cache.
  - If the record is not in the cache, Harper will first check if there is a current request to get the record from the source. If there is, Harper will wait for the request to complete and return the record from the cache.
  - If not, Harper will call the `get()` method on the source to retrieve the record. The record will then be stored in the cache.
  - If the record is in the cache, Harper will check if the record is stale. If the record is not stale, Harper will immediately return the record from the cache. If the record is stale, Harper will call the `get()` method on the source to retrieve the record.
  - The record will then be stored in the cache. This will write the record to the cache in a separate asynchronous/background write-behind transaction, so it does not block the current request, then return the data immediately once it has it.
- The `get()` method will be called on the resource instance to return the record to the client (or perform any querying on the record). If this method is overridden, it will be called at this time.

### Caching Flow with Write-Through

When writes are performed on a caching table (in a `put()` or `post()` method, for example), the flow is slightly different:

- Harper will have first created a resource instance to handle the process, and this resource instance will be the current `this` for a call to `put()` or `post()`.
- If a `put()` or `update()` is called, for example, this action will be recorded in the current transaction.
- Once the transaction is committed (which is done automatically as the request handler completes), the transaction write will be sent to the source to update the data.
  - The local writes will wait for the source to confirm the writes have completed (note that this effectively allows you to perform a two-phase transactional write to the source, and the source can confirm the writes have completed before the transaction is committed locally).
  - The transaction writes will then be written to the local caching table.
- The transaction handler will wait for the local commit to be written, then the transaction will be resolved and a response will be sent to the client.
diff --git a/versioned_docs/version-4.7/developers/applications/data-loader.md b/versioned_docs/version-4.7/developers/applications/data-loader.md
new file mode 100644
index 00000000..b4059207
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/applications/data-loader.md
@@ -0,0 +1,195 @@
---
title: Data Loader
---

# Data Loader

The Data Loader is a built-in component that provides a reliable mechanism for loading data from JSON or YAML files into Harper tables as part of component deployment. This feature is particularly useful for ensuring specific records exist in your database when deploying components, such as seed data, configuration records, or initial application data.

## Configuration

To use the Data Loader, first specify your data files in the `config.yaml` in your component directory:

```yaml
dataLoader:
  files: 'data/*.json'
```

The Data Loader is an [Extension](../../reference/components#extensions) and supports the standard `files` configuration option.

## Data File Format

Data files can be structured as either JSON or YAML files containing the records you want to load. Each data file must specify records for a single table - if you need to load data into multiple tables, create separate data files for each table.

### Basic Example

Create a data file in your component's data directory (one table per file):

```json
{
  "database": "myapp",
  "table": "users",
  "records": [
    {
      "id": 1,
      "username": "admin",
      "email": "admin@example.com",
      "role": "administrator"
    },
    {
      "id": 2,
      "username": "user1",
      "email": "user1@example.com",
      "role": "standard"
    }
  ]
}
```

### Multiple Tables

To load data into multiple tables, create separate data files for each table:

**users.json:**

```json
{
  "database": "myapp",
  "table": "users",
  "records": [
    {
      "id": 1,
      "username": "admin",
      "email": "admin@example.com"
    }
  ]
}
```

**settings.yaml:**

```yaml
database: myapp
table: settings
records:
  - id: 1
    setting_name: app_name
    setting_value: My Application
  - id: 2
    setting_name: version
    setting_value: '1.0.0'
```

## File Organization

You can organize your data files in various ways:

### Single File Pattern

```yaml
dataLoader:
  files: 'data/seed-data.json'
```

### Multiple Files Pattern

```yaml
dataLoader:
  files:
    - 'data/users.json'
    - 'data/settings.yaml'
    - 'data/initial-products.json'
```

### Glob Pattern

```yaml
dataLoader:
  files: 'data/**/*.{json,yaml,yml}'
```

## Loading Behavior

When Harper starts up with a component that includes the Data Loader:

1. The Data Loader reads all specified data files (JSON or YAML)
1. For each file, it validates that a single table is specified
1. Records are inserted or updated based on content hash comparison:
   - New records are inserted if they don't exist
   - Existing records are updated only if the data file content has changed
   - User modifications made via Operations API or other methods are preserved - those records won't be overwritten
   - Users can add extra fields to data-loader records without blocking future updates to the original fields
1. 
The Data Loader uses SHA-256 content hashing stored in a system table (`hdb_dataloader_hash`) to track which records it has loaded and detect changes

### Change Detection

The Data Loader intelligently handles various scenarios:

- **New records**: Inserted with their content hash stored
- **Unchanged records**: Skipped (no database writes)
- **Changed data file**: Records are updated using `patch` to preserve any extra fields users may have added
- **User-created records**: Records created outside the Data Loader (via Operations API, REST, etc.) are never overwritten
- **User-modified records**: Records modified after being loaded are preserved and not overwritten
- **User-added fields**: Extra fields added to data-loader records are preserved during updates

This approach ensures data files can be safely reloaded across deployments and node scaling without losing user modifications.

Note: While the Data Loader can create tables automatically by inferring the schema from the provided records, it's recommended to define your table schemas explicitly using the [graphqlSchema](../applications/defining-schemas) component for better control and type safety.

## Best Practices

1. **Define Schemas First**: While the Data Loader can infer schemas, it's strongly recommended to define your table schemas and relations explicitly using the [graphqlSchema](../applications/defining-schemas) component before loading data. This ensures proper data types, constraints, and relationships between tables.

1. **One Table Per File**: Remember that each data file can only load records into a single table. Organize your files accordingly.

1. **Idempotency**: Design your data files to be idempotent - they should be safe to load multiple times without creating duplicate or conflicting data.

1. **Version Control**: Include your data files in version control to ensure consistency across deployments.

1. **Environment-Specific Data**: Consider using different data files for different environments (development, staging, production), as sketched below.

1. **Data Validation**: Ensure your data files are valid JSON or YAML and match your table schemas before deployment.

1. **Sensitive Data**: Avoid including sensitive data like passwords or API keys directly in data files. Use environment variables or secure configuration management instead.
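One way to apply the environment-specific data practice is to keep a data directory per environment and point the loader at the appropriate one in each deployment's `config.yaml` (a sketch; the per-environment directory layout is an assumption - only the `files` option is part of the documented configuration):

```yaml
dataLoader:
  files: 'data/production/*.json'
```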
## Example Component Structure

```
my-component/
├── config.yaml
├── data/
│   ├── users.json
│   ├── roles.json
│   └── settings.json
├── schemas.graphql
└── roles.yaml
```

With this structure, your `config.yaml` might look like:

```yaml
# Load environment variables first
loadEnv:
  files: '.env'

# Define schemas
graphqlSchema:
  files: 'schemas.graphql'

# Define roles
roles:
  files: 'roles.yaml'

# Load initial data
dataLoader:
  files: 'data/*.json'

# Enable REST endpoints
rest: true
```

## Related Documentation

- [Built-In Components](../../reference/components/built-in-extensions)
- [Extensions](../../reference/components/extensions)
- [Bulk Operations](../operations-api/bulk-operations) - For loading data via the Operations API

diff --git a/versioned_docs/version-4.7/developers/applications/debugging.md b/versioned_docs/version-4.7/developers/applications/debugging.md
new file mode 100644
index 00000000..bd9d2622
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/applications/debugging.md
@@ -0,0 +1,39 @@
---
title: Debugging Applications
---

# Debugging Applications

Harper components and applications run inside the Harper process, which is a standard Node.js process that can be debugged with standard JavaScript development tools like Chrome's devtools, VSCode, and WebStorm. Debugging can be performed by launching the Harper entry script with your IDE, or you can start Harper in dev mode and connect your debugger to the running process (which defaults to the standard 9229 port):

```bash
harperdb dev
# or to run and debug a specific app
harperdb dev /path/to/app
```

Once you have connected a debugger, you may set breakpoints in your application and fully debug it. Note that when using the `dev` command from the CLI, this will run Harper in single-threaded mode. This would not be appropriate for production use, but makes it easier to debug applications.

For local debugging and development, it is recommended that you use standard console log statements for logging. For production use, you may want to use Harper's logging facilities, so you aren't logging to the console. The logging functions are available on the global `logger` variable that is provided by Harper. This logger can be used to output messages directly to the Harper log using standardized logging level functions, described below. The log level can be set in the [Harper Configuration File](../../deployments/configuration).

Harper Logger Functions

- `trace(message)`: Write a 'trace' level log, if the configured level allows for it.
- `debug(message)`: Write a 'debug' level log, if the configured level allows for it.
- `info(message)`: Write an 'info' level log, if the configured level allows for it.
- `warn(message)`: Write a 'warn' level log, if the configured level allows for it.
- `error(message)`: Write an 'error' level log, if the configured level allows for it.
- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it.
- `notify(message)`: Write a 'notify' level log.

For example, you can log a warning:

```javascript
logger.warn('You have been warned');
```

If you want to ensure a message is logged, you can use `notify`, as these messages will appear in the log regardless of the configured log level.

## Viewing the Log

The Harper Log can be found in your local `~/hdb/log/hdb.log` file (or in the log folder if you have specified an alternate hdb root), or in the Studio Status page.
Additionally, you can use the [`read_log` operation](../operations-api/logs) to query the Harper log.

diff --git a/versioned_docs/version-4.7/developers/applications/define-routes.md b/versioned_docs/version-4.7/developers/applications/define-routes.md
new file mode 100644
index 00000000..d16c787e
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/applications/define-routes.md
@@ -0,0 +1,119 @@
---
title: Define Fastify Routes
---

# Define Fastify Routes

Harper’s applications provide an extension for loading [Fastify](https://www.fastify.io/) routes as a way to handle endpoints. While we generally recommend building your endpoints/APIs with Harper's [REST interface](../rest) for better performance and standards compliance, Fastify's routes can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration.

The Fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)):

```yaml
fastifyRoutes: # This loads files that define fastify routes using fastify's auto-loader
  files: routes/*.js # specify the location of route definition modules
  path: . # relative to the app-name, like https://server/app-name/route-name
```

By default, route URLs are configured to be:

- \[**Instance URL**]:\[**HTTP Port**]/\[**Project Name**]/\[**Route URL**]

However, you can specify the path to be `/` if you wish to have your routes handle the root path of incoming URLs.

- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**.

In effect, this route is just a pass-through to Harper. The same result could have been achieved by hitting the core Harper API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below.

```javascript
export default async (server, { hdbCore, logger }) => {
	server.route({
		url: '/',
		method: 'POST',
		preValidation: hdbCore.preValidation,
		handler: hdbCore.request,
	});
};
```

## Custom Handlers

For endpoints where you want to execute multiple operations against Harper, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age.

**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which, as the name implies, bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.**

```javascript
export default async (server, { hdbCore, logger }) => {
	server.route({
		url: '/:id',
		method: 'GET',
		handler: async (request) => {
			request.body = {
				operation: 'sql',
				sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`,
			};

			const result = await hdbCore.requestWithoutAuthentication(request);
			return result.filter((dog) => dog.age > 4);
		},
	});
};
```

## Custom preValidation Hooks

The simple example above was just a pass-through to Harper: the exact same result could have been achieved by hitting the core Harper API.
But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that.

Below is an example of a route that uses a custom validation hook:

```javascript
import customValidation from '../helpers/customValidation';

export default async (server, { hdbCore, logger }) => {
	server.route({
		url: '/:id',
		method: 'GET',
		preValidation: (request) => customValidation(request, logger),
		handler: (request) => {
			request.body = {
				operation: 'sql',
				sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`,
			};

			return hdbCore.requestWithoutAuthentication(request);
		},
	});
};
```

Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](define-routes#helper-methods).

## Helper Methods

When declaring routes, you are given access to two helpers: hdbCore and logger.

**hdbCore**

hdbCore contains three functions that allow you to authenticate an inbound request and execute operations against Harper directly, by passing standard Operations API requests.

- **preValidation**

  This is an array of functions used for Fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard Harper Operations API (for example, `hdbCore.preValidation[1](req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**.

- **request**

  This will execute a request with Harper using the operations API. The `request.body` should contain a standard Harper operation and must also include the `hdb_user` property that was added to `request.body` in the preValidation callback.

- **requestWithoutAuthentication**

  Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request. For security purposes, you should always take the following precautions when using this method:
  - Properly handle user-submitted values, including URL params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL.

**logger**

This helper allows you to write directly to the log file, hdb.log. It’s useful for debugging during development, although you may also use the console logger. There are five functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file.

- `logger.trace('Starting the handler for /dogs')`
- `logger.debug('This should only fire once')`
- `logger.warn('This should never ever fire')`
- `logger.error('This did not go well')`
- `logger.fatal('This did not go very well at all')`
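For reference, here is a minimal sketch of what the `customValidation` helper used above might look like. This is an illustration only: the header handling, the validation endpoint, and the error shape are all assumptions, not part of Harper's API.

```javascript
// helpers/customValidation.js - a hypothetical example helper
export default async function customValidation(request, logger) {
	const token = request.headers.authorization;
	if (!token) {
		logger.warn('Request rejected: missing authorization header');
		const error = new Error('Unauthorized');
		error.statusCode = 401; // Fastify uses statusCode on thrown errors for the reply status
		throw error;
	}
	// confer with a hypothetical third-party auth service to validate the token
	const response = await fetch('https://auth.example.com/validate', {
		headers: { authorization: token },
	});
	if (!response.ok) {
		logger.warn('Request rejected: token validation failed');
		const error = new Error('Unauthorized');
		error.statusCode = 401;
		throw error;
	}
}
```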
diff --git a/versioned_docs/version-4.7/developers/applications/defining-roles.md b/versioned_docs/version-4.7/developers/applications/defining-roles.md
new file mode 100644
index 00000000..365aa132
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/applications/defining-roles.md
@@ -0,0 +1,85 @@
---
title: Defining Application Roles
---

# Defining Application Roles

Applications are more than just tables and endpoints — they need access rules. Harper lets you define roles directly in your application so you can control who can do what, without leaving your codebase.

Let’s walk through creating a role, assigning it, and seeing it in action.

## Step 1: Declare a Role

First, point Harper to a roles configuration file. Add this to your `config.yaml`:

```yaml
roles:
  files: roles.yaml
```

Then create a simple `roles.yaml` in your application directory. For example, here’s a role that can only read and insert data into the `Dog` table:

```yaml
dog_reader:
  super_user: false
  data:
    Dog:
      read: true
      insert: true
```

When Harper starts up, it will create this role (or update it if it already exists).

## Step 2: Create a User for the Role

Next, create a non-super_user user and assign them this role. You can do this with the [Users and Roles API](../security/users-and-roles) (requires a super_user to run):

```bash
curl -u admin:password -X POST http://localhost:9926 \
  -H "Content-Type: application/json" \
  -d '{
    "operation": "add_user",
    "username": "alice",
    "password": "password",
    "role": "dog_reader"
  }'
```

Now you have a user named `alice` with the `dog_reader` role.

## Step 3: Make Requests as Different Users

Authenticate requests as `alice` to see how her role works:

```bash
# allowed (insert, role permits insert)
curl -u alice:password -X POST http://localhost:9926/Dog/ \
  -H "Content-Type: application/json" \
  -d '{"name": "Buddy", "breed": "Husky"}'

# not allowed (delete, role does not permit delete)
curl -u alice:password -X DELETE http://localhost:9926/Dog/1
```

The first request succeeds with a `200 OK`. The second fails with a `403 Forbidden`.

Now compare with a super_user:

```bash
# super_user can delete
curl -u admin:password -X DELETE http://localhost:9926/Dog/1
```

This succeeds because the super_user role has full permissions.

## Where to Go Next

This page gave you the basics - declare a role, assign it, and see it work.

For more advanced scenarios, including:

- defining multiple databases per role,
- granting fine-grained attribute-level permissions,
- and the complete structure of `roles.yaml`,

see the [Roles Reference](../../reference/roles).

diff --git a/versioned_docs/version-4.7/developers/applications/defining-schemas.md b/versioned_docs/version-4.7/developers/applications/defining-schemas.md
new file mode 100644
index 00000000..9df28255
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/applications/defining-schemas.md
@@ -0,0 +1,272 @@
---
title: Defining Schemas
---

# Defining Schemas

Schemas define tables and their attributes. Schemas can be declaratively defined in Harper using GraphQL schema definitions. Schema definitions can be used to ensure that tables exist (that are required for applications) and have the appropriate attributes. Schemas can define the primary key, data types for attributes, whether they are required, and specify which attributes should be indexed. The [introduction to applications](./) provides a helpful introduction to how to use schemas as part of database application development.

Schemas can be used to define the expected structure of data, but are also highly flexible and support heterogeneous data structures; by default, they allow data to include additional properties. The standard types for GraphQL schemas are specified in the [GraphQL schema documentation](https://graphql.org/learn/schema/).
An example schema that defines a couple of tables might look like:

```graphql
# schema.graphql:
type Dog @table {
	id: ID @primaryKey
	name: String
	breed: String
	age: Int
}

type Breed @table {
	id: ID @primaryKey
}
```

In this example, you can see that we specified the expected data structure for records in the Dog and Breed tables. For example, this will enforce that Dog records are required to have a `name` property with a string (or null, unless the type were specified to be non-nullable). This does not preclude records from having additional properties (see `@sealed` for preventing additional properties). For example, some Dog records could also optionally include a `favoriteTrick` property.

On this page, we will describe the specific directives that Harper uses for defining tables and attributes in a schema.

### Type Directives

#### `@table`

Schemas for tables are defined using GraphQL type definitions with a `@table` directive:

```graphql
type TableName @table
```

By default the table name is inherited from the type name (in this case the table name would be "TableName"). The `@table` directive supports several optional arguments (all of these are optional and can be freely combined):

- `@table(table: "table_name")` - This allows you to explicitly specify the table name.
- `@table(database: "database_name")` - This allows you to specify which database the table belongs to. This defaults to the "data" database.
- `@table(expiration: 3600)` - Sets an expiration time on entries in the table before they are automatically cleared (primarily useful for caching tables). This is specified in seconds.
- `@table(audit: true)` - This enables the audit log for the table so that a history of record changes is recorded. This defaults to the [configuration file's setting for `auditLog`](../../deployments/configuration#logging).

Database naming: the default "data" database is generally a good choice for tables in applications that will not be reused in other applications (and don't need to worry about staying in a separate namespace). Applications with many tables may wish to organize the tables into separate databases (but remember that transactions do not preserve atomicity across different databases, only across tables in the same database). For components that are designed for re-use, it is recommended that you use a database name that is specific to the component (e.g. "my-component-data") to avoid name collisions with other components.

#### `@export`

This indicates that the specified table should be exported as a resource that is accessible as an externally available endpoint, through REST, MQTT, or any of the external resource APIs.

This directive also accepts a `name` parameter to specify the name that should be used for the exported resource (how it will appear in the URL path). For example:

```graphql
type MyTable @table @export(name: "my-table")
```

This table would be available at the URL path `/my-table/`. Without the `name` parameter, the exported name defaults to the name of the table type ("MyTable" in this example).

### Relationships: `@relationship`

Defining relationships is the foundation of using "join" queries in Harper. A relationship defines how one table relates to another table using a foreign key.
Using the `@relationship` directive will define a property as a computed property, which resolves to a record/instance of the target type, based on the referenced attribute, which can be in this table or the target table. The `@relationship` directive must be used in combination with an attribute whose type references another table.

#### `@relationship(from: attribute)`

This defines a relationship where the foreign key is defined in this table, and relates to the primary key of the target table. If the foreign key is single-valued, this establishes a many-to-one relationship with the target table. The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a foreign key that references another table and then define the relationship. Here we create a `brandId` attribute that will be our foreign key (it will hold an id that references the primary key of the Brand table), and we define a relationship to the `Brand` table through the `brand` attribute:

```graphql
type Product @table @export {
	id: ID @primaryKey
	brandId: ID @indexed
	brand: Brand @relationship(from: brandId)
}
type Brand @table @export {
	id: ID @primaryKey
}
```

Once this is defined, we can use the `brand` attribute as a [property in our product instances](../../reference/resources/) and allow for querying by `brand` and selecting brand attributes as returned properties in [query results](../rest).

Again, the foreign key may be a multi-valued array (array of keys referencing the target table records). For example, if we had a list of features that references a Feature table:

```graphql
type Product @table @export {
	id: ID @primaryKey
	featureIds: [ID] @indexed # array of ids
	features: [Feature] @relationship(from: featureIds) # array of referenced feature records
}
type Feature @table {
	id: ID @primaryKey
	...
}
```

#### `@relationship(to: attribute)`

This defines a relationship where the foreign key is defined in the target table and relates to the primary key of this table. If the foreign key is single-valued, this establishes a one-to-many relationship with the target table. Note that the target table type must be an array element type (like `[Table]`). The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a reciprocal relationship from the example above, adding a relationship from brand back to product. Here we continue to use the `brandId` attribute from the `Product` schema, and we define a relationship to the `Product` table through the `products` attribute:

```graphql
type Brand @table @export {
	id: ID @primaryKey
	name: String
	products: [Product] @relationship(to: brandId)
}
```

Once this is defined, we can use the `products` attribute as a property in our brand instances and allow for querying by `products` and selecting product attributes as returned properties in query results.

Note that schemas can also reference themselves with relationships, allowing records to define relationships like parent-child relationships between records in the same table. Also note that for a many-to-many relationship, you must not combine the `to` and `from` properties in the same relationship directive.
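For example, a self-referencing parent-child relationship might be modeled like this (a sketch using a hypothetical `Category` table, following the `from`/`to` patterns above):

```graphql
type Category @table {
	id: ID @primaryKey
	parentId: ID @indexed # foreign key referencing another Category record
	parent: Category @relationship(from: parentId) # many-to-one: each category has one parent
	children: [Category] @relationship(to: parentId) # one-to-many: a category's sub-categories
}
```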
### Computed Properties: `@computed`

The `@computed` directive specifies that a field is computed based on other fields in the record. This is useful for creating derived fields that are not stored in the database, but are computed when the field is queried/accessed. The `@computed` directive must be used in combination with an expression or function that computes the value of the field. For example:

```graphql
type Product @table {
	id: ID @primaryKey
	price: Float
	taxRate: Float
	totalPrice: Float @computed(from: "price + (price * taxRate)")
}
```

The `from` argument specifies the expression that computes the value of the field. The expression can reference other fields in the record. The expression is evaluated when the record is queried or indexed.

The computed function may also be defined in a JavaScript module, which is useful for more complex computations. You can specify a computed attribute, and then define the function with the `setComputedAttribute` method. For example:

```graphql
type Product @table {
...
	totalPrice: Float @computed
}
```

```javascript
tables.Product.setComputedAttribute('totalPrice', (record) => {
	return record.price + record.price * record.taxRate;
});
```

Computed properties may also be indexed, which provides a powerful mechanism for creating indexes on derived fields with custom querying capabilities. This can provide a mechanism for composite indexes, custom full-text indexing, vector indexing, or other custom indexing strategies. A computed property can be indexed by adding the `@indexed` directive to the computed property. When using a JavaScript module for a computed property that is indexed, it is highly recommended that you specify a `version` argument to ensure that the computed attribute is re-evaluated when the function is updated. For example:

```graphql
type Product @table {
...
	totalPrice: Float @computed(version: 1) @indexed
}
```

If you were to update the `setComputedAttribute` function for the `totalPrice` attribute to use a new formula, you must increment the `version` argument to ensure that the computed attribute is re-indexed (note that on a large database, re-indexing may be a lengthy operation). Failing to increment the `version` argument with a modified function can result in an inconsistent index. The computed function must be deterministic, and should not have side effects, as it may be re-evaluated multiple times during indexing.

Note that computed properties will not be included by default in a query result; you must explicitly include them in query results using the `select` query function.

As another example of using a computed custom index, we could index all the comma-separated words in a `tags` property by doing the following (similar techniques are used for full-text indexing):

```graphql
type Product @table {
	id: ID @primaryKey
	tags: String # comma delimited set of tags
	tagsSeparated: String[] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed # split and index the tags
}
```
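A query could then match against the individual indexed tags. As a sketch (assuming a `Product` table defined as above, the default equality comparator, and the same `search` API shown in the vector indexing example below):

```javascript
const { Product } = tables; // tables is a Harper global
// find products whose comma-separated tags include "sale",
// by matching against the indexed computed attribute
let results = Product.search({
	conditions: [{ attribute: 'tagsSeparated', value: 'sale' }],
});
```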
For more in-depth information on computed properties, visit our blog [here](https://www.harpersystems.dev/development/tutorials/how-to-create-custom-indexes-with-computed-properties).

### Field Directives

Field directives can be used to provide information about each attribute in a table type definition.

#### `@primaryKey`

The `@primaryKey` directive specifies that an attribute is the primary key for a table. Primary keys must be unique, and when records are created, a key will be auto-generated if no primary key is provided. When a primary key is auto-generated, it will be a UUID (as a string) if the primary key type is `String` or `ID`. If the primary key type is `Int`, `Long`, or `Any`, then the primary key will be an auto-incremented number. Using numeric primary keys is more efficient than using UUIDs. Note that if the type is `Int`, the primary key will be limited to 32 bits, which can be limiting and problematic for large tables. It is recommended that if you will be relying on auto-generated keys, you use a primary key type of `Long` or `Any` (the latter will allow you to also use strings as primary keys).

#### `@indexed`

The `@indexed` directive specifies that an attribute should be indexed. When an attribute is indexed, Harper will create a secondary index from the data in this field for fast/efficient querying using this field. This is necessary if you want to execute queries using this attribute (whether that is through RESTful query parameters, SQL, or NoSQL operations).

A standard index will index the values in each field, so you can query directly by those values. If the field's value is an array, each of the values in the array will be indexed (you can query by any individual value).

#### Vector Indexing

The `@indexed` directive can also specify a `type`. To use vector indexing, you can specify the `type` as `HNSW` for Hierarchical Navigable Small World indexing. This will create a vector index for the attribute. For example:

```graphql
type Product @table {
	id: Long @primaryKey
	textEmbeddings: [Float] @indexed(type: "HNSW")
}
```

HNSW indexing finds the nearest neighbors to a search vector. To use this, you can query with a `sort` parameter, for example:

```javascript
let results = Product.search({
	sort: { attribute: 'textEmbeddings', target: searchVector },
	limit: 5, // get the five nearest neighbors
});
```

This can be used in combination with other conditions as well, for example:

```javascript
let results = Product.search({
	conditions: [{ attribute: 'price', comparator: 'lt', value: 50 }],
	sort: { attribute: 'textEmbeddings', target: searchVector },
	limit: 5, // get the five nearest neighbors
});
```

HNSW supports several additional arguments to the `@indexed` directive to adjust the HNSW parameters:

- `distance` - Defines the distance function. This can be set to 'euclidean' or 'cosine' (which uses the negative of cosine similarity). The default is cosine.
- `efConstruction` - Maximum number of nodes to keep in the list for finding nearest neighbors. A higher value can yield better recall, and a lower value can have better performance. If `efSearchConstruction` is set, this is only applied to indexing. The default is 100.
- `M` - The preferred number of connections at each layer in the HNSW graph. A higher number uses more space but can be helpful when the intrinsic dimensionality of the data is higher. A lower number can be more efficient. The default is 16.
- `optimizeRouting` - This uses a heuristic to avoid graph connections that match existing indirect connections (connections through another node). This can yield more efficient graph traversals for the same `M` setting. This is a number between 0 and 1, and a higher value will more aggressively omit connections with alternate paths. Setting this to 0 will disable route optimization and follow the traditional HNSW algorithm for creating connections. The default is 0.5.
- `mL` - The normalization factor for level generation; by default, this is computed from `M`.
- `efSearchConstruction` - Maximum number of nodes to keep in the list for finding nearest neighbors for searching. The default is 50.

For example:

```graphql
type Product @table {
	id: Long @primaryKey
	textEmbeddings: [Float] @indexed(type: "HNSW", distance: "euclidean", optimizeRouting: 0, efSearchConstruction: 100)
}
```

#### `@createdTime`

The `@createdTime` directive indicates that this property should be assigned a timestamp of the creation time of the record (in epoch milliseconds).

#### `@updatedTime`

The `@updatedTime` directive indicates that this property should be assigned a timestamp of each updated time of the record (in epoch milliseconds).

#### `@sealed`

The `@sealed` directive specifies that no additional properties should be allowed on records besides those specified in the type itself.

### Defined vs Dynamic Schemas

If you do not define a schema for a table and create a table through the operations API (without specifying attributes) or studio, such a table will not have a defined schema and will follow the behavior of a ["dynamic-schema" table](../../reference/dynamic-schema). It is generally best practice to define schemas for your tables to ensure predictable, consistent structures with data integrity.

### Field Types

Harper supports the following field types in addition to user-defined (object) types:

- `String`: String/text
- `Int`: A 32-bit signed integer (from -2147483648 to 2147483647)
- `Long`: A 54-bit signed integer (from -9007199254740992 to 9007199254740992)
- `Float`: Any number (any number that can be represented as a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format); note that all numbers are stored in the most compact representation available)
- `BigInt`: Any integer (negative or positive) with less than 300 digits (note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately)
- `Boolean`: true or false
- `ID`: A string (but indicates it is not intended to be human readable)
- `Any`: Any primitive, object, or array is allowed
- `Date`: A Date object
- `Bytes`: Binary data as a Buffer or Uint8Array
- `Blob`: Binary data as a [Blob](../../reference/blob), designed for large blocks of data that can be streamed. It is recommended that you use this for binary data that will typically be larger than 20KB.

#### Renaming Tables

It is important to note that Harper does not currently support renaming tables. If you change the name of a table in your schema definition, this will result in the creation of a new, empty table.

### OpenAPI Specification

_The_ [_OpenAPI Specification_](https://spec.openapis.org/oas/v3.1.0) _defines a standard, programming language-agnostic interface description for HTTP APIs, which allows both humans and computers to discover and understand the capabilities of a service without requiring access to source code, additional documentation, or inspection of network traffic._

If a set of endpoints is configured through a Harper GraphQL schema, those endpoints can be described by using a default REST endpoint called `GET /openapi`.
+
+_Note: The `/openapi` endpoint should only be used as a starting guide; it may not cover all the elements of an endpoint._
diff --git a/versioned_docs/version-4.7/developers/applications/index.md b/versioned_docs/version-4.7/developers/applications/index.md
new file mode 100644
index 00000000..0a8b9499
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/applications/index.md
@@ -0,0 +1,237 @@
+---
+title: Applications
+---
+
+# Applications
+
+Harper is more than a database: it's a distributed clustering platform allowing you to package your schema, endpoints, and application logic and deploy them to an entire fleet of Harper instances optimized for on-the-edge scalable data delivery.
+
+In this guide, we are going to explore the ever more extensible architecture that Harper provides by building a Harper application, a fundamental building block of the Harper ecosystem.
+
+When working through this guide, we recommend you use the [Harper Application Template](https://github.com/HarperDB/application-template) repo as a reference.
+
+Before we get started, let's clarify some terminology that is used throughout the documentation.
+
+**Components** are the high-level concept for modules that extend the Harper core platform, adding additional functionality. The application you will build here is a component. In addition to applications, components also encompass extensions.
+
+> We are actively working to disambiguate the terminology. When you see "component", such as in the Operations API or CLI, it generally refers to an application. We will do our best to clarify exactly which classification of component is meant whenever possible.
+
+**Applications** are best defined as the implementation of a specific user-facing feature or functionality. Applications are built on top of extensions and can be thought of as the end product that users interact with. For example, a Next.js application that serves a web interface or an Apollo GraphQL server that provides a GraphQL API are both applications.
+
+**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality the application is implementing. For example, the built-in `graphqlSchema` extension enables applications to define their databases and tables using GraphQL schemas. Furthermore, the `@harperdb/nextjs` and `@harperdb/apollo` extensions are the building blocks that provide support for building Next.js and Apollo applications.
+
+In short, the support for implementing a feature is the extension, and the actual implementation of the feature is the application.
+
+Extensions can also depend on other extensions. For example, the [`@harperdb/apollo`](https://github.com/HarperDB/apollo) extension depends on the built-in `graphqlSchema` extension to create a cache table for Apollo queries. Applications can then use the `@harperdb/apollo` extension to implement an Apollo GraphQL backend server.
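+
+For instance, an application typically declares the extensions it builds on in its `config.yaml`. A minimal sketch, modeled on the Harper application template (the exact entries depend on your application):
+
+```yaml
+# config.yaml - wire up the built-in extensions this application uses
+graphqlSchema:
+  files: 'schema.graphql' # database/table definitions
+jsResource:
+  files: 'resources.js' # custom JavaScript resources
+rest: true # expose exported resources as REST endpoints
+```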
+ +```mermaid +flowchart TD + subgraph Applications + direction TB + NextJSApp["Next.js App"] + ApolloApp["Apollo App"] + CustomResource["Custom Resource"] + end + + subgraph Extensions + direction TB + subgraph Custom + NextjsExt["@harperdb/nextjs"] + ApolloExt["@harperdb/apollo"] + end + subgraph Built-In + GraphqlSchema["graphqlSchema"] + JsResource["jsResource"] + Rest["rest"] + end + end + + subgraph Core + direction TB + Database["database"] + FileSystem["file-system"] + Networking["networking"] + end + + NextJSApp --> NextjsExt + ApolloApp --> ApolloExt + CustomResource --> JsResource & GraphqlSchema & Rest + + NextjsExt --> Networking + NextjsExt --> FileSystem + ApolloExt --> GraphqlSchema + ApolloExt --> Networking + + GraphqlSchema --> Database + JsResource --> Database + Rest --> Networking +``` + +> As of Harper v4.6, a new, **experimental** component system has been introduced called **plugins**. Plugins are a **new iteration of the existing extension system**. They are simultaneously a simplification and an extensibility upgrade. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only have to define a single `handleApplication` method. Plugins are **experimental**, and complete documentation is available on the [plugin API](../reference/components/plugins) page. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. + +Beyond applications and extensions, components are further classified as built-in or custom. **Built-in** components are included with Harper by default and can be directly referenced by their name. The `graphqlSchema`, `rest`, and `jsResource` extensions used in the previous application example are all examples of built-in extensions. **Custom** components must use external references, generally npm or GitHub packages, and are often included as dependencies within the `package.json` of the component. + +> Harper maintains a number of custom components that are available on `npm` and `GitHub`, such as the [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) extension or the [`@harperdb/status-check`](https://github.com/HarperDB/status-check) application. + +Harper does not currently include any built-in applications, making "custom applications" a bit redundant. Generally, we just say "application". However, there is a multitude of both built-in and custom extensions, and so the documentation refers to them as such. A complete list of built-in extensions is available in the [Built-In Extensions](../reference/components/built-in-extensions) documentation page, and the list of custom extensions and applications is available below. + +This guide is going to walk you through building a basic Harper application using a set of built-in extensions. + +> The Reference -> Components section of the documentation contains a [complete reference for all aspects of components](../reference/components), applications, extensions, and more. + +## Custom Functionality with JavaScript + +[The getting started guide](../getting-started/quickstart) covers how to build an application entirely through schema configuration. However, if your application requires more custom functionality, you will probably want to employ your own JavaScript modules to implement more specific features and interactions. This gives you tremendous flexibility and control over how data is accessed and modified in Harper. 
Let's take a look at how we can use JavaScript to extend and define "resources" for custom functionality. Let's add a property to the dog records when they are returned that includes their age in human years. In Harper, data is accessed through our [Resource API](../reference/resources/), a standard interface for accessing data sources and tables and making them available to endpoints. Database tables are `Resource` classes, and so extending the function of a table is as simple as extending their class.
+
+To define custom (JavaScript) resources as endpoints, we need to create a `resources.js` module (this goes in the root of your application folder). Endpoints can then be defined with Resource classes that are `export`ed. This can be done in addition to, or in lieu of, the `@export`ed types in the schema.graphql. If you are exporting and extending a table you defined in the schema, make sure you remove the `@export` from the schema so that you don't export the original table or resource to the same endpoint/path you are exporting with a class. Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put`, which you can implement for specific handling of any of these methods (for tables they all have default implementations). To do this, we get the `Dog` class from the defined tables, extend it, and export it:
+
+```javascript
+// resources.js:
+const { Dog } = tables; // get the Dog table from the Harper provided set of tables (in the default database)
+
+export class DogWithHumanAge extends Dog {
+	static loadAsInstance = false;
+	async get(target) {
+		const record = await super.get(target);
+		return {
+			...record, // include all properties from the record
+			humanAge: 15 + record.age * 5, // silly calculation of human age equivalent
+		};
+	}
+}
+```
+
+Here we exported the `DogWithHumanAge` class (exported with the same name), which directly maps to the endpoint path. Therefore, we now have a `/DogWithHumanAge/` endpoint based on this class, just like the direct table interface that was exported as `/Dog/`, but the new endpoint will return objects with the computed `humanAge` property. Resource classes provide getters/setters for every defined attribute so that accessing instance properties like `age` will get the value from the underlying record. The instance holds information about the primary key of the record so updates and actions can be applied to the correct record. Changed or newly assigned properties can be saved or included in the resource as it is returned and serialized. The `super.get(target)` call allows for any query parameters to be applied to the resource, such as selecting individual properties (with a [`select` query parameter](./rest#selectproperties)).
+
+Often we may want to incorporate data from other tables or data sources in our data models. Next, let's say that we want a `Breed` table that holds detailed information about each breed, and we want to add that information to the returned dog object. We might define the Breed table as (back in schema.graphql):
+
+```graphql
+type Breed @table {
+	name: String @primaryKey
+	description: String @indexed
+	lifespan: Int
+	averageWeight: Float
+}
+```
+
+We use the new table's (static) `get()` method to retrieve a breed by id. Harper will maintain the current context, ensuring that we are accessing the data atomically, in a consistent snapshot across tables. This provides:
+
+1. Automatic tracking of most recently updated timestamps across resources for caching purposes
+1. 
Sharing of contextual metadata (like the user who requested the data)
+1. Transactional atomicity for any writes (not needed in this get operation, but important for other operations)
+
+The resource methods are automatically wrapped with a transaction and will automatically commit the changes when the method finishes. This allows us to fully utilize multiple resources in our current transaction. With our own snapshot of the database for the Dog and Breed tables we can then access data like this:
+
+```javascript
+// resources.js:
+const { Dog, Breed } = tables; // get the Breed table too
+export class DogWithBreed extends Dog {
+	static loadAsInstance = false;
+	async get(target) {
+		// get the Dog record
+		const record = await super.get(target);
+		// get the Breed record
+		let breedDescription = await Breed.get(record.breed);
+		return {
+			...record,
+			breedDescription,
+		};
+	}
+}
+```
+
+The call to `Breed.get` will return an instance of the `Breed` resource class, which holds the record specified by the provided id/primary key. Like the `Dog` instance, we can access or change properties on the Breed instance.
+
+Here we have focused on customizing how we retrieve data, but we may also want to define custom actions for writing data. While the HTTP PUT method has a specific semantic definition (replace the current record), the HTTP POST method has much more open-ended semantics and is a good choice for custom actions. POST requests are handled by our Resource's `post()` method. Let's say that we want to define a POST handler that adds a new trick to the `tricks` array of a specific dog record. We might do it like this, specifying an action to differentiate it from other actions:
+
+```javascript
+export class CustomDog extends Dog {
+	static loadAsInstance = false;
+	async post(target, data) {
+		if (data.action === 'add-trick') {
+			const record = this.update(target);
+			record.tricks.push(data.trick);
+		}
+	}
+}
+```
+
+A POST request to `/CustomDog/` would call this `post` method. The Resource class then automatically tracks changes you make to your resource instances and saves those changes when this transaction is committed (again, these methods are automatically wrapped in a transaction and committed once the request handler is finished). So when you push data on to the `tricks` array, this will be recorded and persisted when this method finishes and before sending a response to the client.
+
+The `post` method automatically marks the current instance as being updated. However, you can also explicitly specify that you are changing a resource by calling the `update()` method. If you want to modify a resource instance that you retrieved through a `get()` call (like the `Breed.get()` call above), you can call its `update()` method to ensure changes are saved (and will be committed in the current transaction).
+
+We can also define custom authorization capabilities. For example, we might want to specify that only the owner of a dog can make updates to a dog. We could add logic to our `post()` method or `put()` method to do this.
For example:
+
+```javascript
+export class CustomDog extends Dog {
+	static loadAsInstance = false;
+	async post(target, data) {
+		if (data.action === 'add-trick') {
+			const context = this.getContext();
+			// if we want to skip the default permission checks, we can turn off checkPermissions:
+			target.checkPermissions = false;
+			const record = this.update(target);
+			// and do our own/custom permission check:
+			if (record.owner !== context.user?.username) {
+				throw new Error('Can not update this record');
+			}
+			record.tricks.push(data.trick);
+		}
+	}
+}
+```
+
+Any methods that are not defined will fall back to Harper's default authorization procedure based on users' roles. If you are using/extending a table, this is based on Harper's [role based access](./security/users-and-roles). If you are extending the base `Resource` class, the default access requires super user permission.
+
+You can also use the `default` export to define the root path resource handler. For example:
+
+```javascript
+// resources.js
+export default class CustomDog extends Dog {
+	...
+```
+
+This will allow requests to a URL like `/` to be directly resolved to this resource.
+
+## Define Custom Data Sources
+
+We can also directly implement the Resource class and use it to create new data sources from scratch that can be used as endpoints. Custom resources can also be used as caching sources. Let's say that we defined a `Breed` table that was a cache of information about breeds from another source. We could implement a caching table like:
+
+```javascript
+const { Breed } = tables; // our Breed table
+class BreedSource extends Resource {
+	// define a data source
+	async get(target) {
+		return (await fetch(`https://best-dog-site.com/${target}`)).json();
+	}
+}
+// define that our breed table is a cache of data from the data source above, with a specified expiration
+Breed.sourcedFrom(BreedSource, { expiration: 3600 });
+```
+
+The [caching documentation](applications/caching) provides much more information on how to use Harper's powerful caching capabilities and set up data sources.
+
+Harper provides a powerful JavaScript API with significant capabilities that go well beyond a "getting started" guide. See our documentation for more information on using the [`globals`](../reference/globals) and the [Resource interface](../reference/resources).
+
+## Configuring Applications/Components
+
+For complete information on configuring applications, refer to the [Component Configuration](../reference/components) reference page.
+
+## Define Fastify Routes
+
+Exporting resources will generate full RESTful endpoints, but you may prefer to define endpoints through a framework. Harper includes a resource plugin for defining routes with the Fastify web framework. Fastify is a full-featured framework with many plugins that provides sophisticated route definition capabilities.
+
+By default, applications are configured to load any modules in the `routes` directory (matching `routes/*.js`) with Fastify's autoloader, which will allow these modules to export a function to define Fastify routes. See the [defining routes documentation](applications/define-routes) for more information on how to create Fastify routes.
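+
+As a rough sketch of what such a route module might look like (this assumes the autoloader passes the Fastify instance to the exported function; the route path and handler here are illustrative, not a definitive API):
+
+```javascript
+// routes/dogs.js - a hypothetical Fastify route module
+export default async function (fastify, opts) {
+	// register a GET route that reads from the Dog table
+	fastify.get('/dogs/:id', async (request, reply) => {
+		const { Dog } = tables; // Harper's global tables object
+		return Dog.get(request.params.id);
+	});
+}
+```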
+
+However, Fastify is not as fast as Harper's RESTful endpoints (about 10%-20% slower, with more overhead), nor does it automate the generation of a full uniform interface with correct RESTful header interactions (for caching control), so Harper's REST interface is generally recommended for optimum performance and ease of use.
+
+## Restarting Your Instance
+
+Generally, Harper will auto-detect when files change and auto-restart the appropriate threads. However, if there are changes that aren't detected, you may manually restart with the `restart_service` operation:
+
+```json
+{
+	"operation": "restart_service",
+	"service": "http_workers"
+}
+```
diff --git a/versioned_docs/version-4.7/developers/applications/web-applications.md b/versioned_docs/version-4.7/developers/applications/web-applications.md
new file mode 100644
index 00000000..02fd1893
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/applications/web-applications.md
@@ -0,0 +1,63 @@
+---
+title: Web Applications on Harper
+---
+
+# Web Applications on Harper
+
+Harper is an efficient, capable, and robust platform for developing web applications, with numerous capabilities designed
+specifically for optimized web application delivery. In addition, there are a number of tools and frameworks that can be used
+with Harper to create web applications with standard best-practice design and development patterns. Running these frameworks
+on Harper can unlock tremendous scalability and performance benefits by leveraging Harper's built-in multi-threading,
+caching, and distributed design.
+
+Harper's unique ability to run JavaScript code directly on the server side, combined with its built-in database for data storage, querying, and caching,
+allows you to create full-featured web applications with a single platform. This eliminates the overhead of legacy solutions that
+require separate application servers, databases, and caching layers, and their requisite communication overhead and latency, while
+allowing the full stack to be deployed to distributed locations with full local response handling, providing an incredibly low latency web experience.
+
+## Web Application Frameworks
+
+With built-in caching mechanisms and an easy-to-use JavaScript API for interacting with data, creating full-featured applications
+using popular frameworks is a simple and straightforward process.
+
+Get started today with one of our examples:
+
+- [Next.js](https://github.com/HarperDB/nextjs-example)
+- [React SSR](https://github.com/HarperDB/react-ssr-example)
+- [Vue SSR](https://github.com/HarperDB/vue-ssr-example)
+- [Svelte SSR](https://github.com/HarperDB/svelte-ssr-example)
+- [Solid SSR](https://github.com/HarperDB/solid-ssr-example)
+
+## Cookie Support
+
+Harper includes support for authenticated sessions using cookies. This allows you to create secure, authenticated web applications
+using best-practice security patterns, allowing users to log in and maintain a session without any credential storage on the client side
+that can be compromised. A login endpoint can be defined by exporting a resource and calling the `login` method on the request object. For example, this could be a login endpoint in your resources.js file:
+
+```javascript
+export class Login extends Resource {
+	async post(data) {
+		const { username, password } = data;
+		const request = this.getContext(); // the incoming request context provides the login method
+		await request.login(username, password);
+		return { message: 'Logged in!' 
};
+	}
+}
+```
+
+This endpoint can be called from the client side using a standard fetch request; a cookie will be returned, and the session will be maintained by Harper.
+This allows web applications to directly interact with Harper and database resources, without needing to go through extra layers of authentication handling.
+
+## Browser Caching Negotiation
+
+Browsers support caching negotiation with revalidation, which allows requests for locally cached data to be sent to servers with a tag or timestamp. Harper REST functionality can fully interact with these headers, and return a `304 Not Modified` response based on the prior `ETag` sent in the request headers. It is highly recommended that you utilize the [REST interface](../rest) for accessing tables, as it facilitates this downstream browser caching. Timestamps are recorded with all records and are then returned [as the `ETag` in the response](../rest#cachingconditional-requests). Utilizing this browser caching can greatly reduce the load on your server and improve the performance of your web application by being able to instantly use locally cached data after revalidation from the server.
+
+## Built-in Cross-Origin Resource Sharing (CORS)
+
+Harper includes built-in support for Cross-Origin Resource Sharing (CORS), which allows you to define which domains are allowed to access your Harper instance. This is a critical security feature for web applications, as it prevents unauthorized access to your data from other domains, while allowing cross-domain access from known hosts. You can define the allowed domains in your [Harper configuration file](../../deployments/configuration#http), and Harper will automatically handle the CORS headers for you.
+
+## More Resources
+
+Make sure to check out our developer videos too:
+
+- [Next.js on Harper | Step-by-Step Guide for Next Level Next.js Performance](https://youtu.be/GqLEwteFJYY)
+- [Server-side Rendering (SSR) with Multi-Tier Cache Demo](https://youtu.be/L-tnBNhO9Fc)
diff --git a/versioned_docs/version-4.7/developers/clustering/certificate-management.md b/versioned_docs/version-4.7/developers/clustering/certificate-management.md
new file mode 100644
index 00000000..a11a1a35
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/certificate-management.md
@@ -0,0 +1,82 @@
+---
+title: Certificate Management
+---
+
+# Certificate Management
+
+## Development
+
+Out of the box, Harper generates certificates that are used when Harper nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the Harper node, the following settings (see the full [configuration file](../../deployments/configuration) docs for more details) are defaulted and recommended for ease of development:
+
+```
+clustering:
+  tls:
+    certificate: ~/hdb/keys/certificate.pem
+    certificateAuthority: ~/hdb/keys/ca.pem
+    privateKey: ~/hdb/keys/privateKey.pem
+    insecure: true
+    verify: true
+```
+
+The certificates that Harper generates are stored in your `/keys/` directory.
+
+`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates.
+
+`verify` is set to `true` to enable mutual TLS between the nodes.
+
+## Production
+
+In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt, to generate certs for your Harper cluster.
This will let you generate certificates with CNs that match the FQDN of your nodes.
+
+Once you generate new certificates, you can make Harper start using them by either replacing the generated files with your own or updating the configuration to point to your new certificates, and then restarting Harper.
+
+Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes.
+
+### Certificate Revocation Checking
+
+Harper automatically performs certificate revocation checking using OCSP (Online Certificate Status Protocol) for all cluster connections. This critical security feature ensures that:
+
+- Revoked certificates cannot be used for cluster communication
+- Compromised nodes can be quickly isolated by revoking their certificates
+- Certificate status is verified in real-time with the Certificate Authority
+
+Certificate verification is enabled by default for cluster connections and follows the same configuration as HTTP mTLS connections. The verification settings can be customized in the HTTP configuration section to balance security requirements with performance considerations.
+
+For production clusters, consider using `failureMode: fail-closed` to ensure maximum security by rejecting connections when OCSP verification cannot be completed.
+
+### Certificate Requirements
+
+- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication`, as these certificates will be used to accept connections from other Harper nodes and to make requests to other Harper nodes. Example:
+
+```
+X509v3 Key Usage: critical
+    Digital Signature, Key Encipherment
+X509v3 Extended Key Usage:
+    TLS Web Server Authentication, TLS Web Client Authentication
+```
+
+- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file.
+- If your certificates expire you will need a way to issue new certificates to the nodes and then restart Harper. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates.
+
+### Certificate Troubleshooting
+
+If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid.
+
+1. Make sure certificates can be parsed and that you can view the contents:
+
+```
+openssl x509 -in <certificate>.pem -noout -text
+```
+
+1. Make sure the certificate validates with the CA:
+
+```
+openssl verify -CAfile <ca>.pem <certificate>.pem
+```
+
+1. Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match:
+
+```
+openssl rsa -modulus -noout -in <private_key>.pem | openssl md5
+openssl x509 -modulus -noout -in <certificate>.pem | openssl md5
+```
diff --git a/versioned_docs/version-4.7/developers/clustering/creating-a-cluster-user.md b/versioned_docs/version-4.7/developers/clustering/creating-a-cluster-user.md
new file mode 100644
index 00000000..0a8b2a6c
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/creating-a-cluster-user.md
@@ -0,0 +1,59 @@
+---
+title: Creating a Cluster User
+---
+
+# Creating a Cluster User
+
+Inter-node authentication takes place via Harper users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality.
+
+A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled.
+
+All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password).
+
+There are multiple ways a `cluster_user` can be created. They are:
+
+1. Through the operations API by calling `add_user`
+
+```json
+{
+	"operation": "add_user",
+	"role": "cluster_user",
+	"username": "cluster_account",
+	"password": "letsCluster123!",
+	"active": true
+}
+```
+
+When using the API to create a cluster user, the `harperdb-config.yaml` file must be updated with the username of the new cluster user.
+
+This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file.
+
+```json
+{
+	"operation": "set_configuration",
+	"clustering_user": "cluster_account"
+}
+```
+
+In the `harperdb-config.yaml` file, under the top-level `clustering` element, there will be a `user` element. Set this to the name of the cluster user.
+
+```yaml
+clustering:
+  user: cluster_account
+```
+
+_Note: When making any changes to the `harperdb-config.yaml` file, Harper must be restarted for the changes to take effect._
+
+1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file.
+
+_Note: Using command line or environment variables for setting the cluster user only works on install._
+
+```
+harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123!
+```
+
+1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file.
+
+```
+CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123!
+```
diff --git a/versioned_docs/version-4.7/developers/clustering/enabling-clustering.md b/versioned_docs/version-4.7/developers/clustering/enabling-clustering.md
new file mode 100644
index 00000000..606bc29c
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/enabling-clustering.md
@@ -0,0 +1,49 @@
+---
+title: Enabling Clustering
+---
+
+# Enabling Clustering
+
+Clustering does not run by default; it needs to be enabled.
+
+To enable clustering, the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`.
+
+There are multiple ways to update this element. They are:
+
+1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true`
+
+```yaml
+clustering:
+  enabled: true
+```
+
+_Note: When making any changes to the `harperdb-config.yaml` file, Harper must be restarted for the changes to take effect._
+
+1. Calling `set_configuration` through the operations API
+
+```json
+{
+	"operation": "set_configuration",
+	"clustering_enabled": true
+}
+```
+
+_Note: When making any changes to Harper configuration, Harper must be restarted for the changes to take effect._
+
+1. Using **command line variables**.
+
+```
+harperdb --CLUSTERING_ENABLED true
+```
+
+1. Using **environment variables**.
+
+```
+CLUSTERING_ENABLED=true
+```
+
+An efficient way to **install Harper**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables.
+
+```
+harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123!
+```
diff --git a/versioned_docs/version-4.7/developers/clustering/establishing-routes.md b/versioned_docs/version-4.7/developers/clustering/establishing-routes.md
new file mode 100644
index 00000000..1d4d5ae2
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/establishing-routes.md
@@ -0,0 +1,73 @@
+---
+title: Establishing Routes
+---
+
+# Establishing Routes
+
+A route is a connection between two nodes. It is how the clustering network is established.
+
+Routes do not need to cross-connect all nodes in the cluster. You can select a leader node (or a few leaders) that all other nodes connect to, you can chain nodes together, and so on. As long as there is one route connecting a node to the cluster, all other nodes should be able to reach that node.
+
+Using routes, the clustering servers will create a mesh network between nodes. This mesh network ensures that if a node drops out, all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind: do not store all your routes on one node, but disperse them throughout the network.
+
+A simple route example is a two node topology: if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes.
+
+A route consists of a `port` and a `host`.
+
+`port` - the clustering port of the remote instance you are creating the connection with. This is going to be the `clustering.hubServer.cluster.network.port` in the Harper configuration on the node you are connecting with.
+
+`host` - the host of the remote instance you are creating the connection with. This can be an IP address or a URL.
+
+Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`.
+
+```yaml
+clustering:
+  hubServer:
+    cluster:
+      network:
+        routes:
+          - host: 3.62.184.22
+            port: 9932
+          - host: 3.735.184.8
+            port: 9932
+```
+
+![figure 1](/img/v4.6/clustering/figure1.png)
+
+This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes.
+
+There are multiple ways to set routes. They are:
+
+1. Directly editing the `harperdb-config.yaml` file (refer to the code snippet above).
+1. Calling `cluster_set_routes` through the API.
+
+```json
+{
+	"operation": "cluster_set_routes",
+	"server": "hub",
+	"routes": [{ "host": "3.735.184.8", "port": 9932 }]
+}
+```
+
+_Note: When making any changes to Harper configuration, Harper must be restarted for the changes to take effect._
+
+1. From the command line.
+
+```bash
+--CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]"
+```
+
+1. Using environment variables.
+
+```bash
+CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}]
+```
+
+The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes.
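+
+For example, `cluster_get_routes` takes no parameters beyond the operation name (a sketch; see the operations API reference for the full request and response shape):
+
+```json
+{
+	"operation": "cluster_get_routes"
+}
+```
+
+And `cluster_delete_routes` accepts the routes to remove: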
+
+```json
+{
+	"operation": "cluster_delete_routes",
+	"routes": [{ "host": "3.735.184.8", "port": 9932 }]
+}
+```
diff --git a/versioned_docs/version-4.7/developers/clustering/index.md b/versioned_docs/version-4.7/developers/clustering/index.md
new file mode 100644
index 00000000..fddd3851
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/index.md
@@ -0,0 +1,31 @@
+---
+title: NATS Clustering
+---
+
+# NATS Clustering
+
+Harper 4.0 - 4.3 used a clustering system based on NATS for replication. In 4.4+, Harper has moved to a new native replication system that has better performance, reliability, and data consistency. This document describes the legacy NATS clustering system. Harper clustering is the process of connecting multiple Harper databases together to create a database mesh network that enables users to define data replication patterns.
+
+Harper’s clustering engine replicates data between instances of Harper using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted; once received by the destination instance, they are processed in an ACID-compliant manner. Conflict resolution follows a last-writer-wins model based on the transaction time recorded on the incoming transaction and the timestamp on the existing record on the node.
+
+---
+
+### Common Use Case
+
+A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold:
+
+- The edge application should not be making outbound HTTP requests for security purposes.
+- There may not be a reliable network connection.
+- Not all sensor data will be sent to the cloud, either because of the unreliable network connection or because storing it all in the cloud is impractical.
+- The edge node should be inaccessible from outside the firewall.
+- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings.
+
+Harper simplifies the architecture of such an application with its bi-directional, table-level replication:
+
+- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds.
+- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so.
+- When a threshold violation occurs, the application adds a record to the "alerts" table.
+- The application appends to that record the "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation.
+- The edge instance publishes the "alerts" table up to the cloud instance.
+
+By letting Harper focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to Harper, you reduce the overall exposure of your application to outside forces.
diff --git a/versioned_docs/version-4.7/developers/clustering/managing-subscriptions.md b/versioned_docs/version-4.7/developers/clustering/managing-subscriptions.md
new file mode 100644
index 00000000..f043c9d1
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/managing-subscriptions.md
@@ -0,0 +1,199 @@
+---
+title: Managing subscriptions
+---
+
+Tables are replicated when the table is designated as replicating and there is a subscription between the nodes.
+Tables are designated as replicating by default, but this can be changed by setting `replicate` to `false` in the table definition:
+
+```graphql
+type Product @table(replicate: false) {
+	id: ID!
+	name: String!
+}
+```
+
+Or in your harperdb-config.yaml, you can set the default replication behavior for databases, and indicate which databases should be replicated by default:
+
+```yaml
+replication:
+  databases: data
+```
+
+If a table is not in the list of databases to be replicated, it will not be replicated unless the table is specifically set to replicate:
+
+```graphql
+type Product @table(replicate: true) {
+	id: ID!
+	name: String!
+}
+```
+
+The subscription can be set to publish, subscribe, or both.
+
+# Managing subscriptions
+
+Subscriptions can be added, updated, or removed through the API.
+
+_Note: The databases and tables in the subscription must exist on either the local or the remote node. Any databases or tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._
+
+To add a single node and create one or more subscriptions use `set_node_replication`.
+
+```json
+{
+	"operation": "set_node_replication",
+	"node_name": "Node2",
+	"subscriptions": [
+		{
+			"database": "data",
+			"table": "dog",
+			"publish": false,
+			"subscribe": true
+		},
+		{
+			"database": "data",
+			"table": "chicken",
+			"publish": true,
+			"subscribe": true
+		}
+	]
+}
+```
+
+This is an example of adding Node2 to your local node. Subscriptions are created for two tables, dog and chicken.
+
+To update one or more subscriptions with a single node you can also use `set_node_replication`; however, this will behave as a PATCH/upsert, where only the subscription(s) being changed will be inserted/updated while the others will be left untouched.
+
+```json
+{
+	"operation": "set_node_replication",
+	"node_name": "Node2",
+	"subscriptions": [
+		{
+			"schema": "dev",
+			"table": "dog",
+			"publish": true,
+			"subscribe": true
+		}
+	]
+}
+```
+
+This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change.
+
+To add or update subscriptions with one or more nodes in one API call use `configure_cluster`.
+
+```json
+{
+	"operation": "configure_cluster",
+	"connections": [
+		{
+			"node_name": "Node2",
+			"subscriptions": [
+				{
+					"database": "dev",
+					"table": "chicken",
+					"publish": false,
+					"subscribe": true
+				},
+				{
+					"database": "prod",
+					"table": "dog",
+					"publish": true,
+					"subscribe": true
+				}
+			]
+		},
+		{
+			"node_name": "Node3",
+			"subscriptions": [
+				{
+					"database": "dev",
+					"table": "chicken",
+					"publish": true,
+					"subscribe": false
+				}
+			]
+		}
+	]
+}
+```
+
+_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. 
To get all existing subscriptions use `cluster_status`._
+
+#### Start time
+
+There is an optional property called `start_time` that can be passed in the subscription. This property accepts an ISO formatted UTC date.
+
+`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription.
+
+```json
+{
+	"operation": "set_node_replication",
+	"node_name": "Node2",
+	"subscriptions": [
+		{
+			"database": "dev",
+			"table": "dog",
+			"publish": false,
+			"subscribe": true,
+			"start_time": "2022-09-02T20:06:35.993Z"
+		}
+	]
+}
+```
+
+This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table.
+
+If no start time is passed it defaults to the current time.
+
+_Note: start time utilizes clustering to source past transactions. For this reason it can only source transactions that occurred while clustering was enabled._
+
+#### Remove node
+
+To remove a node and all its subscriptions use `remove_node`.
+
+```json
+{
+	"operation": "remove_node",
+	"node_name": "Node2"
+}
+```
+
+#### Cluster status
+
+To get the status of all connected nodes and see their subscriptions use `cluster_status`.
+
+```json
+{
+	"node_name": "Node1",
+	"is_enabled": true,
+	"connections": [
+		{
+			"node_name": "Node2",
+			"status": "open",
+			"ports": {
+				"clustering": 9932,
+				"operations_api": 9925
+			},
+			"latency_ms": 65,
+			"uptime": "11m 19s",
+			"subscriptions": [
+				{
+					"schema": "dev",
+					"table": "dog",
+					"publish": true,
+					"subscribe": true
+				}
+			],
+			"system_info": {
+				"hdb_version": "4.0.0",
+				"node_version": "16.17.1",
+				"platform": "linux"
+			}
+		}
+	]
+}
+```
diff --git a/versioned_docs/version-4.7/developers/clustering/naming-a-node.md b/versioned_docs/version-4.7/developers/clustering/naming-a-node.md
new file mode 100644
index 00000000..7a512efb
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/naming-a-node.md
@@ -0,0 +1,45 @@
+---
+title: Naming a Node
+---
+
+# Naming a Node
+
+Node name is the name given to a node. It is how nodes are identified within the cluster and must be unique to the cluster.
+
+The name cannot contain whitespace or any of the following characters: `.,*>` (dot, comma, asterisk, or greater than).
+
+The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element.
+
+_Note: If you want to change the node name, make sure there are no subscriptions in place before doing so. After the name has been changed, a full restart is required._
+
+There are multiple ways to update this element. They are:
+
+1. Directly editing the `harperdb-config.yaml` file.
+
+```yaml
+clustering:
+  nodeName: Node1
+```
+
+_Note: When making any changes to the `harperdb-config.yaml` file, Harper must be restarted for the changes to take effect._
+
+1. Calling `set_configuration` through the operations API
+
+```json
+{
+	"operation": "set_configuration",
+	"clustering_nodeName": "Node1"
+}
+```
+
+1. Using command line variables.
+
+```
+harperdb --CLUSTERING_NODENAME Node1
+```
+
+1. Using environment variables.
+
+```
+CLUSTERING_NODENAME=Node1
+```
diff --git a/versioned_docs/version-4.7/developers/clustering/requirements-and-definitions.md b/versioned_docs/version-4.7/developers/clustering/requirements-and-definitions.md
new file mode 100644
index 00000000..22bc3977
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/requirements-and-definitions.md
@@ -0,0 +1,11 @@
+---
+title: Requirements and Definitions
+---
+
+# Requirements and Definitions
+
+To create a cluster you must have two or more nodes\* (aka instances) of Harper running.
+
+\*_A node is a single instance/installation of Harper. A node of Harper can operate independently with clustering on or off._
+
+On the following pages we'll walk you through the steps required, in order, to set up a Harper cluster.
diff --git a/versioned_docs/version-4.7/developers/clustering/subscription-overview.md b/versioned_docs/version-4.7/developers/clustering/subscription-overview.md
new file mode 100644
index 00000000..b4827de7
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/subscription-overview.md
@@ -0,0 +1,45 @@
+---
+title: Subscription Overview
+---
+
+# Subscription Overview
+
+A subscription defines how data should move between two nodes. Subscriptions are exclusively table level and operate independently. They connect a table on one node to a table on another node; the subscription will apply to the matching database name and table name on both nodes.
+
+_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs, ‘local’ is the node that is receiving the API request to create/update a subscription, and ‘remote’ is the other node that is referred to in the request, the node on the other end of the subscription._
+
+A subscription consists of:
+
+`database` - the name of the database that the table you are creating the subscription for belongs to. _Note: this was previously referred to as schema and may occasionally still be referenced that way._
+
+`table` - the name of the table the subscription will apply to.
+
+`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table.
+
+`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table.
+
+#### Publish subscription
+
+![figure 2](/img/v4.6/clustering/figure2.png)
+
+This diagram is an example of a `publish` subscription from the perspective of Node1.
+
+The record with id 2 has been inserted in the dog table on Node1; after that insert has completed, it is sent to Node2 and inserted in the dog table there.
+
+#### Subscribe subscription
+
+![figure 3](/img/v4.6/clustering/figure3.png)
+
+This diagram is an example of a `subscribe` subscription from the perspective of Node1.
+
+The record with id 3 has been inserted in the dog table on Node2; after that insert has completed, it is sent to Node1 and inserted there.
+
+#### Subscribe and Publish
+
+![figure 4](/img/v4.6/clustering/figure4.png)
+
+This diagram shows both subscribe and publish, with publish set to false. You can see that because subscribe is true, the insert on Node2 is being replicated on Node1, but because publish is set to false, the insert on Node1 is _**not**_ being replicated on Node2.
+
+![figure 5](/img/v4.6/clustering/figure5.png)
+
+This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1.
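+
+In terms of the subscription properties described above, this last fully bidirectional scenario would be expressed with a subscription like the following (using the dog table from these examples):
+
+```json
+{
+	"database": "dev",
+	"table": "dog",
+	"publish": true,
+	"subscribe": true
+}
+```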
diff --git a/versioned_docs/version-4.7/developers/clustering/things-worth-knowing.md b/versioned_docs/version-4.7/developers/clustering/things-worth-knowing.md
new file mode 100644
index 00000000..f523c7bf
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/clustering/things-worth-knowing.md
@@ -0,0 +1,43 @@
+---
+title: Things Worth Knowing
+---
+
+# Things Worth Knowing
+
+Additional information that will help you define your clustering topology.
+
+---
+
+### Transactions
+
+Transactions that are replicated across the cluster are:
+
+- Insert
+- Update
+- Upsert
+- Delete
+- Bulk loads
+  - CSV data load
+  - CSV file load
+  - CSV URL load
+  - Import from S3
+
+When adding or updating a node, any databases and tables in the subscription that don’t exist on the remote node will be automatically created.
+
+**Destructive database operations do not replicate across a cluster**. Those operations include `drop_database`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop database information from any nodes, then the operation(s) will need to be run on each node independently.
+
+Users and roles are not replicated across the cluster.
+
+---
+
+### Queueing
+
+Harper has built-in resiliency for when network connectivity is lost within a subscription. When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined.
+
+---
+
+### Topologies
+
+Harper clustering creates a mesh network between nodes, giving end users the ability to create an infinite number of topologies. Subscription topologies can be as simple or as complex as needed.
+
+![](/img/v4.6/clustering/figure6.png)
diff --git a/versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md b/versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md
new file mode 100644
index 00000000..ec300e2e
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md
@@ -0,0 +1,1775 @@
+---
+title: Advanced JSON SQL Examples
+---
+
+# Advanced JSON SQL Examples
+
+## Create movies database
+
+Create a new database called "movies" using the 'create_database' operation.
+
+_Note: Creating a database is optional; if one is not created, Harper will default to using a database named `data`._
+
+### Body
+
+```json
+{
+	"operation": "create_database",
+	"database": "movies"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "database 'movies' successfully created"
+}
+```
+
+---
+
+## Create movie Table
+
+Creates a new table called "movie" inside the database "movies" using the ‘create_table’ operation.
+
+### Body
+
+```json
+{
+	"operation": "create_table",
+	"database": "movies",
+	"table": "movie",
+	"primary_key": "id"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "table 'movies.movie' successfully created."
+}
+```
+
+---
+
+## Create credits Table
+
+Creates a new table called "credits" inside the database "movies" using the ‘create_table’ operation.
+
+### Body
+
+```json
+{
+	"operation": "create_table",
+	"database": "movies",
+	"table": "credits",
+	"primary_key": "movie_id"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "table 'movies.credits' successfully created."
+}
+```
+
+---
+
+## Bulk Insert movie Via CSV
+
+Inserts data from a hosted CSV file into the "movie" table using the 'csv_url_load' operation.
+ +### Body + +```json +{ + "operation": "csv_url_load", + "database": "movies", + "table": "movie", + "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/movie.csv" +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 1889eee4-23c1-4945-9bb7-c805fc20726c" +} +``` + +--- + +## Bulk Insert credits Via CSV + +Inserts data from a hosted CSV file into the "credits" table using the 'csv_url_load' operation. + +### Body + +```json +{ + "operation": "csv_url_load", + "database": "movies", + "table": "credits", + "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/credits.csv" +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 3a14cd74-67f3-41e9-8ccd-45ffd0addc2c", + "job_id": "3a14cd74-67f3-41e9-8ccd-45ffd0addc2c" +} +``` + +--- + +## View raw data + +In the following example we will be running expressions on the keywords & production_companies attributes, so for context we are displaying what the raw data looks like. + +### Body + +```json +{ + "operation": "sql", + "sql": "SELECT title, rank, keywords, production_companies FROM movies.movie ORDER BY rank LIMIT 10" +} +``` + +### Response: 200 + +```json +[ + { + "title": "Ad Astra", + "rank": 1, + "keywords": [ + { + "id": 305, + "name": "moon" + }, + { + "id": 697, + "name": "loss of loved one" + }, + { + "id": 839, + "name": "planet mars" + }, + { + "id": 14626, + "name": "astronaut" + }, + { + "id": 157265, + "name": "moon colony" + }, + { + "id": 162429, + "name": "solar system" + }, + { + "id": 240119, + "name": "father son relationship" + }, + { + "id": 244256, + "name": "near future" + }, + { + "id": 257878, + "name": "planet neptune" + }, + { + "id": 260089, + "name": "space walk" + } + ], + "production_companies": [ + { + "id": 490, + "name": "New Regency Productions", + "origin_country": "" + }, + { + "id": 79963, + "name": "Keep Your Head", + "origin_country": "" + }, + { + "id": 73492, + "name": "MadRiver Pictures", + "origin_country": "" + }, + { + "id": 81, + "name": "Plan B Entertainment", + "origin_country": "US" + }, + { + "id": 30666, + "name": "RT Features", + "origin_country": "BR" + }, + { + "id": 30148, + "name": "Bona Film Group", + "origin_country": "CN" + }, + { + "id": 22213, + "name": "TSG Entertainment", + "origin_country": "US" + } + ] + }, + { + "title": "Extraction", + "rank": 2, + "keywords": [ + { + "id": 3070, + "name": "mercenary" + }, + { + "id": 4110, + "name": "mumbai (bombay), india" + }, + { + "id": 9717, + "name": "based on comic" + }, + { + "id": 9730, + "name": "crime boss" + }, + { + "id": 11107, + "name": "rescue mission" + }, + { + "id": 18712, + "name": "based on graphic novel" + }, + { + "id": 265216, + "name": "dhaka (dacca), bangladesh" + } + ], + "production_companies": [ + { + "id": 106544, + "name": "AGBO", + "origin_country": "US" + }, + { + "id": 109172, + "name": "Thematic Entertainment", + "origin_country": "US" + }, + { + "id": 92029, + "name": "TGIM Films", + "origin_country": "US" + } + ] + }, + { + "title": "To the Beat! 
Back 2 School", + "rank": 3, + "keywords": [ + { + "id": 10873, + "name": "school" + } + ], + "production_companies": [] + }, + { + "title": "Bloodshot", + "rank": 4, + "keywords": [ + { + "id": 2651, + "name": "nanotechnology" + }, + { + "id": 9715, + "name": "superhero" + }, + { + "id": 9717, + "name": "based on comic" + }, + { + "id": 164218, + "name": "psychotronic" + }, + { + "id": 255024, + "name": "shared universe" + }, + { + "id": 258575, + "name": "valiant comics" + } + ], + "production_companies": [ + { + "id": 34, + "name": "Sony Pictures", + "origin_country": "US" + }, + { + "id": 10246, + "name": "Cross Creek Pictures", + "origin_country": "US" + }, + { + "id": 6573, + "name": "Mimran Schur Pictures", + "origin_country": "US" + }, + { + "id": 333, + "name": "Original Film", + "origin_country": "US" + }, + { + "id": 103673, + "name": "The Hideaway Entertainment", + "origin_country": "US" + }, + { + "id": 124335, + "name": "Valiant Entertainment", + "origin_country": "US" + }, + { + "id": 5, + "name": "Columbia Pictures", + "origin_country": "US" + }, + { + "id": 1225, + "name": "One Race", + "origin_country": "US" + }, + { + "id": 30148, + "name": "Bona Film Group", + "origin_country": "CN" + } + ] + }, + { + "title": "The Call of the Wild", + "rank": 5, + "keywords": [ + { + "id": 818, + "name": "based on novel or book" + }, + { + "id": 4542, + "name": "gold rush" + }, + { + "id": 15162, + "name": "dog" + }, + { + "id": 155821, + "name": "sled dogs" + }, + { + "id": 189390, + "name": "yukon" + }, + { + "id": 207928, + "name": "19th century" + }, + { + "id": 259987, + "name": "cgi animation" + }, + { + "id": 263806, + "name": "1890s" + } + ], + "production_companies": [ + { + "id": 787, + "name": "3 Arts Entertainment", + "origin_country": "US" + }, + { + "id": 127928, + "name": "20th Century Studios", + "origin_country": "US" + }, + { + "id": 22213, + "name": "TSG Entertainment", + "origin_country": "US" + } + ] + }, + { + "title": "Sonic the Hedgehog", + "rank": 6, + "keywords": [ + { + "id": 282, + "name": "video game" + }, + { + "id": 6054, + "name": "friendship" + }, + { + "id": 10842, + "name": "good vs evil" + }, + { + "id": 41645, + "name": "based on video game" + }, + { + "id": 167043, + "name": "road movie" + }, + { + "id": 172142, + "name": "farting" + }, + { + "id": 188933, + "name": "bar fight" + }, + { + "id": 226967, + "name": "amistad" + }, + { + "id": 245230, + "name": "live action remake" + }, + { + "id": 258111, + "name": "fantasy" + }, + { + "id": 260223, + "name": "videojuego" + } + ], + "production_companies": [ + { + "id": 333, + "name": "Original Film", + "origin_country": "US" + }, + { + "id": 10644, + "name": "Blur Studios", + "origin_country": "US" + }, + { + "id": 77884, + "name": "Marza Animation Planet", + "origin_country": "JP" + }, + { + "id": 4, + "name": "Paramount", + "origin_country": "US" + }, + { + "id": 113750, + "name": "SEGA", + "origin_country": "JP" + }, + { + "id": 100711, + "name": "DJ2 Entertainment", + "origin_country": "" + }, + { + "id": 24955, + "name": "Paramount Animation", + "origin_country": "US" + } + ] + }, + { + "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", + "rank": 7, + "keywords": [ + { + "id": 849, + "name": "dc comics" + }, + { + "id": 9717, + "name": "based on comic" + }, + { + "id": 187056, + "name": "woman director" + }, + { + "id": 229266, + "name": "dc extended universe" + } + ], + "production_companies": [ + { + "id": 9993, + "name": "DC Entertainment", + "origin_country": 
"US" + }, + { + "id": 82968, + "name": "LuckyChap Entertainment", + "origin_country": "GB" + }, + { + "id": 103462, + "name": "Kroll & Co Entertainment", + "origin_country": "US" + }, + { + "id": 174, + "name": "Warner Bros. Pictures", + "origin_country": "US" + }, + { + "id": 429, + "name": "DC Comics", + "origin_country": "US" + }, + { + "id": 128064, + "name": "DC Films", + "origin_country": "US" + }, + { + "id": 101831, + "name": "Clubhouse Pictures", + "origin_country": "US" + } + ] + }, + { + "title": "Justice League Dark: Apokolips War", + "rank": 8, + "keywords": [ + { + "id": 849, + "name": "dc comics" + } + ], + "production_companies": [ + { + "id": 2785, + "name": "Warner Bros. Animation", + "origin_country": "US" + }, + { + "id": 9993, + "name": "DC Entertainment", + "origin_country": "US" + }, + { + "id": 429, + "name": "DC Comics", + "origin_country": "US" + } + ] + }, + { + "title": "Parasite", + "rank": 9, + "keywords": [ + { + "id": 1353, + "name": "underground" + }, + { + "id": 5318, + "name": "seoul" + }, + { + "id": 5732, + "name": "birthday party" + }, + { + "id": 5752, + "name": "private lessons" + }, + { + "id": 9866, + "name": "basement" + }, + { + "id": 10453, + "name": "con artist" + }, + { + "id": 11935, + "name": "working class" + }, + { + "id": 12565, + "name": "psychological thriller" + }, + { + "id": 13126, + "name": "limousine driver" + }, + { + "id": 14514, + "name": "class differences" + }, + { + "id": 14864, + "name": "rich poor" + }, + { + "id": 17997, + "name": "housekeeper" + }, + { + "id": 18015, + "name": "tutor" + }, + { + "id": 18035, + "name": "family" + }, + { + "id": 33421, + "name": "crime family" + }, + { + "id": 173272, + "name": "flood" + }, + { + "id": 188861, + "name": "smell" + }, + { + "id": 198673, + "name": "unemployed" + }, + { + "id": 237462, + "name": "wealthy family" + } + ], + "production_companies": [ + { + "id": 7036, + "name": "CJ Entertainment", + "origin_country": "KR" + }, + { + "id": 4399, + "name": "Barunson E&A", + "origin_country": "KR" + } + ] + }, + { + "title": "Star Wars: The Rise of Skywalker", + "rank": 10, + "keywords": [ + { + "id": 161176, + "name": "space opera" + } + ], + "production_companies": [ + { + "id": 1, + "name": "Lucasfilm", + "origin_country": "US" + }, + { + "id": 11461, + "name": "Bad Robot", + "origin_country": "US" + }, + { + "id": 2, + "name": "Walt Disney Pictures", + "origin_country": "US" + }, + { + "id": 120404, + "name": "British Film Commission", + "origin_country": "" + } + ] + } +] +``` + +--- + +## Simple search_json call + +This query uses search_json to convert the keywords object array to a simple string array. The expression '[name]' tells the function to extract all values for the name attribute and wrap them in an array. + +### Body + +```json +{ + "operation": "sql", + "sql": "SELECT title, rank, search_json('[name]', keywords) as keywords FROM movies.movie ORDER BY rank LIMIT 10" +} +``` + +### Response: 200 + +```json +[ + { + "title": "Ad Astra", + "rank": 1, + "keywords": [ + "moon", + "loss of loved one", + "planet mars", + "astronaut", + "moon colony", + "solar system", + "father son relationship", + "near future", + "planet neptune", + "space walk" + ] + }, + { + "title": "Extraction", + "rank": 2, + "keywords": [ + "mercenary", + "mumbai (bombay), india", + "based on comic", + "crime boss", + "rescue mission", + "based on graphic novel", + "dhaka (dacca), bangladesh" + ] + }, + { + "title": "To the Beat! 
Back 2 School", + "rank": 3, + "keywords": ["school"] + }, + { + "title": "Bloodshot", + "rank": 4, + "keywords": ["nanotechnology", "superhero", "based on comic", "psychotronic", "shared universe", "valiant comics"] + }, + { + "title": "The Call of the Wild", + "rank": 5, + "keywords": [ + "based on novel or book", + "gold rush", + "dog", + "sled dogs", + "yukon", + "19th century", + "cgi animation", + "1890s" + ] + }, + { + "title": "Sonic the Hedgehog", + "rank": 6, + "keywords": [ + "video game", + "friendship", + "good vs evil", + "based on video game", + "road movie", + "farting", + "bar fight", + "amistad", + "live action remake", + "fantasy", + "videojuego" + ] + }, + { + "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", + "rank": 7, + "keywords": ["dc comics", "based on comic", "woman director", "dc extended universe"] + }, + { + "title": "Justice League Dark: Apokolips War", + "rank": 8, + "keywords": ["dc comics"] + }, + { + "title": "Parasite", + "rank": 9, + "keywords": [ + "underground", + "seoul", + "birthday party", + "private lessons", + "basement", + "con artist", + "working class", + "psychological thriller", + "limousine driver", + "class differences", + "rich poor", + "housekeeper", + "tutor", + "family", + "crime family", + "flood", + "smell", + "unemployed", + "wealthy family" + ] + }, + { + "title": "Star Wars: The Rise of Skywalker", + "rank": 10, + "keywords": ["space opera"] + } +] +``` + +--- + +## Use search_json in a where clause + +This example shows how we can use SEARCH_JSON to filter out records in a WHERE clause. The production_companies attribute holds an object array of companies that produced each movie, we want to only see movies which were produced by Marvel Studios. Our expression is a filter '$[name="Marvel Studios"]' this tells the function to iterate the production_companies array and only return entries where the name is "Marvel Studios". 
+
+### Body
+
+```json
+{
+	"operation": "sql",
+	"sql": "SELECT title, release_date FROM movies.movie where search_json('$[name=\"Marvel Studios\"]', production_companies) IS NOT NULL ORDER BY release_date"
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"title": "Iron Man",
+		"release_date": "2008-04-30"
+	},
+	{
+		"title": "The Incredible Hulk",
+		"release_date": "2008-06-12"
+	},
+	{
+		"title": "Iron Man 2",
+		"release_date": "2010-04-28"
+	},
+	{
+		"title": "Thor",
+		"release_date": "2011-04-21"
+	},
+	{
+		"title": "Captain America: The First Avenger",
+		"release_date": "2011-07-22"
+	},
+	{
+		"title": "Marvel One-Shot: The Consultant",
+		"release_date": "2011-09-12"
+	},
+	{
+		"title": "Marvel One-Shot: A Funny Thing Happened on the Way to Thor's Hammer",
+		"release_date": "2011-10-25"
+	},
+	{
+		"title": "The Avengers",
+		"release_date": "2012-04-25"
+	},
+	{
+		"title": "Marvel One-Shot: Item 47",
+		"release_date": "2012-09-13"
+	},
+	{
+		"title": "Iron Man 3",
+		"release_date": "2013-04-18"
+	},
+	{
+		"title": "Marvel One-Shot: Agent Carter",
+		"release_date": "2013-09-08"
+	},
+	{
+		"title": "Thor: The Dark World",
+		"release_date": "2013-10-29"
+	},
+	{
+		"title": "Marvel One-Shot: All Hail the King",
+		"release_date": "2014-02-04"
+	},
+	{
+		"title": "Marvel Studios: Assembling a Universe",
+		"release_date": "2014-03-18"
+	},
+	{
+		"title": "Captain America: The Winter Soldier",
+		"release_date": "2014-03-20"
+	},
+	{
+		"title": "Guardians of the Galaxy",
+		"release_date": "2014-07-30"
+	},
+	{
+		"title": "Avengers: Age of Ultron",
+		"release_date": "2015-04-22"
+	},
+	{
+		"title": "Ant-Man",
+		"release_date": "2015-07-14"
+	},
+	{
+		"title": "Captain America: Civil War",
+		"release_date": "2016-04-27"
+	},
+	{
+		"title": "Team Thor",
+		"release_date": "2016-08-28"
+	},
+	{
+		"title": "Doctor Strange",
+		"release_date": "2016-10-25"
+	},
+	{
+		"title": "Guardians of the Galaxy Vol. 2",
+		"release_date": "2017-04-19"
+	},
+	{
+		"title": "Spider-Man: Homecoming",
+		"release_date": "2017-07-05"
+	},
+	{
+		"title": "Thor: Ragnarok",
+		"release_date": "2017-10-25"
+	},
+	{
+		"title": "Black Panther",
+		"release_date": "2018-02-13"
+	},
+	{
+		"title": "Avengers: Infinity War",
+		"release_date": "2018-04-25"
+	},
+	{
+		"title": "Ant-Man and the Wasp",
+		"release_date": "2018-07-04"
+	},
+	{
+		"title": "Captain Marvel",
+		"release_date": "2019-03-06"
+	},
+	{
+		"title": "Avengers: Endgame",
+		"release_date": "2019-04-24"
+	},
+	{
+		"title": "Spider-Man: Far from Home",
+		"release_date": "2019-06-28"
+	},
+	{
+		"title": "Black Widow",
+		"release_date": "2020-10-28"
+	},
+	{
+		"title": "Untitled Spider-Man 3",
+		"release_date": "2021-11-04"
+	},
+	{
+		"title": "Thor: Love and Thunder",
+		"release_date": "2022-02-10"
+	},
+	{
+		"title": "Doctor Strange in the Multiverse of Madness",
+		"release_date": "2022-03-23"
+	},
+	{
+		"title": "Untitled Marvel Project (3)",
+		"release_date": "2022-07-29"
+	},
+	{
+		"title": "Guardians of the Galaxy Vol. 3",
+		"release_date": "2023-02-16"
+	}
+]
+```
+
+---
+
+## Use search_json to show the movies with the largest casts
+
+This example shows how we can use SEARCH_JSON to perform a simple calculation on JSON and order by the result. The cast attribute holds an object array of details about the cast of a movie. We use the expression '$count(id)', which counts the id values in the array and returns the total. We alias that total in SQL as cast_size, which is then used to sort the rows.
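+
+As a minimal illustration with hypothetical data, `$count(id)` applied to a three-member `cast` array yields `3`:
+
+```json
+{
+	"cast": [
+		{ "id": 3223, "name": "Robert Downey Jr." },
+		{ "id": 16828, "name": "Chris Evans" },
+		{ "id": 1245, "name": "Scarlett Johansson" }
+	],
+	"result": 3
+}
+```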
+
+### Body
+
+```json
+{
+	"operation": "sql",
+	"sql": "SELECT movie_title, search_json('$count(id)', `cast`) as cast_size FROM movies.credits ORDER BY cast_size DESC LIMIT 10"
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"movie_title": "Around the World in Eighty Days",
+		"cast_size": 312
+	},
+	{
+		"movie_title": "And the Oscar Goes To...",
+		"cast_size": 259
+	},
+	{
+		"movie_title": "Rock of Ages",
+		"cast_size": 223
+	},
+	{
+		"movie_title": "Mr. Smith Goes to Washington",
+		"cast_size": 213
+	},
+	{
+		"movie_title": "Les Misérables",
+		"cast_size": 208
+	},
+	{
+		"movie_title": "Jason Bourne",
+		"cast_size": 201
+	},
+	{
+		"movie_title": "The Muppets",
+		"cast_size": 191
+	},
+	{
+		"movie_title": "You Don't Mess with the Zohan",
+		"cast_size": 183
+	},
+	{
+		"movie_title": "The Irishman",
+		"cast_size": 173
+	},
+	{
+		"movie_title": "Spider-Man: Far from Home",
+		"cast_size": 173
+	}
+]
+```
+
+---
+
+## search_json as a condition, in a select with a table join
+
+This example shows how we can use SEARCH_JSON to find movies in which at least 2 of our favorite actors from Marvel films have acted together, then list each movie, its overview, its release date, and the actors' names and their characters. The WHERE clause counts the entries in the credits.cast attribute that match those actors. The SELECT applies the same filter to the cast attribute and transforms each matching object to return just the actor's name and their character.
+
+### Body
+
+```json
+{
+	"operation": "sql",
+	"sql": "SELECT m.title, m.overview, m.release_date, search_json('$[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]].{\"actor\": name, \"character\": character}', c.`cast`) as characters FROM movies.credits c INNER JOIN movies.movie m ON c.movie_id = m.id WHERE search_json('$count($[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]])', c.`cast`) >= 2"
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"title": "Out of Sight",
+		"overview": "Meet Jack Foley, a smooth criminal who bends the law and is determined to make one last heist. Karen Sisco is a federal marshal who chooses all the right moves … and all the wrong guys. Now they're willing to risk it all to find out if there's more between them than just the law.",
+		"release_date": "1998-06-26",
+		"characters": [
+			{
+				"actor": "Don Cheadle",
+				"character": "Maurice Miller"
+			},
+			{
+				"actor": "Samuel L. Jackson",
+				"character": "Hejira Henry (uncredited)"
+			}
+		]
+	},
+	{
+		"title": "Iron Man",
+		"overview": "After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.",
+		"release_date": "2008-04-30",
+		"characters": [
+			{
+				"actor": "Robert Downey Jr.",
+				"character": "Tony Stark / Iron Man"
+			},
+			{
+				"actor": "Gwyneth Paltrow",
+				"character": "Virginia \"Pepper\" Potts"
+			},
+			{
+				"actor": "Clark Gregg",
+				"character": "Phil Coulson"
+			},
+			{
+				"actor": "Samuel L. Jackson",
+				"character": "Nick Fury (uncredited)"
+			},
+			{
+				"actor": "Samuel L. 
Jackson", + "character": "Nick Fury" + } + ] + }, + { + "title": "Captain America: The First Avenger", + "overview": "During World War II, Steve Rogers is a sickly man from Brooklyn who's transformed into super-soldier Captain America to aid in the war effort. Rogers must stop the Red Skull – Adolf Hitler's ruthless head of weaponry, and the leader of an organization that intends to use a mysterious device of untold powers for world domination.", + "release_date": "2011-07-22", + "characters": [ + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America" + }, + { + "actor": "Samuel L. Jackson", + "character": "Nick Fury" + } + ] + }, + { + "title": "In Good Company", + "overview": "Dan Foreman is a seasoned advertisement sales executive at a high-ranking publication when a corporate takeover results in him being placed under naive supervisor Carter Duryea, who is half his age. Matters are made worse when Dan's new supervisor becomes romantically involved with his daughter an 18 year-old college student Alex.", + "release_date": "2004-12-29", + "characters": [ + { + "actor": "Scarlett Johansson", + "character": "Alex Foreman" + }, + { + "actor": "Clark Gregg", + "character": "Mark Steckle" + } + ] + }, + { + "title": "Zodiac", + "overview": "The true story of the investigation of the \"Zodiac Killer\", a serial killer who terrified the San Francisco Bay Area, taunting police with his ciphers and letters. The case becomes an obsession for three men as their lives and careers are built and destroyed by the endless trail of clues.", + "release_date": "2007-03-02", + "characters": [ + { + "actor": "Mark Ruffalo", + "character": "Dave Toschi" + }, + { + "actor": "Robert Downey Jr.", + "character": "Paul Avery" + } + ] + }, + { + "title": "Hard Eight", + "overview": "A stranger mentors a young Reno gambler who weds a hooker and befriends a vulgar casino regular.", + "release_date": "1996-02-28", + "characters": [ + { + "actor": "Gwyneth Paltrow", + "character": "Clementine" + }, + { + "actor": "Samuel L. Jackson", + "character": "Jimmy" + } + ] + }, + { + "title": "The Spirit", + "overview": "Down these mean streets a man must come. A hero born, murdered, and born again. A Rookie cop named Denny Colt returns from the beyond as The Spirit, a hero whose mission is to fight against the bad forces from the shadows of Central City. The Octopus, who kills anyone unfortunate enough to see his face, has other plans; he is going to wipe out the entire city.", + "release_date": "2008-12-25", + "characters": [ + { + "actor": "Scarlett Johansson", + "character": "Silken Floss" + }, + { + "actor": "Samuel L. Jackson", + "character": "Octopuss" + } + ] + }, + { + "title": "S.W.A.T.", + "overview": "Hondo Harrelson recruits Jim Street to join an elite unit of the Los Angeles Police Department. Together they seek out more members, including tough Deke Kay and single mom Chris Sanchez. The team's first big assignment is to escort crime boss Alex Montel to prison. It seems routine, but when Montel offers a huge reward to anyone who can break him free, criminals of various stripes step up for the prize.", + "release_date": "2003-08-08", + "characters": [ + { + "actor": "Samuel L. Jackson", + "character": "Sgt. 
Dan 'Hondo' Harrelson" + }, + { + "actor": "Jeremy Renner", + "character": "Brian Gamble" + } + ] + }, + { + "title": "Iron Man 2", + "overview": "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", + "release_date": "2010-04-28", + "characters": [ + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + }, + { + "actor": "Gwyneth Paltrow", + "character": "Virginia \"Pepper\" Potts" + }, + { + "actor": "Don Cheadle", + "character": "James \"Rhodey\" Rhodes / War Machine" + }, + { + "actor": "Scarlett Johansson", + "character": "Natalie Rushman / Natasha Romanoff / Black Widow" + }, + { + "actor": "Samuel L. Jackson", + "character": "Nick Fury" + }, + { + "actor": "Clark Gregg", + "character": "Phil Coulson" + } + ] + }, + { + "title": "Thor", + "overview": "Against his father Odin's will, The Mighty Thor - a powerful but arrogant warrior god - recklessly reignites an ancient war. Thor is cast down to Earth and forced to live among humans as punishment. Once here, Thor learns what it takes to be a true hero when the most dangerous villain of his world sends the darkest forces of Asgard to invade Earth.", + "release_date": "2011-04-21", + "characters": [ + { + "actor": "Chris Hemsworth", + "character": "Thor Odinson" + }, + { + "actor": "Clark Gregg", + "character": "Phil Coulson" + }, + { + "actor": "Jeremy Renner", + "character": "Clint Barton / Hawkeye (uncredited)" + }, + { + "actor": "Samuel L. Jackson", + "character": "Nick Fury (uncredited)" + } + ] + }, + { + "title": "View from the Top", + "overview": "A small-town woman tries to achieve her goal of becoming a flight attendant.", + "release_date": "2003-03-21", + "characters": [ + { + "actor": "Gwyneth Paltrow", + "character": "Donna" + }, + { + "actor": "Mark Ruffalo", + "character": "Ted Stewart" + } + ] + }, + { + "title": "The Nanny Diaries", + "overview": "A college graduate goes to work as a nanny for a rich New York family. Ensconced in their home, she has to juggle their dysfunction, a new romance, and the spoiled brat in her charge.", + "release_date": "2007-08-24", + "characters": [ + { + "actor": "Scarlett Johansson", + "character": "Annie Braddock" + }, + { + "actor": "Chris Evans", + "character": "Hayden \"Harvard Hottie\"" + } + ] + }, + { + "title": "The Perfect Score", + "overview": "Six high school seniors decide to break into the Princeton Testing Center so they can steal the answers to their upcoming SAT tests and all get perfect scores.", + "release_date": "2004-01-30", + "characters": [ + { + "actor": "Chris Evans", + "character": "Kyle" + }, + { + "actor": "Scarlett Johansson", + "character": "Francesca Curtis" + } + ] + }, + { + "title": "The Avengers", + "overview": "When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. 
Spanning the globe, a daring recruitment effort begins!", + "release_date": "2012-04-25", + "characters": [ + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + }, + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America" + }, + { + "actor": "Mark Ruffalo", + "character": "Bruce Banner / The Hulk" + }, + { + "actor": "Chris Hemsworth", + "character": "Thor Odinson" + }, + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow" + }, + { + "actor": "Jeremy Renner", + "character": "Clint Barton / Hawkeye" + }, + { + "actor": "Samuel L. Jackson", + "character": "Nick Fury" + }, + { + "actor": "Clark Gregg", + "character": "Phil Coulson" + }, + { + "actor": "Gwyneth Paltrow", + "character": "Virginia \"Pepper\" Potts" + } + ] + }, + { + "title": "Iron Man 3", + "overview": "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", + "release_date": "2013-04-18", + "characters": [ + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + }, + { + "actor": "Gwyneth Paltrow", + "character": "Virginia \"Pepper\" Potts" + }, + { + "actor": "Don Cheadle", + "character": "James \"Rhodey\" Rhodes / Iron Patriot" + }, + { + "actor": "Mark Ruffalo", + "character": "Bruce Banner (uncredited)" + } + ] + }, + { + "title": "Marvel One-Shot: The Consultant", + "overview": "Agent Coulson informs Agent Sitwell that the World Security Council wishes Emil Blonsky to be released from prison to join the Avengers Initiative. As Nick Fury doesn't want to release Blonsky, the two agents decide to send a patsy to sabotage the meeting...", + "release_date": "2011-09-12", + "characters": [ + { + "actor": "Clark Gregg", + "character": "Phil Coulson" + }, + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark (archive footage)" + } + ] + }, + { + "title": "Thor: The Dark World", + "overview": "Thor fights to restore order across the cosmos… but an ancient race led by the vengeful Malekith returns to plunge the universe back into darkness. Faced with an enemy that even Odin and Asgard cannot withstand, Thor must embark on his most perilous and personal journey yet, one that will reunite him with Jane Foster and force him to sacrifice everything to save us all.", + "release_date": "2013-10-29", + "characters": [ + { + "actor": "Chris Hemsworth", + "character": "Thor Odinson" + }, + { + "actor": "Chris Evans", + "character": "Loki as Captain America (uncredited)" + } + ] + }, + { + "title": "Avengers: Age of Ultron", + "overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. 
As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.", + "release_date": "2015-04-22", + "characters": [ + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + }, + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America" + }, + { + "actor": "Mark Ruffalo", + "character": "Bruce Banner / The Hulk" + }, + { + "actor": "Chris Hemsworth", + "character": "Thor Odinson" + }, + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow" + }, + { + "actor": "Jeremy Renner", + "character": "Clint Barton / Hawkeye" + }, + { + "actor": "Samuel L. Jackson", + "character": "Nick Fury" + }, + { + "actor": "Don Cheadle", + "character": "James \"Rhodey\" Rhodes / War Machine" + } + ] + }, + { + "title": "Captain America: The Winter Soldier", + "overview": "After the cataclysmic events in New York with The Avengers, Steve Rogers, aka Captain America is living quietly in Washington, D.C. and trying to adjust to the modern world. But when a S.H.I.E.L.D. colleague comes under attack, Steve becomes embroiled in a web of intrigue that threatens to put the world at risk. Joining forces with the Black Widow, Captain America struggles to expose the ever-widening conspiracy while fighting off professional assassins sent to silence him at every turn. When the full scope of the villainous plot is revealed, Captain America and the Black Widow enlist the help of a new ally, the Falcon. However, they soon find themselves up against an unexpected and formidable enemy—the Winter Soldier.", + "release_date": "2014-03-20", + "characters": [ + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America" + }, + { + "actor": "Samuel L. Jackson", + "character": "Nick Fury" + }, + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow" + } + ] + }, + { + "title": "Thanks for Sharing", + "overview": "A romantic comedy that brings together three disparate characters who are learning to face a challenging and often confusing world as they struggle together against a common demon—sex addiction.", + "release_date": "2013-09-19", + "characters": [ + { + "actor": "Mark Ruffalo", + "character": "Adam" + }, + { + "actor": "Gwyneth Paltrow", + "character": "Phoebe" + } + ] + }, + { + "title": "Chef", + "overview": "When Chef Carl Casper suddenly quits his job at a prominent Los Angeles restaurant after refusing to compromise his creative integrity for its controlling owner, he is left to figure out what's next. Finding himself in Miami, he teams up with his ex-wife, his friend and his son to launch a food truck. 
Taking to the road, Chef Carl goes back to his roots to reignite his passion for the kitchen -- and zest for life and love.", + "release_date": "2014-05-08", + "characters": [ + { + "actor": "Scarlett Johansson", + "character": "Molly" + }, + { + "actor": "Robert Downey Jr.", + "character": "Marvin" + } + ] + }, + { + "title": "Marvel Studios: Assembling a Universe", + "overview": "A look at the story behind Marvel Studios and the Marvel Cinematic Universe, featuring interviews and behind-the-scenes footage from all of the Marvel films, the Marvel One-Shots and \"Marvel's Agents of S.H.I.E.L.D.\"", + "release_date": "2014-03-18", + "characters": [ + { + "actor": "Robert Downey Jr.", + "character": "Himself / Tony Stark / Iron Man" + }, + { + "actor": "Chris Hemsworth", + "character": "Himself / Thor" + }, + { + "actor": "Chris Evans", + "character": "Himself / Steve Rogers / Captain America" + }, + { + "actor": "Mark Ruffalo", + "character": "Himself / Bruce Banner / Hulk" + }, + { + "actor": "Gwyneth Paltrow", + "character": "Herself" + }, + { + "actor": "Clark Gregg", + "character": "Himself" + }, + { + "actor": "Samuel L. Jackson", + "character": "Himself" + }, + { + "actor": "Scarlett Johansson", + "character": "Herself" + }, + { + "actor": "Jeremy Renner", + "character": "Himself" + } + ] + }, + { + "title": "Captain America: Civil War", + "overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.", + "release_date": "2016-04-27", + "characters": [ + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America" + }, + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + }, + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow" + }, + { + "actor": "Don Cheadle", + "character": "James \"Rhodey\" Rhodes / War Machine" + }, + { + "actor": "Jeremy Renner", + "character": "Clint Barton / Hawkeye" + } + ] + }, + { + "title": "Thor: Ragnarok", + "overview": "Thor is imprisoned on the other side of the universe and finds himself in a race against time to get back to Asgard to stop Ragnarok, the destruction of his home-world and the end of Asgardian civilization, at the hands of an all-powerful new threat, the ruthless Hela.", + "release_date": "2017-10-25", + "characters": [ + { + "actor": "Chris Hemsworth", + "character": "Thor Odinson" + }, + { + "actor": "Mark Ruffalo", + "character": "Bruce Banner / Hulk" + }, + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow (archive footage / uncredited)" + } + ] + }, + { + "title": "Avengers: Endgame", + "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. 
With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", + "release_date": "2019-04-24", + "characters": [ + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + }, + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America" + }, + { + "actor": "Mark Ruffalo", + "character": "Bruce Banner / Hulk" + }, + { + "actor": "Chris Hemsworth", + "character": "Thor Odinson" + }, + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow" + }, + { + "actor": "Jeremy Renner", + "character": "Clint Barton / Hawkeye" + }, + { + "actor": "Don Cheadle", + "character": "James Rhodes / War Machine" + }, + { + "actor": "Gwyneth Paltrow", + "character": "Pepper Potts" + }, + { + "actor": "Samuel L. Jackson", + "character": "Nick Fury" + } + ] + }, + { + "title": "Avengers: Infinity War", + "overview": "As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.", + "release_date": "2018-04-25", + "characters": [ + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + }, + { + "actor": "Chris Hemsworth", + "character": "Thor Odinson" + }, + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America" + }, + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow" + }, + { + "actor": "Don Cheadle", + "character": "James \"Rhodey\" Rhodes / War Machine" + }, + { + "actor": "Gwyneth Paltrow", + "character": "Virginia \"Pepper\" Potts" + }, + { + "actor": "Samuel L. Jackson", + "character": "Nick Fury (uncredited)" + }, + { + "actor": "Mark Ruffalo", + "character": "Bruce Banner / The Hulk" + } + ] + }, + { + "title": "Captain Marvel", + "overview": "The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.", + "release_date": "2019-03-06", + "characters": [ + { + "actor": "Samuel L. 
Jackson", + "character": "Nick Fury" + }, + { + "actor": "Clark Gregg", + "character": "Agent Phil Coulson" + }, + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America (uncredited)" + }, + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow (uncredited)" + }, + { + "actor": "Don Cheadle", + "character": "James 'Rhodey' Rhodes / War Machine (uncredited)" + }, + { + "actor": "Mark Ruffalo", + "character": "Bruce Banner / The Hulk (uncredited)" + } + ] + }, + { + "title": "Spider-Man: Homecoming", + "overview": "Following the events of Captain America: Civil War, Peter Parker, with the help of his mentor Tony Stark, tries to balance his life as an ordinary high school student in Queens, New York City, with fighting crime as his superhero alter ego Spider-Man as a new threat, the Vulture, emerges.", + "release_date": "2017-07-05", + "characters": [ + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + }, + { + "actor": "Gwyneth Paltrow", + "character": "Virginia \"Pepper\" Potts" + }, + { + "actor": "Chris Evans", + "character": "Steve Rogers / Captain America" + } + ] + }, + { + "title": "Team Thor", + "overview": "Discover what Thor was up to during the events of Captain America: Civil War.", + "release_date": "2016-08-28", + "characters": [ + { + "actor": "Chris Hemsworth", + "character": "Thor Odinson" + }, + { + "actor": "Mark Ruffalo", + "character": "Bruce Banner" + } + ] + }, + { + "title": "Black Widow", + "overview": "Natasha Romanoff, also known as Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy and the broken relationships left in her wake long before she became an Avenger.", + "release_date": "2020-10-28", + "characters": [ + { + "actor": "Scarlett Johansson", + "character": "Natasha Romanoff / Black Widow" + }, + { + "actor": "Robert Downey Jr.", + "character": "Tony Stark / Iron Man" + } + ] + } +] +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/analytics.md b/versioned_docs/version-4.7/developers/operations-api/analytics.md new file mode 100644 index 00000000..59ac6011 --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/analytics.md @@ -0,0 +1,121 @@ +--- +title: Analytics Operations +--- + +# Analytics Operations + +## get_analytics + +Retrieves analytics data from the server. 
+ +- `operation` _(required)_ - must always be `get_analytics` +- `metric` _(required)_ - any value returned by `list_metrics` +- `start_time` _(optional)_ - Unix timestamp in seconds +- `end_time` _(optional)_ - Unix timestamp in seconds +- `get_attributes` _(optional)_ - array of attribute names to retrieve +- `conditions` _(optional)_ - array of conditions to filter results (see [search_by_conditions docs](./nosql-operations) for details) + +### Body + +```json +{ + "operation": "get_analytics", + "metric": "resource-usage", + "start_time": 1609459200, + "end_time": 1609545600, + "get_attributes": ["id", "metric", "userCPUTime", "systemCPUTime"], + "conditions": [ + { + "attribute": "node", + "operator": "equals", + "value": "node1.example.com" + } + ] +} +``` + +### Response 200 + +```json +[ + { + "id": "12345", + "metric": "resource-usage", + "userCPUTime": 100, + "systemCPUTime": 50 + }, + { + "id": "67890", + "metric": "resource-usage", + "userCPUTime": 150, + "systemCPUTime": 75 + } +] +``` + +## list_metrics + +Returns a list of available metrics that can be queried. + +- `operation` _(required)_ - must always be `list_metrics` +- `metric_types` _(optional)_ - array of metric types to filter results; one or both of `custom` and `builtin`; default is `builtin` + +### Body + +```json +{ + "operation": "list_metrics", + "metric_types": ["custom", "builtin"] +} +``` + +### Response 200 + +```json +["resource-usage", "table-size", "database-size", "main-thread-utilization", "utilization", "storage-volume"] +``` + +## describe_metric + +Provides detailed information about a specific metric, including its structure and available parameters. + +- `operation` _(required)_ - must always be `describe_metric` +- `metric` _(required)_ - name of the metric to describe + +### Body + +```json +{ + "operation": "describe_metric", + "metric": "resource-usage" +} +``` + +### Response 200 + +```json +{ + "attributes": [ + { + "name": "id", + "type": "number" + }, + { + "name": "metric", + "type": "string" + }, + { + "name": "userCPUTime", + "type": "number" + }, + { + "name": "systemCPUTime", + "type": "number" + }, + { + "name": "node", + "type": "string" + } + ] +} +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/bulk-operations.md b/versioned_docs/version-4.7/developers/operations-api/bulk-operations.md new file mode 100644 index 00000000..b6714552 --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/bulk-operations.md @@ -0,0 +1,255 @@ +--- +title: Bulk Operations +--- + +# Bulk Operations + +## Export Local + +Exports data based on a given search operation to a local file in JSON or CSV format. + +- `operation` _(required)_ - must always be `export_local` +- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` +- `path` _(required)_ - path local to the server to export the data +- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` +- `filename` _(optional)_ - the name of the file where your export will be written to (do not include extension in filename). If one is not provided it will be autogenerated based on the epoch. 
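+
+The nested `search_operation` can be any of the supported searches listed above, not just `sql`. As a sketch (illustrative values only; see the NoSQL operations documentation for the `search_by_value` parameters assumed here), a CSV export driven by `search_by_value` might look like this:
+
+```json
+{
+	"operation": "export_local",
+	"format": "csv",
+	"path": "/data/",
+	"filename": "dogs-named-harper",
+	"search_operation": {
+		"operation": "search_by_value",
+		"database": "dev",
+		"table": "dog",
+		"search_attribute": "dog_name",
+		"search_value": "Harper",
+		"get_attributes": ["*"]
+	}
+}
+```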
+ +### Body + +```json +{ + "operation": "export_local", + "format": "json", + "path": "/data/", + "search_operation": { + "operation": "sql", + "sql": "SELECT * FROM dev.breed" + } +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 6fc18eaa-3504-4374-815c-44840a12e7e5" +} +``` + +--- + +## CSV Data Load + +Ingests CSV data, provided directly in the operation as an `insert`, `update` or `upsert` into the specified database table. + +- `operation` _(required)_ - must always be `csv_data_load` +- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` +- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` +- `table` _(required)_ - name of the table where you are loading your data +- `data` _(required)_ - csv data to import into Harper + +### Body + +```json +{ + "operation": "csv_data_load", + "database": "dev", + "action": "insert", + "table": "breed", + "data": "id,name,section,country,image\n1,ENGLISH POINTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/001g07.jpg\n2,ENGLISH SETTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/002g07.jpg\n3,KERRY BLUE TERRIER,Large and medium sized Terriers,IRELAND,\n" +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", + "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" +} +``` + +--- + +## CSV File Load + +Ingests CSV data, provided via a path on the local filesystem, as an `insert`, `update` or `upsert` into the specified database table. + +_Note: The CSV file must reside on the same machine on which Harper is running. For example, the path to a CSV on your computer will produce an error if your Harper instance is a cloud instance._ + +- `operation` _(required)_ - must always be `csv_file_load` +- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` +- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` +- `table` _(required)_ - name of the table where you are loading your data +- `file_path` _(required)_ - path to the csv file on the host running Harper + +### Body + +```json +{ + "operation": "csv_file_load", + "action": "insert", + "database": "dev", + "table": "breed", + "file_path": "/home/user/imports/breeds.csv" +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 3994d8e2-ec6a-43c4-8563-11c1df81870e", + "job_id": "3994d8e2-ec6a-43c4-8563-11c1df81870e" +} +``` + +--- + +## CSV URL Load + +Ingests CSV data, provided via URL, as an `insert`, `update` or `upsert` into the specified database table. + +- `operation` _(required)_ - must always be `csv_url_load` +- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` +- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` +- `table` _(required)_ - name of the table where you are loading your data +- `csv_url` _(required)_ - URL to the csv + +### Body + +```json +{ + "operation": "csv_url_load", + "action": "insert", + "database": "dev", + "table": "breed", + "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 332aa0a2-6833-46cd-88a6-ae375920436a", + "job_id": "332aa0a2-6833-46cd-88a6-ae375920436a" +} +``` + +--- + +## Export To S3 + +Exports data based on a given search operation from table to AWS S3 in JSON or CSV format. + +- `operation` _(required)_ - must always be `export_to_s3` +- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` +- `s3` _(required)_ - details your access keys, bucket, bucket region and key for saving the data to S3 +- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` + +### Body + +```json +{ + "operation": "export_to_s3", + "format": "json", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET_KEY", + "bucket": "BUCKET_NAME", + "key": "OBJECT_NAME", + "region": "BUCKET_REGION" + }, + "search_operation": { + "operation": "sql", + "sql": "SELECT * FROM dev.dog" + } +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 9fa85968-4cb1-4008-976e-506c4b13fc4a", + "job_id": "9fa85968-4cb1-4008-976e-506c4b13fc4a" +} +``` + +--- + +## Import from S3 + +This operation allows users to import CSV or JSON files from an AWS S3 bucket as an `insert`, `update` or `upsert`. + +- `operation` _(required)_ - must always be `import_from_s3` +- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` +- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` +- `table` _(required)_ - name of the table where you are loading your data +- `s3` _(required)_ - object containing required AWS S3 bucket info for operation: + - `aws_access_key_id` - AWS access key for authenticating into your S3 bucket + - `aws_secret_access_key` - AWS secret for authenticating into your S3 bucket + - `bucket` - AWS S3 bucket to import from + - `key` - the name of the file to import - _the file must include a valid file extension ('.csv' or '.json')_ + - `region` - the region of the bucket + +### Body + +```json +{ + "operation": "import_from_s3", + "action": "insert", + "database": "dev", + "table": "dog", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET_KEY", + "bucket": "BUCKET_NAME", + "key": "OBJECT_NAME", + "region": "BUCKET_REGION" + } +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16", + "job_id": "062a1892-6a0a-4282-9791-0f4c93b12e16" +} +``` + +--- + +## Delete Records Before + +Delete data before the specified timestamp on the specified database table exclusively on the node where it is executed. Any clustered nodes with replicated data will retain that data. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `delete_records_before` +- `date` _(required)_ - records older than this date will be deleted. 
Supported format looks like: `YYYY-MM-DDThh:mm:ss.sZ`
+- `schema` _(required)_ - name of the schema where you are deleting your data
+- `table` _(required)_ - name of the table where you are deleting your data
+
+### Body
+
+```json
+{
+	"operation": "delete_records_before",
+	"date": "2021-01-25T23:05:27.464",
+	"schema": "dev",
+	"table": "breed"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "Starting job with id d3aed926-e9fe-4ec1-aea7-0fb4451bd373",
+	"job_id": "d3aed926-e9fe-4ec1-aea7-0fb4451bd373"
+}
+```
diff --git a/versioned_docs/version-4.7/developers/operations-api/certificate-management.md b/versioned_docs/version-4.7/developers/operations-api/certificate-management.md
new file mode 100644
index 00000000..f8eea402
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/certificate-management.md
@@ -0,0 +1,124 @@
+---
+title: Certificate Management
+---
+
+# Certificate Management
+
+## Add Certificate
+
+Adds or updates a certificate in the `hdb_certificate` system table.
+If a `private_key` is provided, it will **not** be stored in `hdb_certificate`; it will be written to a file in `/keys/`.
+If a `private_key` is not passed, the operation will search for one that matches the certificate. If one is not found, an error will be returned.
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `add_certificate`
+- `name` _(required)_ - a unique name for the certificate
+- `certificate` _(required)_ - a PEM formatted certificate string
+- `is_authority` _(required)_ - a boolean indicating if the certificate is a certificate authority
+- `hosts` _(optional)_ - an array of hostnames that the certificate is valid for
+- `private_key` _(optional)_ - a PEM formatted private key string
+
+### Body
+
+```json
+{
+	"operation": "add_certificate",
+	"name": "my-cert",
+	"certificate": "-----BEGIN CERTIFICATE-----ZDFAay... -----END CERTIFICATE-----",
+	"is_authority": false,
+	"private_key": "-----BEGIN RSA PRIVATE KEY-----Y4dMpw5f... -----END RSA PRIVATE KEY-----"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "Successfully added certificate: my-cert"
+}
+```
+
+---
+
+## Remove Certificate
+
+Removes a certificate from the `hdb_certificate` system table and deletes the corresponding private key file.
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `remove_certificate`
+- `name` _(required)_ - the name of the certificate
+
+### Body
+
+```json
+{
+	"operation": "remove_certificate",
+	"name": "my-cert"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "Successfully removed my-cert"
+}
+```
+
+---
+
+## List Certificates
+
+Lists all certificates in the `hdb_certificate` system table.
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `list_certificates`
+
+### Body
+
+```json
+{
+	"operation": "list_certificates"
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"name": "HarperDB-Certificate-Authority-node1",
+		"certificate": "-----BEGIN CERTIFICATE-----\r\nTANBgkqhk... 
S34==\r\n-----END CERTIFICATE-----\r\n", + "private_key_name": "privateKey.pem", + "is_authority": true, + "details": { + "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", + "subject": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", + "serial_number": "5235345", + "valid_from": "Aug 27 15:00:00 2024 GMT", + "valid_to": "Aug 25 15:00:00 2034 GMT" + }, + "is_self_signed": true, + "uses": ["https", "wss"] + }, + { + "name": "node1", + "certificate": "-----BEGIN CERTIFICATE-----\r\ngIEcSR1M... 5bv==\r\n-----END CERTIFICATE-----\r\n", + "private_key_name": "privateKey.pem", + "is_authority": false, + "details": { + "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", + "subject": "CN=node.1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", + "subject_alt_name": "IP Address:127.0.0.1, DNS:localhost, IP Address:0:0:0:0:0:0:0:1, DNS:node.1", + "serial_number": "5243646", + "valid_from": "Aug 27 15:00:00 2024 GMT", + "valid_to": "Aug 25 15:00:00 2034 GMT" + }, + "is_self_signed": true, + "uses": ["https", "wss"] + } +] +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/clustering-nats.md b/versioned_docs/version-4.7/developers/operations-api/clustering-nats.md new file mode 100644 index 00000000..8076da98 --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/clustering-nats.md @@ -0,0 +1,486 @@ +--- +title: Clustering using NATS +--- + +# Clustering using NATS + +## Cluster Set Routes + +Adds a route/routes to either the hub or leaf server cluster configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `cluster_set_routes` +- `server` _(required)_ - must always be `hub` or `leaf`, in most cases you should use `hub` here +- `routes` _(required)_ - must always be an objects array with a host and port: + - `host` - the host of the remote instance you are clustering to + - `port` - the clustering port of the remote instance you are clustering to, in most cases this is the value in `clustering.hubServer.cluster.network.port` on the remote instance `harperdb-config.yaml` + +### Body + +```json +{ + "operation": "cluster_set_routes", + "server": "hub", + "routes": [ + { + "host": "3.22.181.22", + "port": 12345 + }, + { + "host": "3.137.184.8", + "port": 12345 + }, + { + "host": "18.223.239.195", + "port": 12345 + }, + { + "host": "18.116.24.71", + "port": 12345 + } + ] +} +``` + +### Response: 200 + +```json +{ + "message": "cluster routes successfully set", + "set": [ + { + "host": "3.22.181.22", + "port": 12345 + }, + { + "host": "3.137.184.8", + "port": 12345 + }, + { + "host": "18.223.239.195", + "port": 12345 + }, + { + "host": "18.116.24.71", + "port": 12345 + } + ], + "skipped": [] +} +``` + +--- + +## Cluster Get Routes + +Gets all the hub and leaf server routes from the config file. 
+ +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `cluster_get_routes` + +### Body + +```json +{ + "operation": "cluster_get_routes" +} +``` + +### Response: 200 + +```json +{ + "hub": [ + { + "host": "3.22.181.22", + "port": 12345 + }, + { + "host": "3.137.184.8", + "port": 12345 + }, + { + "host": "18.223.239.195", + "port": 12345 + }, + { + "host": "18.116.24.71", + "port": 12345 + } + ], + "leaf": [] +} +``` + +--- + +## Cluster Delete Routes + +Removes route(s) from hub and/or leaf server routes array in config file. Returns a deletion success message and arrays of deleted and skipped records. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `cluster_delete_routes` +- `routes` _(required)_ - Must be an array of route object(s) + +### Body + +```json +{ + "operation": "cluster_delete_routes", + "routes": [ + { + "host": "18.116.24.71", + "port": 12345 + } + ] +} +``` + +### Response: 200 + +```json +{ + "message": "cluster routes successfully deleted", + "deleted": [ + { + "host": "18.116.24.71", + "port": 12345 + } + ], + "skipped": [] +} +``` + +--- + +## Add Node + +Registers an additional Harper instance with associated subscriptions. Learn more about [Harper clustering here](../clustering/). + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `add_node` +- `node_name` _(required)_ - the node name of the remote node +- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: + - `schema` - the schema to replicate from + - `table` - the table to replicate from + - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table + - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table + - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format + +### Body + +```json +{ + "operation": "add_node", + "node_name": "ec2-3-22-181-22", + "subscriptions": [ + { + "schema": "dev", + "table": "dog", + "subscribe": false, + "publish": true, + "start_time": "2022-09-02T20:06:35.993Z" + } + ] +} +``` + +### Response: 200 + +```json +{ + "message": "Successfully added 'ec2-3-22-181-22' to manifest" +} +``` + +--- + +## Update Node + +Modifies an existing Harper instance registration and associated subscriptions. This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. Learn more about [Harper clustering here](../clustering/). + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `update_node` +- `node_name` _(required)_ - the node name of the remote node you are updating +- `subscriptions` _(required)_ - The relationship created between nodes. 
Must be an object array and include `schema`, `table`, `subscribe` and `publish`:
+  - `schema` - the schema to replicate from
+  - `table` - the table to replicate from
+  - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table
+  - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table
+  - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format
+
+### Body
+
+```json
+{
+	"operation": "update_node",
+	"node_name": "ec2-18-223-239-195",
+	"subscriptions": [
+		{
+			"schema": "dev",
+			"table": "dog",
+			"subscribe": true,
+			"publish": false,
+			"start_time": "2022-09-02T20:06:35.993Z"
+		}
+	]
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "Successfully updated 'ec2-18-223-239-195'"
+}
+```
+
+---
+
+## Set Node Replication
+
+A more aptly named alias for add and update node. This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. The `database` (aka `schema`) parameter is optional; it defaults to `data`.
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `set_node_replication`
+- `node_name` _(required)_ - the node name of the remote node you are updating
+- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `table`, `subscribe` and `publish`:
+  - `database` _(optional)_ - the database to replicate from
+  - `table` _(required)_ - the table to replicate from
+  - `subscribe` _(required)_ - a boolean which determines if transactions on the remote table should be replicated on the local table
+  - `publish` _(required)_ - a boolean which determines if transactions on the local table should be replicated on the remote table
+
+### Body
+
+```json
+{
+	"operation": "set_node_replication",
+	"node_name": "node1",
+	"subscriptions": [
+		{
+			"table": "dog",
+			"subscribe": true,
+			"publish": true
+		}
+	]
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "Successfully updated 'node1'"
+}
+```
+
+---
+
+## Cluster Status
+
+Returns an array of status objects from a cluster. A status object will contain the clustering node name, whether or not clustering is enabled, and a list of possible connections. Learn more about [Harper clustering here](../clustering/).
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `cluster_status`
+
+### Body
+
+```json
+{
+	"operation": "cluster_status"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"node_name": "ec2-18-221-143-69",
+	"is_enabled": true,
+	"connections": [
+		{
+			"node_name": "ec2-3-22-181-22",
+			"status": "open",
+			"ports": {
+				"clustering": 12345,
+				"operations_api": 9925
+			},
+			"latency_ms": 13,
+			"uptime": "30d 1h 18m 8s",
+			"subscriptions": [
+				{
+					"schema": "dev",
+					"table": "dog",
+					"publish": true,
+					"subscribe": true
+				}
+			]
+		}
+	]
+}
+```
+
+---
+
+## Cluster Network
+
+Returns an object array of enmeshed nodes. Each node object will contain the name of the node, the amount of time (in milliseconds) it took for it to respond, the names of the nodes it is enmeshed with and the routes set in its config file. Learn more about [Harper clustering here](../clustering/).
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `cluster_network`
+- `timeout` _(optional)_ - the amount of time in milliseconds to wait for a response from the network. Must be a number
+- `connected_nodes` _(optional)_ - set to `true` to omit `connected_nodes` from the response. Must be a boolean. Defaults to `false`
+- `routes` _(optional)_ - set to `true` to omit `routes` from the response. Must be a boolean. Defaults to `false`
+
+### Body
+
+```json
+{
+	"operation": "cluster_network"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"nodes": [
+		{
+			"name": "local_node",
+			"response_time": 4,
+			"connected_nodes": ["ec2-3-142-255-78"],
+			"routes": [
+				{
+					"host": "3.142.255.78",
+					"port": 9932
+				}
+			]
+		},
+		{
+			"name": "ec2-3-142-255-78",
+			"response_time": 57,
+			"connected_nodes": ["ec2-3-12-153-124", "ec2-3-139-236-138", "local_node"],
+			"routes": []
+		}
+	]
+}
+```
+
+---
+
+## Remove Node
+
+Removes a Harper instance and associated subscriptions from the cluster. Learn more about [Harper clustering here](../clustering/).
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `remove_node`
+- `node_name` _(required)_ - The name of the node you are de-registering
+
+### Body
+
+```json
+{
+	"operation": "remove_node",
+	"node_name": "ec2-3-22-181-22"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "Successfully removed 'ec2-3-22-181-22' from manifest"
+}
+```
+
+---
+
+## Configure Cluster
+
+Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup.
+Learn more about [Harper clustering here](../clustering/).
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `configure_cluster`
+- `connections` _(required)_ - must be an object array with each object containing `node_name` and `subscriptions` for that node
+
+### Body
+
+```json
+{
+	"operation": "configure_cluster",
+	"connections": [
+		{
+			"node_name": "ec2-3-137-184-8",
+			"subscriptions": [
+				{
+					"schema": "dev",
+					"table": "dog",
+					"subscribe": true,
+					"publish": false
+				}
+			]
+		},
+		{
+			"node_name": "ec2-18-223-239-195",
+			"subscriptions": [
+				{
+					"schema": "dev",
+					"table": "dog",
+					"subscribe": true,
+					"publish": true
+				}
+			]
+		}
+	]
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "Cluster successfully configured."
+}
+```
+
+---
+
+## Purge Stream
+
+Purges messages from a stream.
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `purge_stream`
+- `database` _(required)_ - the name of the database where the stream's table resides
+- `table` _(required)_ - the name of the table that belongs to the stream
+- `options` _(optional)_ - controls how many messages get purged. 
Options are:
  - `keep` - purge will keep this many of the most recent messages
  - `seq` - purge all messages up to, but not including, this sequence

### Body

```json
{
  "operation": "purge_stream",
  "database": "dev",
  "table": "dog",
  "options": {
    "keep": 100
  }
}
```

---
diff --git a/versioned_docs/version-4.7/developers/operations-api/clustering.md b/versioned_docs/version-4.7/developers/operations-api/clustering.md
new file mode 100644
index 00000000..bbb87d62
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/clustering.md
@@ -0,0 +1,355 @@
---
title: Clustering
---

# Clustering

The following operations are available for configuring and managing [Harper replication](../replication/).

_**If you are using NATS for clustering, please see the**_ [_**NATS Clustering Operations**_](clustering-nats) _**documentation.**_

## Add Node

Adds a new Harper instance to the cluster. If `subscriptions` are provided, it will also create the replication relationships between the nodes. If they are not provided, a fully replicating system will be created. [Learn more about adding nodes here](../replication/).

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `add_node`
- `hostname` or `url` _(required)_ - one of these fields is required. You must provide either the `hostname` or the `url` of the node you want to add
- `verify_tls` _(optional)_ - a boolean which determines if the TLS certificate should be verified. Setting this to `false` allows the Harper default self-signed certificates to be accepted. Defaults to `true`
- `authorization` _(optional)_ - an object or a string which contains the authorization information for the node being added. If it is an object, it should contain `username` and `password` fields. If it is a string, it should use HTTP `Authorization` style credentials
- `retain_authorization` _(optional)_ - a boolean which determines if the authorization credentials should be retained/stored and used every time a connection is made to this node. If `true`, the authorization will be stored on the node record. Generally this should not be used, as mTLS/certificate-based authorization is much more secure and avoids the need for storing credentials. Defaults to `false`.
- `revoked_certificates` _(optional)_ - an array of revoked certificate serial numbers. If a certificate is revoked, it will not be accepted for any connections.
- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding.
- `subscriptions` _(optional)_ - The relationship created between nodes. If not provided, a fully replicated cluster will be set up. Must be an object array and include `database`, `table`, `subscribe` and `publish`:
  - `database` - the database to replicate
  - `table` - the table to replicate
  - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table
  - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table

### Body

```json
{
  "operation": "add_node",
  "hostname": "server-two",
  "verify_tls": false,
  "authorization": {
    "username": "admin",
    "password": "password"
  }
}
```

### Response: 200

```json
{
  "message": "Successfully added 'server-two' to cluster"
}
```

---

## Update Node

Modifies an existing Harper instance in the cluster.
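As an illustration (the shard value here is hypothetical), an update can combine a subscription change with a shard reassignment; the full parameter reference follows below:

```json
{
  "operation": "update_node",
  "hostname": "server-two",
  "shard": 2,
  "subscriptions": [
    {
      "database": "dev",
      "table": "my-table",
      "subscribe": true,
      "publish": true
    }
  ]
}
```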
_Operation is restricted to super_user roles only_

_Note: this operation will attempt to add the node if it does not exist_

- `operation` _(required)_ - must always be `update_node`
- `hostname` _(required)_ - the `hostname` of the remote node you are updating
- `revoked_certificates` _(optional)_ - an array of revoked certificate serial numbers. If a certificate is revoked, it will not be accepted for any connections.
- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding.
- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `database`, `table`, `subscribe` and `publish`:
  - `database` - the database to replicate from
  - `table` - the table to replicate from
  - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table
  - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table

### Body

```json
{
  "operation": "update_node",
  "hostname": "server-two",
  "subscriptions": [
    {
      "database": "dev",
      "table": "my-table",
      "subscribe": true,
      "publish": true
    }
  ]
}
```

### Response: 200

```json
{
  "message": "Successfully updated 'server-two'"
}
```

---

## Remove Node

Removes a Harper node from the cluster and stops replication. [Learn more about removing nodes here](../replication/).

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `remove_node`
- `hostname` _(required)_ - the hostname of the node you are removing

### Body

```json
{
  "operation": "remove_node",
  "hostname": "server-two"
}
```

### Response: 200

```json
{
  "message": "Successfully removed 'server-two' from cluster"
}
```

---

## Cluster Status

Returns an array of status objects from a cluster.

`database_sockets` shows the actual websocket connections that exist between nodes.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `cluster_status`

### Body

```json
{
  "operation": "cluster_status"
}
```

### Response: 200

```json
{
  "type": "cluster-status",
  "connections": [
    {
      "replicateByDefault": true,
      "replicates": true,
      "url": "wss://server-2.domain.com:9933",
      "name": "server-2.domain.com",
      "subscriptions": null,
      "database_sockets": [
        {
          "database": "data",
          "connected": true,
          "latency": 0.7,
          "thread_id": 1,
          "nodes": ["server-2.domain.com"],
          "lastCommitConfirmed": "Wed, 12 Feb 2025 19:09:34 GMT",
          "lastReceivedRemoteTime": "Wed, 12 Feb 2025 16:49:29 GMT",
          "lastReceivedLocalTime": "Wed, 12 Feb 2025 16:50:59 GMT",
          "lastSendTime": "Wed, 12 Feb 2025 16:50:59 GMT"
        }
      ]
    }
  ],
  "node_name": "server-1.domain.com",
  "is_enabled": true
}
```

There is a separate socket for each database for each node. Each node is represented in the connections array, and each database connection to that node is represented in the `database_sockets` array. Additional timing statistics include:

- `lastCommitConfirmed`: When a commit is sent out, it should receive a confirmation from the remote server; this is the last receipt of confirmation of an outgoing commit.
- `lastReceivedRemoteTime`: This is the timestamp of the transaction that was last received. The timestamp is from when the original transaction occurred.
- `lastReceivedLocalTime`: This is the local time when the last transaction was received. If there is a difference between this and `lastReceivedRemoteTime`, it means there is a delay between the original transaction and receiving it, so the node is probably catching up/behind.
- `sendingMessage`: The timestamp of the transaction that is actively being sent. This won't exist if the replicator is waiting for the next transaction to send.

---

## Configure Cluster

Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `configure_cluster`
- `connections` _(required)_ - must be an object array with each object following the `add_node` schema.

### Body

```json
{
  "operation": "configure_cluster",
  "connections": [
    {
      "hostname": "server-two",
      "verify_tls": false,
      "authorization": {
        "username": "admin",
        "password": "password2"
      },
      "subscriptions": [
        {
          "schema": "dev",
          "table": "my-table",
          "subscribe": true,
          "publish": false
        }
      ]
    },
    {
      "hostname": "server-three",
      "verify_tls": false,
      "authorization": {
        "username": "admin",
        "password": "password3"
      },
      "subscriptions": [
        {
          "schema": "dev",
          "table": "dog",
          "subscribe": true,
          "publish": true
        }
      ]
    }
  ]
}
```

### Response: 200

```json
{
  "message": "Cluster successfully configured."
}
```

---

## Cluster Set Routes

Adds a route/routes to the `replication.routes` configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `cluster_set_routes`
- `routes` _(required)_ - the routes field is an array that specifies the routes for clustering. Each element in the array can be either a string or an object with `hostname` and `port` properties.

### Body

```json
{
  "operation": "cluster_set_routes",
  "routes": [
    "wss://server-two:9925",
    {
      "hostname": "server-three",
      "port": 9930
    }
  ]
}
```

### Response: 200

```json
{
  "message": "cluster routes successfully set",
  "set": [
    "wss://server-two:9925",
    {
      "hostname": "server-three",
      "port": 9930
    }
  ],
  "skipped": []
}
```

---

## Cluster Get Routes

Gets the replication routes from the Harper config file.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `cluster_get_routes`

### Body

```json
{
  "operation": "cluster_get_routes"
}
```

### Response: 200

```json
[
  "wss://server-two:9925",
  {
    "hostname": "server-three",
    "port": 9930
  }
]
```

---

## Cluster Delete Routes

Removes route(s) from the Harper config file. Returns a deletion success message and arrays of deleted and skipped records.
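Taken together, the three route operations support a simple inspect-and-prune workflow. A `curl` sketch, assuming a placeholder instance URL and credentials (the parameters for `cluster_delete_routes` are listed below):

```bash
# Inspect the currently configured replication routes
curl -s -X POST https://harper.example.com:9925 \
  -u admin:password \
  -H 'Content-Type: application/json' \
  -d '{"operation": "cluster_get_routes"}'

# Remove a route that is no longer needed
curl -s -X POST https://harper.example.com:9925 \
  -u admin:password \
  -H 'Content-Type: application/json' \
  -d '{"operation": "cluster_delete_routes", "routes": [{"hostname": "server-three", "port": 9930}]}'
```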
_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `cluster_delete_routes`
- `routes` _(required)_ - must be an array of route object(s)

### Body

```json
{
  "operation": "cluster_delete_routes",
  "routes": [
    {
      "hostname": "server-three",
      "port": 9930
    }
  ]
}
```

### Response: 200

```json
{
  "message": "cluster routes successfully deleted",
  "deleted": [
    {
      "hostname": "server-three",
      "port": 9930
    }
  ],
  "skipped": []
}
```
diff --git a/versioned_docs/version-4.7/developers/operations-api/components.md b/versioned_docs/version-4.7/developers/operations-api/components.md
new file mode 100644
index 00000000..30c835bc
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/components.md
@@ -0,0 +1,549 @@
---
title: Components
---

# Components

## Add Component

Creates a new component project in the component root directory using a specified template (defaults to the [application template](https://github.com/HarperFast/application-template)).

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `add_component`
- `project` _(required)_ - the name of the project you wish to create
- `template` _(optional)_ - the URL of a git repository to use as a template. Must be a string. Defaults to `https://github.com/HarperFast/application-template`
- `install_command` _(optional)_ - A command to use when installing the component. Must be a string. Defaults to `npm install`. Depending on the host environment, you can use this to switch to using an alternative package manager.
- `install_timeout` _(optional)_ - The maximum time, in milliseconds, to wait for the install command to complete. Must be a number. Defaults to `300000` (5 minutes)
- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean.

### Body

```json
{
  "operation": "add_component",
  "project": "my-component"
}
```

### Response: 200

```json
{
  "message": "Successfully added project: my-component"
}
```

---

## Deploy Component

Will deploy a component using either a base64-encoded string representation of a `.tar` file (the output from `package_component`) or a `package` value, which can be any valid NPM reference, such as a GitHub repo, an NPM package, a tarball, a local directory or a website.

If deploying with the `payload` option, Harper will decode the base64-encoded string, reconstitute the .tar file of your project folder, and extract it to the component root project directory.

If deploying with the `package` option, the package value will be written to `harperdb-config.yaml`. Then npm install will be utilized to install the component in the `node_modules` directory located in the hdb root. The value is a package reference, which should generally be a [URL reference, as described here](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies) (it is also possible to include NPM-registered packages and file paths). URL package references can directly reference tarballs that can be installed as a package. However, the most common and recommended usage is to install from a Git repository, which can be combined with a tag to deploy a specific version directly from versioned source control. When using tags, we highly recommend that you use the `semver` directive to ensure consistent and reliable installation by NPM.
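For example, a deployment pinned to a tag via the `semver` directive might look like this sketch (the project name is illustrative; the full parameter reference follows below):

```json
{
  "operation": "deploy_component",
  "project": "my-component",
  "package": "HarperDB/application-template#semver:v1.0.0"
}
```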
In addition to tags, you can also reference branches or commit numbers. Here is an example URL package reference to a (public) Git repository that doesn't require authentication: + +``` +https://github.com/HarperDB/application-template#semver:v1.0.0 +``` + +or this can be shortened to: + +``` +HarperDB/application-template#semver:v1.0.0 +``` + +You can also install from private repository if you have installed SSH keys on the server. Ensure the `host` portion of the url exactly matches the `host` used when adding ssh keys to ensure proper authentication. + +``` +git+ssh://git@github.com:my-org/my-app.git#semver:v1.0.0 +``` + +Or you can use a Github token: + +``` +https://@github.com/my-org/my-app#semver:v1.0.0 +``` + +Or you can use a GitLab Project Access Token: + +``` +https://my-project:@gitlab.com/my-group/my-project#semver:v1.0.0 +``` + +Note that your component will be installed by NPM. If your component has dependencies, NPM will attempt to download and install these as well. NPM normally uses the public registry.npmjs.org registry. If you are installing without network access to this, you may wish to define [custom registry locations](https://docs.npmjs.com/cli/v8/configuring-npm/npmrc) if you have any dependencies that need to be installed. NPM will install the deployed component and any dependencies in node_modules in the hdb root directory (typically `~/hdb/node_modules`). + +_Note: After deploying a component a restart may be required_ + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `deploy_component` +- `project` _(required)_ - the name of the project you wish to deploy +- `package` _(optional)_ - this can be any valid GitHub or NPM reference +- `payload` _(optional)_ - a base64-encoded string representation of the .tar file. Must be a string +- `restart` _(optional)_ - must be either a boolean or the string `rolling`. If set to `rolling`, a rolling restart will be triggered after the component is deployed, meaning that each node in the cluster will be sequentially restarted (waiting for the last restart to start the next). If set to `true`, the restart will not be rolling, all nodes will be restarted in parallel. If `replicated` is `true`, the restart operations will be replicated across the cluster. +- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. +- `install_command` _(optional)_ - A command to use when installing the component. Must be a string. Defaults to `npm install`. Depending on the host environment, you can use this to switch to using an alternative package manager. +- `install_timeout` _(optional)_ - The maximum time, in milliseconds, to wait for the install command to complete. Must be a number. Defaults to `300000` (5 minutes) + +### Body + +```json +{ + "operation": "deploy_component", + "project": "my-component", + "payload": "A very large base64-encoded string representation of the .tar file" +} +``` + +```json +{ + "operation": "deploy_component", + "project": "my-component", + "package": "HarperDB/application-template", + "replicated": true +} +``` + +### Response: 200 + +```json +{ + "message": "Successfully deployed: my-component" +} +``` + +--- + +## Package Component + +Creates a temporary `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string and the payload. 
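One common pattern (a sketch, assuming `jq` is installed and using placeholder instance URLs and credentials) is to package a project on a source instance and pipe the returned payload straight into a `deploy_component` call on a target instance; the full parameter reference follows below:

```bash
# Package the project on the source instance and capture the base64 payload
PAYLOAD=$(curl -s -X POST https://source.example.com:9925 \
  -u admin:password \
  -H 'Content-Type: application/json' \
  -d '{"operation": "package_component", "project": "my-component", "skip_node_modules": true}' \
  | jq -r '.payload')

# Deploy that payload to the target instance
curl -s -X POST https://target.example.com:9925 \
  -u admin:password \
  -H 'Content-Type: application/json' \
  -d "{\"operation\": \"deploy_component\", \"project\": \"my-component\", \"payload\": \"$PAYLOAD\"}"
```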
+ +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `package_component` +- `project` _(required)_ - the name of the project you wish to package +- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean + +### Body + +```json +{ + "operation": "package_component", + "project": "my-component", + "skip_node_modules": true +} +``` + +### Response: 200 + +```json +{ + "project": "my-component", + "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" +} +``` + +--- + +## Drop Component + +Deletes a file from inside the component project or deletes the complete project. + +**If just `project` is provided it will delete all that projects local files and folders** + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `drop_component` +- `project` _(required)_ - the name of the project you wish to delete or to delete from if using the `file` parameter +- `file` _(optional)_ - the path relative to your project folder of the file you wish to delete +- `replicated` _(optional)_ - if true, Harper will replicate the component deletion to all nodes in the cluster. Must be a boolean. +- `restart` _(optional)_ - if true, Harper will restart after dropping the component. Must be a boolean. + +### Body + +```json +{ + "operation": "drop_component", + "project": "my-component", + "file": "utils/myUtils.js" +} +``` + +### Response: 200 + +```json +{ + "message": "Successfully dropped: my-component/utils/myUtils.js" +} +``` + +--- + +## Get Components + +Gets all local component files and folders and any component config from `harperdb-config.yaml` + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `get_components` + +### Body + +```json +{ + "operation": "get_components" +} +``` + +### Response: 200 + +```json +{ + "name": "components", + "entries": [ + { + "package": "HarperDB/application-template", + "name": "deploy-test-gh" + }, + { + "package": "@fastify/compress", + "name": "fast-compress" + }, + { + "name": "my-component", + "entries": [ + { + "name": "LICENSE", + "mtime": "2023-08-22T16:00:40.286Z", + "size": 1070 + }, + { + "name": "index.md", + "mtime": "2023-08-22T16:00:40.287Z", + "size": 1207 + }, + { + "name": "config.yaml", + "mtime": "2023-08-22T16:00:40.287Z", + "size": 1069 + }, + { + "name": "package.json", + "mtime": "2023-08-22T16:00:40.288Z", + "size": 145 + }, + { + "name": "resources.js", + "mtime": "2023-08-22T16:00:40.289Z", + "size": 583 + }, + { + "name": "schema.graphql", + "mtime": "2023-08-22T16:00:40.289Z", + "size": 466 + }, + { + "name": "utils", + "entries": [ + { + "name": "commonUtils.js", + "mtime": "2023-08-22T16:00:40.289Z", + "size": 583 + } + ] + } + ] + } + ] +} +``` + +--- + +## Get Component File + +Gets the contents of a file inside a component project. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `get_component_file` +- `project` _(required)_ - the name of the project where the file is located +- `file` _(required)_ - the path relative to your project folder of the file you wish to view +- `encoding` _(optional)_ - the encoding that will be passed to the read file call. 
Defaults to `utf8` + +### Body + +```json +{ + "operation": "get_component_file", + "project": "my-component", + "file": "resources.js" +} +``` + +### Response: 200 + +```json +{ + "message": "/**export class MyCustomResource extends tables.TableName {\n\t/ we can define our own custom POST handler\n\tpost(content) {\n\t\t/ do something with the incoming content;\n\t\treturn super.post(content);\n\t}\n\t/ or custom GET handler\n\tget() {\n\t\t/ we can modify this resource before returning\n\t\treturn super.get();\n\t}\n}\n */\n/ we can also define a custom resource without a specific table\nexport class Greeting extends Resource {\n\t/ a \"Hello, world!\" handler\n\tget() {\n\t\treturn { greeting: 'Hello, world!' };\n\t}\n}" +} +``` + +--- + +## Set Component File + +Creates or updates a file inside a component project. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `set_component_file` +- `project` _(required)_ - the name of the project the file is located in +- `file` _(required)_ - the path relative to your project folder of the file you wish to set +- `payload` _(required)_ - what will be written to the file +- `encoding` _(optional)_ - the encoding that will be passed to the write file call. Defaults to `utf8` +- `replicated` _(optional)_ - if true, Harper will replicate the component update to all nodes in the cluster. Must be a boolean. + +### Body + +```json +{ + "operation": "set_component_file", + "project": "my-component", + "file": "test.js", + "payload": "console.log('hello world')" +} +``` + +### Response: 200 + +```json +{ + "message": "Successfully set component: test.js" +} +``` + +--- + +## Add SSH Key + +Adds an SSH key for deploying components from private repositories. This will also create an ssh config file that will be used when deploying the components. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `add_ssh_key` +- `name` _(required)_ - the name of the key +- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n` +- `host` _(required)_ - the host for the ssh config (see below). Used as part of the `package` url when deploying a component using this key. +- `hostname` _(required)_ - the hostname for the ssh config (see below). Used to map `host` to an actual domain (e.g. `github.com`) +- `known_hosts` _(optional)_ - the public SSH keys of the host your component will be retrieved from. If `hostname` is `github.com` this will be retrieved automatically. Line breaks must be delimited with `\n` +- `replicated` _(optional)_ - if true, HarperDB will replicate the key to all nodes in the cluster. Must be a boolean. 
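The `key` value is the contents of an ed25519 private key file with its newlines escaped. One way to produce such a key and escape it (a sketch using standard OpenSSH tooling; the file path and comment are illustrative):

```bash
# Generate an ed25519 deploy key with no passphrase
ssh-keygen -t ed25519 -f ./harperdb-private-component.key -N "" -C "harper-deploy"

# Replace literal newlines with \n so the contents can be embedded in the JSON `key` field
awk 'BEGIN{ORS="\\n"} {print}' ./harperdb-private-component.key
```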
### Body

```json
{
  "operation": "add_ssh_key",
  "name": "harperdb-private-component",
  "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nfake\nkey\n-----END OPENSSH PRIVATE KEY-----\n",
  "host": "harperdb-private-component.github.com",
  "hostname": "github.com"
}
```

### Response: 200

```json
{
  "message": "Added ssh key: harperdb-private-component"
}
```

### Generated Config and Deploy Component "package" string examples

```
#harperdb-private-component
Host harperdb-private-component.github.com
  HostName github.com
  User git
  IdentityFile /hdbroot/ssh/harperdb-private-component.key
  IdentitiesOnly yes
```

```
"package": "git+ssh://git@:.git#semver:v1.2.3"

"package": "git+ssh://git@harperdb-private-component.github.com:HarperDB/harperdb-private-component.git#semver:v1.2.3"
```

Note that `deploy_component` with a package uses `npm install`, so the URL must be a valid npm-format URL. The above is an example of a URL using a tag in the repo to install.

---

## Update SSH Key

Updates the private key contents of an existing SSH key.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `update_ssh_key`
- `name` _(required)_ - the name of the key to be updated
- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n`
- `host` _(required)_ - the host for the ssh config (see below). Used as part of the `package` url when deploying a component using this key.
- `hostname` _(required)_ - the hostname for the ssh config (see below). Used to map `host` to an actual domain (e.g. `github.com`)
- `known_hosts` _(optional)_ - the public SSH keys of the host your component will be retrieved from. If `hostname` is `github.com` this will be retrieved automatically. Line breaks must be delimited with `\n`
- `replicated` _(optional)_ - if true, HarperDB will replicate the key to all nodes in the cluster. Must be a boolean.

### Body

```json
{
  "operation": "update_ssh_key",
  "name": "harperdb-private-component",
  "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nNEWFAKE\nkey\n-----END OPENSSH PRIVATE KEY-----\n",
  "host": "harperdb-private-component.github.com",
  "hostname": "github.com"
}
```

### Response: 200

```json
{
  "message": "Updated ssh key: harperdb-private-component"
}
```

## Delete SSH Key

Deletes an SSH key. This will also remove it from the generated SSH config.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `delete_ssh_key`
- `name` _(required)_ - the name of the key to be deleted
- `replicated` _(optional)_ - if true, Harper will replicate the key deletion to all nodes in the cluster. Must be a boolean.

### Body

```json
{
  "operation": "delete_ssh_key",
  "name": "harperdb-private-component"
}
```

### Response: 200

```json
{
  "message": "Deleted ssh key: harperdb-private-component"
}
```

---

## List SSH Keys

Lists the names of the added SSH keys.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `list_ssh_keys`

### Body

```json
{
  "operation": "list_ssh_keys"
}
```

### Response: 200

```json
[
  {
    "name": "harperdb-private-component"
  }
]
```

_Note: Additional SSH keys would appear as more objects in this array_

---

## Set SSH Known Hosts

Sets the SSH known_hosts file. This will overwrite the file.
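The `known_hosts` contents are typically gathered with `ssh-keyscan`; a sketch (escape the newlines as `\n` before embedding the output in the JSON payload):

```bash
# Collect the host keys for github.com and escape the newlines for the JSON payload
ssh-keyscan github.com | awk 'BEGIN{ORS="\\n"} {print}'
```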
_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `set_ssh_known_hosts`
- `known_hosts` _(required)_ - the contents to set the known_hosts to. Line breaks must be delimited with `\n`
- `replicated` _(optional)_ - if true, Harper will replicate the known hosts to all nodes in the cluster. Must be a boolean.

### Body

```json
{
  "operation": "set_ssh_known_hosts",
  "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n"
}
```

### Response: 200

```json
{
  "message": "Known hosts successfully set"
}
```

## Get SSH Known Hosts

Gets the contents of the known_hosts file.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `get_ssh_known_hosts`

### Body

```json
{
  "operation": "get_ssh_known_hosts"
}
```

### Response: 200

```json
{
  "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n"
}
```

---

## Install Node Modules

This operation is deprecated, as it is handled automatically by deploy_component and restart.
Executes npm install against specified custom function projects.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `install_node_modules`
- `projects` _(required)_ - must be an array of custom function projects.
- `dry_run` _(optional)_ - refers to the npm --dry-run flag: [https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run](https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run). Defaults to false.
+ +### Body + +```json +{ + "operation": "install_node_modules", + "projects": ["dogs", "cats"], + "dry_run": true +} +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/configuration.md b/versioned_docs/version-4.7/developers/operations-api/configuration.md new file mode 100644 index 00000000..8f2365da --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/configuration.md @@ -0,0 +1,135 @@ +--- +title: Configuration +--- + +# Configuration + +## Set Configuration + +Modifies the Harper configuration file parameters. Must follow with a restart or restart_service operation. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `set_configuration` +- `logging_level` _(example/optional)_ - one or more configuration keywords to be updated in the Harper configuration file +- `clustering_enabled` _(example/optional)_ - one or more configuration keywords to be updated in the Harper configuration file + +### Body + +```json +{ + "operation": "set_configuration", + "logging_level": "trace", + "clustering_enabled": true +} +``` + +### Response: 200 + +```json +{ + "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." +} +``` + +--- + +## Get Configuration + +Returns the Harper configuration parameters. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `get_configuration` + +### Body + +```json +{ + "operation": "get_configuration" +} +``` + +### Response: 200 + +```json +{ + "http": { + "compressionThreshold": 1200, + "cors": false, + "corsAccessList": [null], + "keepAliveTimeout": 30000, + "port": 9926, + "securePort": null, + "timeout": 120000 + }, + "threads": 11, + "authentication": { + "cacheTTL": 30000, + "enableSessions": true, + "operationTokenTimeout": "1d", + "refreshTokenTimeout": "30d" + }, + "analytics": { + "aggregatePeriod": 60 + }, + "replication": { + "hostname": "node1", + "databases": "*", + "routes": null, + "url": "wss://127.0.0.1:9925" + }, + "componentsRoot": "/Users/hdb/components", + "localStudio": { + "enabled": false + }, + "logging": { + "auditAuthEvents": { + "logFailed": false, + "logSuccessful": false + }, + "auditLog": true, + "auditRetention": "3d", + "file": true, + "level": "error", + "root": "/Users/hdb/log", + "rotation": { + "enabled": false, + "compress": false, + "interval": null, + "maxSize": null, + "path": "/Users/hdb/log" + }, + "stdStreams": false + }, + "mqtt": { + "network": { + "port": 1883, + "securePort": 8883 + }, + "webSocket": true, + "requireAuthentication": true + }, + "operationsApi": { + "network": { + "cors": true, + "corsAccessList": ["*"], + "domainSocket": "/Users/hdb/operations-server", + "port": 9925, + "securePort": null + } + }, + "rootPath": "/Users/hdb", + "storage": { + "writeAsync": false, + "caching": true, + "compression": false, + "noReadAhead": true, + "path": "/Users/hdb/database", + "prefetchWrites": true + }, + "tls": { + "privateKey": "/Users/hdb/keys/privateKey.pem" + } +} +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/custom-functions.md b/versioned_docs/version-4.7/developers/operations-api/custom-functions.md new file mode 100644 index 00000000..23709148 --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/custom-functions.md @@ -0,0 +1,279 @@ +--- +title: Custom Functions +--- + +# Custom Functions + +_These operations are deprecated._ + +## Custom Functions Status + +Returns the state 
of the Custom Functions server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `custom_functions_status`

### Body

```json
{
  "operation": "custom_functions_status"
}
```

### Response: 200

```json
{
  "is_enabled": true,
  "port": 9926,
  "directory": "/Users/myuser/hdb/custom_functions"
}
```

---

## Get Custom Functions

Returns an array of projects within the Custom Functions root project directory. Each project has details including each of the files in the routes and helpers directories, and the total file count in the static folder.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `get_custom_functions`

### Body

```json
{
  "operation": "get_custom_functions"
}
```

### Response: 200

```json
{
  "dogs": {
    "routes": ["examples"],
    "helpers": ["example"],
    "static": 3
  }
}
```

---

## Get Custom Function

Returns the content of the specified file as text. Harper Studio uses this call to render the file content in its built-in code editor.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `get_custom_function`
- `project` _(required)_ - the name of the project containing the file for which you wish to get content
- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to get content - must be either routes or helpers
- `file` _(required)_ - the name of the file for which you wish to get content - should not include the file extension (which is always .js)

### Body

```json
{
  "operation": "get_custom_function",
  "project": "dogs",
  "type": "helpers",
  "file": "example"
}
```

### Response: 200

```json
{
  "message": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n"
}
```

---

## Set Custom Function

Updates the content of the specified file. Harper Studio uses this call to save any changes made through its built-in code editor.
+ +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `set_custom_function` +- `project` _(required)_ - the name of the project containing the file for which you wish to set content +- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to set content - must be either routes or helpers +- `file` _(required)_ - the name of the file for which you wish to set content - should not include the file extension (which is always .js) +- `function_content` _(required)_ - the content you wish to save into the specified file + +### Body + +```json +{ + "operation": "set_custom_function", + "project": "dogs", + "type": "helpers", + "file": "example", + "function_content": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" +} +``` + +### Response: 200 + +```json +{ + "message": "Successfully updated custom function: example.js" +} +``` + +--- + +## Drop Custom Function + +Deletes the specified file. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `drop_custom_function` +- `project` _(required)_ - the name of the project containing the file you wish to delete +- `type` _(required)_ - the name of the sub-folder containing the file you wish to delete. Must be either routes or helpers +- `file` _(required)_ - the name of the file you wish to delete. Should not include the file extension (which is always .js) + +### Body + +```json +{ + "operation": "drop_custom_function", + "project": "dogs", + "type": "helpers", + "file": "example" +} +``` + +### Response: 200 + +```json +{ + "message": "Successfully deleted custom function: example.js" +} +``` + +--- + +## Add Custom Function Project + +Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). 
_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `add_custom_function_project`
- `project` _(required)_ - the name of the project you wish to create

### Body

```json
{
  "operation": "add_custom_function_project",
  "project": "dogs"
}
```

### Response: 200

```json
{
  "message": "Successfully created custom function project: dogs"
}
```

---

## Drop Custom Function Project

Deletes the specified project folder and all of its contents.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `drop_custom_function_project`
- `project` _(required)_ - the name of the project you wish to delete

### Body

```json
{
  "operation": "drop_custom_function_project",
  "project": "dogs"
}
```

### Response: 200

```json
{
  "message": "Successfully deleted project: dogs"
}
```

---

## Package Custom Function Project

Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string, the payload and the file.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `package_custom_function_project`
- `project` _(required)_ - the name of the project you wish to package up for deployment
- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean.

### Body

```json
{
  "operation": "package_custom_function_project",
  "project": "dogs",
  "skip_node_modules": true
}
```

### Response: 200

```json
{
  "project": "dogs",
  "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
  "file": "/tmp/d27f1154-5d82-43f0-a5fb-a3018f366081.tar"
}
```

---

## Deploy Custom Function Project

Takes the output of package_custom_function_project, decodes the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the Custom Functions root project directory.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `deploy_custom_function_project`
- `project` _(required)_ - the name of the project you wish to deploy. Must be a string
- `payload` _(required)_ - a base64-encoded string representation of the .tar file. Must be a string

### Body

```json
{
  "operation": "deploy_custom_function_project",
  "project": "dogs",
  "payload": "A very large base64-encoded string representation of the .tar file"
}
```

### Response: 200

```json
{
  "message": "Successfully deployed project: dogs"
}
```
diff --git a/versioned_docs/version-4.7/developers/operations-api/databases-and-tables.md b/versioned_docs/version-4.7/developers/operations-api/databases-and-tables.md
new file mode 100644
index 00000000..936425c3
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/databases-and-tables.md
@@ -0,0 +1,388 @@
---
title: Databases and Tables
---

# Databases and Tables

## Describe All

Returns the definitions of all databases and tables within the instance. Record counts above 5000 records are estimated, as determining the exact count can be expensive. When the record count is estimated, this is indicated by the inclusion of a confidence interval of `estimated_record_range`. If you need the exact count, you can include `"exact_count": true` in the operation, but be aware that this requires a full table scan (which may be expensive).
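For example, to force an exact count despite the cost of the full table scan (the parameter list follows below):

```json
{
  "operation": "describe_all",
  "exact_count": true
}
```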
+ +- `operation` _(required)_ - must always be `describe_all` + +### Body + +```json +{ + "operation": "describe_all" +} +``` + +### Response: 200 + +```json +{ + "dev": { + "dog": { + "schema": "dev", + "name": "dog", + "hash_attribute": "id", + "audit": true, + "schema_defined": false, + "attributes": [ + { + "attribute": "id", + "indexed": true, + "is_primary_key": true + }, + { + "attribute": "__createdtime__", + "indexed": true + }, + { + "attribute": "__updatedtime__", + "indexed": true + }, + { + "attribute": "type", + "indexed": true + } + ], + "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", + "record_count": 4000, + "estimated_record_range": [3976, 4033], + "last_updated_record": 1697658683698.4504 + } + } +} +``` + +--- + +## Describe database + +Returns the definitions of all tables within the specified database. + +- `operation` _(required)_ - must always be `describe_database` +- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` + +### Body + +```json +{ + "operation": "describe_database", + "database": "dev" +} +``` + +### Response: 200 + +```json +{ + "dog": { + "schema": "dev", + "name": "dog", + "hash_attribute": "id", + "audit": true, + "schema_defined": false, + "attributes": [ + { + "attribute": "id", + "indexed": true, + "is_primary_key": true + }, + { + "attribute": "__createdtime__", + "indexed": true + }, + { + "attribute": "__updatedtime__", + "indexed": true + }, + { + "attribute": "type", + "indexed": true + } + ], + "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", + "record_count": 4000, + "estimated_record_range": [3976, 4033], + "last_updated_record": 1697658683698.4504 + } +} +``` + +--- + +## Describe Table + +Returns the definition of the specified table. + +- `operation` _(required)_ - must always be `describe_table` +- `table` _(required)_ - table you wish to describe +- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` + +### Body + +```json +{ + "operation": "describe_table", + "table": "dog" +} +``` + +### Response: 200 + +```json +{ + "schema": "dev", + "name": "dog", + "hash_attribute": "id", + "audit": true, + "schema_defined": false, + "attributes": [ + { + "attribute": "id", + "indexed": true, + "is_primary_key": true + }, + { + "attribute": "__createdtime__", + "indexed": true + }, + { + "attribute": "__updatedtime__", + "indexed": true + }, + { + "attribute": "type", + "indexed": true + } + ], + "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", + "record_count": 4000, + "estimated_record_range": [3976, 4033], + "last_updated_record": 1697658683698.4504 +} +``` + +--- + +## Create database + +Create a new database. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `create_database` +- `database` _(optional)_ - name of the database you are creating. The default is `data` + +### Body + +```json +{ + "operation": "create_database", + "database": "dev" +} +``` + +### Response: 200 + +```json +{ + "message": "database 'dev' successfully created" +} +``` + +--- + +## Drop database + +Drop an existing database. NOTE: Dropping a database will delete all tables and all of their records in that database. 
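When the drop should also be applied cluster-wide, the optional `replicated` flag described below can be included; a sketch:

```json
{
  "operation": "drop_database",
  "database": "dev",
  "replicated": true
}
```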
_Operation is restricted to super_user roles only_

- `operation` _(required)_ - this should always be `drop_database`
- `database` _(required)_ - name of the database you are dropping
- `replicated` _(optional)_ - if true, Harper will replicate the drop to all nodes in the cluster. Must be a boolean.

### Body

```json
{
  "operation": "drop_database",
  "database": "dev"
}
```

### Response: 200

```json
{
  "message": "successfully deleted 'dev'"
}
```

---

## Create Table

Create a new table within a database.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `create_table`
- `database` _(optional)_ - name of the database where you want your table to live. If the database does not exist, it will be created. If the `database` property is not provided, it will default to `data`.
- `table` _(required)_ - name of the table you are creating
- `primary_key` _(required)_ - primary key for the table
- `attributes` _(optional)_ - an array of attributes that specifies the schema for the table, that is, the set of attributes allowed on records in the table. When attributes are supplied, the table will not be considered a "dynamic schema" table, and attributes will not be auto-added when records with new properties are inserted. Each attribute is specified as:
  - `name` _(required)_ - the name of the attribute
  - `indexed` _(optional)_ - indicates if the attribute should be indexed
  - `type` _(optional)_ - specifies the data type of the attribute (can be String, Int, Float, Date, ID, Any)
- `expiration` _(optional)_ - specifies the time-to-live or expiration of records in the table, in seconds, before they are evicted (records are not evicted on any timer if not specified)

### Body

```json
{
  "operation": "create_table",
  "database": "dev",
  "table": "dog",
  "primary_key": "id"
}
```

### Response: 200

```json
{
  "message": "table 'dev.dog' successfully created."
}
```

---

## Drop Table

Drop an existing database table. NOTE: Dropping a table will delete all associated records in that table.

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - this should always be `drop_table`
- `database` _(optional)_ - database where the table you are dropping lives. The default is `data`
- `table` _(required)_ - name of the table you are dropping
- `replicated` _(optional)_ - if true, Harper will replicate the drop to all nodes in the cluster. Must be a boolean.

### Body

```json
{
  "operation": "drop_table",
  "database": "dev",
  "table": "dog"
}
```

### Response: 200

```json
{
  "message": "successfully deleted table 'dev.dog'"
}
```

---

## Create Attribute

Create a new attribute within the specified table. **The create_attribute operation can be used for admins wishing to pre-define database values for setting role-based permissions or for any other reason.**

_Note: Harper will automatically create new attributes on insert and update if they do not already exist within the database._

- `operation` _(required)_ - must always be `create_attribute`
- `database` _(optional)_ - name of the database of the table to which you want to add your attribute.
The default is `data` +- `table` _(required)_ - name of the table where you want to add your attribute to live +- `attribute` _(required)_ - name for the attribute + +### Body + +```json +{ + "operation": "create_attribute", + "database": "dev", + "table": "dog", + "attribute": "is_adorable" +} +``` + +### Response: 200 + +```json +{ + "message": "inserted 1 of 1 records", + "skipped_hashes": [], + "inserted_hashes": ["383c0bef-5781-4e1c-b5c8-987459ad0831"] +} +``` + +--- + +## Drop Attribute + +Drop an existing attribute from the specified table. NOTE: Dropping an attribute will delete all associated attribute values in that table. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - this should always be `drop_attribute` +- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` +- `table` _(required)_ - table where the attribute you are dropping lives +- `attribute` _(required)_ - attribute that you intend to drop + +### Body + +```json +{ + "operation": "drop_attribute", + "database": "dev", + "table": "dog", + "attribute": "is_adorable" +} +``` + +### Response: 200 + +```json +{ + "message": "successfully deleted attribute 'is_adorable'" +} +``` + +--- + +## Get Backup + +This will return a snapshot of the requested database. This provides a means for backing up the database through the operations API. The response will be the raw database file (in binary format), which can later be restored as a database file by copying into the appropriate hdb/databases directory (with Harper not running). The returned file is a snapshot of the database at the moment in time that the get_backup operation begins. This also supports backing up individual tables in a database. However, this is a more expensive operation than backing up a database in whole, and will lose any transactional atomicity between writes across tables, so generally it is recommended that you backup the entire database. + +It is important to note that trying to copy a database file that is in use (Harper actively running and writing to the file) using standard file copying tools is not safe (the copied file will likely be corrupt), which is why using this snapshot operation is recommended for backups (volume snapshots are also a good way to backup Harper databases). + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - this should always be `get_backup` +- `database` _(required)_ - this is the database that will be snapshotted and returned +- `table` _(optional)_ - this will specify a specific table to backup +- `tables` _(optional)_ - this will specify a specific set of tables to backup + +### Body + +```json +{ + "operation": "get_backup", + "database": "dev" +} +``` + +### Response: 200 + +``` +The database in raw binary data format +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/index.md b/versioned_docs/version-4.7/developers/operations-api/index.md new file mode 100644 index 00000000..ad44d9de --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/index.md @@ -0,0 +1,55 @@ +--- +title: Operations API +--- + +# Operations API + +The operations API provides a full set of capabilities for configuring, deploying, administering, and controlling Harper. To send operations to the operations API, you send a POST request to the operations API endpoint, which [defaults to port 9925](../deployments/configuration#operationsapi), on the root path, where the body is the operations object. 
These requests need to be authenticated, which can be done with [basic auth](./security#basic-auth) or [JWT authentication](./security#jwt-auth). For example, a request to create a table would be performed as:

```http
POST https://my-harperdb-server:9925/
Authorization: Basic YourBase64EncodedInstanceUser:Pass
Content-Type: application/json

{
  "operation": "create_table",
  "table": "my-table",
  "primary_key": "id"
}
```

The operations API reference is available below and categorized by topic:

- [Quick Start Examples](operations-api/quickstart-examples)
- [Databases and Tables](operations-api/databases-and-tables)
- [NoSQL Operations](operations-api/nosql-operations)
- [Bulk Operations](operations-api/bulk-operations)
- [Users and Roles](operations-api/users-and-roles)
- [Clustering](operations-api/clustering)
- [Clustering with NATS](operations-api/clustering-nats)
- [Components](operations-api/components)
- [Registration](operations-api/registration)
- [Jobs](operations-api/jobs)
- [Logs](operations-api/logs)
- [System Operations](operations-api/system-operations)
- [Configuration](operations-api/configuration)
- [Certificate Management](operations-api/certificate-management)
- [Token Authentication](operations-api/token-authentication)
- [SQL Operations](operations-api/sql-operations)
- [Advanced JSON SQL Examples](operations-api/advanced-json-sql-examples)
- [Analytics](operations-api/analytics)
- [Past Release API Documentation](https://olddocs.harperdb.io)

## More Examples

Here is an example of using `curl` to make an operations API request:

```bash
curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \
--header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \
--header 'Content-Type: application/json' \
--data-raw '{
"operation": "create_schema",
"schema": "dev"
}'
```
diff --git a/versioned_docs/version-4.7/developers/operations-api/jobs.md b/versioned_docs/version-4.7/developers/operations-api/jobs.md
new file mode 100644
index 00000000..cf71fa00
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/jobs.md
@@ -0,0 +1,87 @@
---
title: Jobs
---

# Jobs

## Get Job

Returns job status, metrics, and messages for the specified job ID.

- `operation` _(required)_ - must always be `get_job`
- `id` _(required)_ - the id of the job you wish to view

### Body

```json
{
  "operation": "get_job",
  "id": "4a982782-929a-4507-8794-26dae1132def"
}
```

### Response: 200

```json
[
  {
    "__createdtime__": 1611615798782,
    "__updatedtime__": 1611615801207,
    "created_datetime": 1611615798774,
    "end_datetime": 1611615801206,
    "id": "4a982782-929a-4507-8794-26dae1132def",
    "job_body": null,
    "message": "successfully loaded 350 of 350 records",
    "start_datetime": 1611615798805,
    "status": "COMPLETE",
    "type": "csv_url_load",
    "user": "HDB_ADMIN",
    "start_datetime_converted": "2021-01-25T23:03:18.805Z",
    "end_datetime_converted": "2021-01-25T23:03:21.206Z"
  }
]
```

---

## Search Jobs By Start Date

Returns a list of job statuses, metrics, and messages for all jobs executed within the specified time window.
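For example, a sketch that searches the last 24 hours of jobs (GNU `date` and placeholder credentials assumed; the parameters are described below):

```bash
# Compute a 24-hour window in the ISO-like format the operation accepts
FROM=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%S.000+0000)
TO=$(date -u +%Y-%m-%dT%H:%M:%S.000+0000)

# Search for jobs started within that window
curl -s -X POST https://harper.example.com:9925 \
  -u admin:password \
  -H 'Content-Type: application/json' \
  -d "{\"operation\": \"search_jobs_by_start_date\", \"from_date\": \"$FROM\", \"to_date\": \"$TO\"}"
```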
_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `search_jobs_by_start_date`
- `from_date` _(required)_ - the date you wish to start the search
- `to_date` _(required)_ - the date you wish to end the search

### Body

```json
{
  "operation": "search_jobs_by_start_date",
  "from_date": "2021-01-25T22:05:27.464+0000",
  "to_date": "2021-01-25T23:05:27.464+0000"
}
```

### Response: 200

```json
[
  {
    "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1",
    "user": "HDB_ADMIN",
    "type": "csv_url_load",
    "status": "COMPLETE",
    "start_datetime": 1611613284781,
    "end_datetime": 1611613287204,
    "job_body": null,
    "message": "successfully loaded 350 of 350 records",
    "created_datetime": 1611613284764,
    "__createdtime__": 1611613284767,
    "__updatedtime__": 1611613287207,
    "start_datetime_converted": "2021-01-25T22:21:24.781Z",
    "end_datetime_converted": "2021-01-25T22:21:27.204Z"
  }
]
```
diff --git a/versioned_docs/version-4.7/developers/operations-api/logs.md b/versioned_docs/version-4.7/developers/operations-api/logs.md
new file mode 100644
index 00000000..4bf6b518
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/logs.md
@@ -0,0 +1,732 @@
---
title: Logs
---

# Logs

## Read Harper Log

Returns log outputs from the primary Harper log based on the provided search criteria. [Read more about Harper logging here](../../administration/logging/standard-logging#read-logs-via-the-api).

_Operation is restricted to super_user roles only_

- `operation` _(required)_ - must always be `read_log`
- `start` _(optional)_ - result to start with. Default is 0, the first log in `hdb.log`. Must be a number
- `limit` _(optional)_ - number of results returned. Default is 1000. Must be a number
- `level` _(optional)_ - error level to filter on. Default behavior is all levels. Must be `notify`, `error`, `warn`, `info`, `debug` or `trace`
- `from` _(optional)_ - date to begin showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is first log in `hdb.log`
- `until` _(optional)_ - date to end showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is last log in `hdb.log`
- `order` _(optional)_ - order in which to display logs, `desc` or `asc`, by timestamp. By default, `hdb.log` order is maintained

### Body

```json
{
  "operation": "read_log",
  "start": 0,
  "limit": 1000,
  "level": "error",
  "from": "2021-01-25T22:05:27.464+0000",
  "until": "2021-01-25T23:05:27.464+0000",
  "order": "desc"
}
```

### Response: 200

```json
[
  {
    "level": "notify",
    "message": "Connected to cluster server.",
    "timestamp": "2021-01-25T23:03:20.710Z",
    "thread": "main/0",
    "tags": []
  },
  {
    "level": "warn",
    "message": "Login failed",
    "timestamp": "2021-01-25T22:24:45.113Z",
    "thread": "http/9",
    "tags": []
  },
  {
    "level": "error",
    "message": "unknown attribute 'name and breed'",
    "timestamp": "2021-01-25T22:23:24.167Z",
    "thread": "http/9",
    "tags": []
  }
]
```

---

## Read Transaction Log

Returns all transactions logged for the specified database table. You may filter your results with the optional from, to, and limit fields. [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log).
+ +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `read_transaction_log` +- `schema` _(required)_ - schema under which the transaction log resides +- `table` _(required)_ - table under which the transaction log resides +- `from` _(optional)_ - time format must be millisecond-based epoch in UTC +- `to` _(optional)_ - time format must be millisecond-based epoch in UTC +- `limit` _(optional)_ - max number of logs you want to receive. Must be a number + +### Body + +```json +{ + "operation": "read_transaction_log", + "schema": "dev", + "table": "dog", + "from": 1560249020865, + "to": 1660585656639, + "limit": 10 +} +``` + +### Response: 200 + +```json +[ + { + "operation": "insert", + "user": "admin", + "timestamp": 1660165619736, + "records": [ + { + "id": 1, + "dog_name": "Penny", + "owner_name": "Kyle", + "breed_id": 154, + "age": 7, + "weight_lbs": 38, + "__updatedtime__": 1660165619688, + "__createdtime__": 1660165619688 + } + ] + }, + { + "operation": "insert", + "user": "admin", + "timestamp": 1660165619813, + "records": [ + { + "id": 2, + "dog_name": "Harper", + "owner_name": "Stephen", + "breed_id": 346, + "age": 7, + "weight_lbs": 55, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 3, + "dog_name": "Alby", + "owner_name": "Kaylan", + "breed_id": 348, + "age": 7, + "weight_lbs": 84, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 4, + "dog_name": "Billy", + "owner_name": "Zach", + "breed_id": 347, + "age": 6, + "weight_lbs": 60, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 5, + "dog_name": "Rose Merry", + "owner_name": "Zach", + "breed_id": 348, + "age": 8, + "weight_lbs": 15, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 6, + "dog_name": "Kato", + "owner_name": "Kyle", + "breed_id": 351, + "age": 6, + "weight_lbs": 32, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 7, + "dog_name": "Simon", + "owner_name": "Fred", + "breed_id": 349, + "age": 3, + "weight_lbs": 35, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 8, + "dog_name": "Gemma", + "owner_name": "Stephen", + "breed_id": 350, + "age": 5, + "weight_lbs": 55, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 9, + "dog_name": "Yeti", + "owner_name": "Jaxon", + "breed_id": 200, + "age": 5, + "weight_lbs": 55, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 10, + "dog_name": "Monkey", + "owner_name": "Aron", + "breed_id": 271, + "age": 7, + "weight_lbs": 35, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 11, + "dog_name": "Bode", + "owner_name": "Margo", + "breed_id": 104, + "age": 8, + "weight_lbs": 75, + "adorable": true, + "__updatedtime__": 1660165619797, + "__createdtime__": 1660165619797 + }, + { + "id": 12, + "dog_name": "Tucker", + "owner_name": "David", + "breed_id": 346, + "age": 2, + "weight_lbs": 60, + "adorable": true, + "__updatedtime__": 1660165619798, + "__createdtime__": 1660165619798 + }, + { + "id": 13, + "dog_name": "Jagger", + "owner_name": "Margo", + "breed_id": 271, + "age": 7, + "weight_lbs": 35, + "adorable": true, + 
"__updatedtime__": 1660165619798, + "__createdtime__": 1660165619798 + } + ] + }, + { + "operation": "update", + "user": "admin", + "timestamp": 1660165620040, + "records": [ + { + "id": 1, + "dog_name": "Penny B", + "__updatedtime__": 1660165620036 + } + ] + } +] +``` + +--- + +## Delete Transaction Logs Before + +Deletes transaction log data for the specified database table that is older than the specified timestamp. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `delete_transaction_log_before` +- `schema` _(required)_ - schema under which the transaction log resides. Must be a string +- `table` _(required)_ - table under which the transaction log resides. Must be a string +- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC + +### Body + +```json +{ + "operation": "delete_transaction_logs_before", + "schema": "dev", + "table": "dog", + "timestamp": 1598290282817 +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 26a6d3a6-6d77-40f9-bee7-8d6ef479a126" +} +``` + +--- + +## Read Audit Log + +AuditLog must be enabled in the Harper configuration file to make this request. Returns a verbose history of all transactions logged for the specified database table, including original data records. You may filter your results with the optional search_type and search_values fields. [Read more about Harper transaction logs here.](../../administration/logging/transaction-logging#read_transaction_log) + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `read_audit_log` +- `schema` _(required)_ - schema under which the transaction log resides +- `table` _(required)_ - table under which the transaction log resides +- `search_type` _(optional)_ - possibilities are `hash_value`, `timestamp` and `username` +- `search_values` _(optional)_ - an array of string or numbers relating to search_type + +### Body + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog" +} +``` + +### Response: 200 + +```json +[ + { + "operation": "insert", + "user_name": "admin", + "timestamp": 1660585635882.288, + "hash_values": [318], + "records": [ + { + "id": 318, + "dog_name": "Polliwog", + "__updatedtime__": 1660585635876, + "__createdtime__": 1660585635876 + } + ] + }, + { + "operation": "insert", + "user_name": "admin", + "timestamp": 1660585716133.01, + "hash_values": [444], + "records": [ + { + "id": 444, + "dog_name": "Davis", + "__updatedtime__": 1660585716128, + "__createdtime__": 1660585716128 + } + ] + }, + { + "operation": "update", + "user_name": "admin", + "timestamp": 1660585740558.415, + "hash_values": [444], + "records": [ + { + "id": 444, + "fur_type": "coarse", + "__updatedtime__": 1660585740556 + } + ], + "original_records": [ + { + "id": 444, + "dog_name": "Davis", + "__updatedtime__": 1660585716128, + "__createdtime__": 1660585716128 + } + ] + }, + { + "operation": "delete", + "user_name": "admin", + "timestamp": 1660585759710.56, + "hash_values": [444], + "original_records": [ + { + "id": 444, + "dog_name": "Davis", + "__updatedtime__": 1660585740556, + "__createdtime__": 1660585716128, + "fur_type": "coarse" + } + ] + } +] +``` + +--- + +## Read Audit Log by timestamp + +AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. 
+
+---
+
+## Read Audit Log by timestamp
+
+AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log).
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `read_audit_log`
+- `schema` _(required)_ - schema under which the transaction log resides
+- `table` _(required)_ - table under which the transaction log resides
+- `search_type` _(optional)_ - timestamp
+- `search_values` _(optional)_ - an array containing a maximum of two values \[`from_timestamp`, `to_timestamp`] defining the range of transactions you would like to view.
+	- Timestamp format is millisecond-based epoch in UTC
+	- If no items are supplied then all transactions are returned
+	- If only one entry is supplied then all transactions after the supplied timestamp will be returned
+
+### Body
+
+```json
+{
+	"operation": "read_audit_log",
+	"schema": "dev",
+	"table": "dog",
+	"search_type": "timestamp",
+	"search_values": [1660585740558, 1660585759710.56]
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"operation": "update",
+		"user_name": "admin",
+		"timestamp": 1660585740558.415,
+		"hash_values": [444],
+		"records": [
+			{
+				"id": 444,
+				"fur_type": "coarse",
+				"__updatedtime__": 1660585740556
+			}
+		],
+		"original_records": [
+			{
+				"id": 444,
+				"dog_name": "Davis",
+				"__updatedtime__": 1660585716128,
+				"__createdtime__": 1660585716128
+			}
+		]
+	},
+	{
+		"operation": "delete",
+		"user_name": "admin",
+		"timestamp": 1660585759710.56,
+		"hash_values": [444],
+		"original_records": [
+			{
+				"id": 444,
+				"dog_name": "Davis",
+				"__updatedtime__": 1660585740556,
+				"__createdtime__": 1660585716128,
+				"fur_type": "coarse"
+			}
+		]
+	}
+]
+```
+
+---
+
+## Read Audit Log by username
+
+AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed by the specified user. [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log).
+ +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `read_audit_log` +- `schema` _(required)_ - schema under which the transaction log resides +- `table` _(required)_ - table under which the transaction log resides +- `search_type` _(optional)_ - username +- `search_values` _(optional)_ - the Harper user for whom you would like to view transactions + +### Body + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "username", + "search_values": ["admin"] +} +``` + +### Response: 200 + +```json +{ + "admin": [ + { + "operation": "insert", + "user_name": "admin", + "timestamp": 1660585635882.288, + "hash_values": [318], + "records": [ + { + "id": 318, + "dog_name": "Polliwog", + "__updatedtime__": 1660585635876, + "__createdtime__": 1660585635876 + } + ] + }, + { + "operation": "insert", + "user_name": "admin", + "timestamp": 1660585716133.01, + "hash_values": [444], + "records": [ + { + "id": 444, + "dog_name": "Davis", + "__updatedtime__": 1660585716128, + "__createdtime__": 1660585716128 + } + ] + }, + { + "operation": "update", + "user_name": "admin", + "timestamp": 1660585740558.415, + "hash_values": [444], + "records": [ + { + "id": 444, + "fur_type": "coarse", + "__updatedtime__": 1660585740556 + } + ], + "original_records": [ + { + "id": 444, + "dog_name": "Davis", + "__updatedtime__": 1660585716128, + "__createdtime__": 1660585716128 + } + ] + }, + { + "operation": "delete", + "user_name": "admin", + "timestamp": 1660585759710.56, + "hash_values": [444], + "original_records": [ + { + "id": 444, + "dog_name": "Davis", + "__updatedtime__": 1660585740556, + "__createdtime__": 1660585716128, + "fur_type": "coarse" + } + ] + }, + { + "operation": "update", + "user_name": "admin", + "timestamp": 1660586298457.224, + "hash_values": [318], + "records": [ + { + "id": 318, + "fur_type": "super fluffy", + "__updatedtime__": 1660586298455 + } + ], + "original_records": [ + { + "id": 318, + "dog_name": "Polliwog", + "__updatedtime__": 1660585635876, + "__createdtime__": 1660585635876 + } + ] + } + ] +} +``` + +--- + +## Read Audit Log by hash_value + +AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed to the specified hash value(s). [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). 
+ +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `read_audit_log` +- `schema` _(required)_ - schema under which the transaction log resides +- `table` _(required)_ - table under which the transaction log resides +- `search_type` _(optional)_ - hash_value +- `search_values` _(optional)_ - an array of hash_attributes for which you wish to see transaction logs + +### Body + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "hash_value", + "search_values": [318] +} +``` + +### Response: 200 + +```json +{ + "318": [ + { + "operation": "insert", + "user_name": "admin", + "timestamp": 1660585635882.288, + "records": [ + { + "id": 318, + "dog_name": "Polliwog", + "__updatedtime__": 1660585635876, + "__createdtime__": 1660585635876 + } + ] + }, + { + "operation": "update", + "user_name": "admin", + "timestamp": 1660586298457.224, + "records": [ + { + "id": 318, + "fur_type": "super fluffy", + "__updatedtime__": 1660586298455 + } + ], + "original_records": [ + { + "id": 318, + "dog_name": "Polliwog", + "__updatedtime__": 1660585635876, + "__createdtime__": 1660585635876 + } + ] + } + ] +} +``` + +--- + +## Delete Audit Logs Before + +AuditLog must be enabled in the Harper configuration file to make this request. Deletes audit log data for the specified database table that is older than the specified timestamp. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `delete_audit_logs_before` +- `schema` _(required)_ - schema under which the transaction log resides. Must be a string +- `table` _(required)_ - table under which the transaction log resides. Must be a string +- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC + +### Body + +```json +{ + "operation": "delete_audit_logs_before", + "schema": "dev", + "table": "dog", + "timestamp": 1660585759710.56 +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id 7479e5f8-a86e-4fc9-add7-749493bc100f" +} +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/nosql-operations.md b/versioned_docs/version-4.7/developers/operations-api/nosql-operations.md new file mode 100644 index 00000000..7691cd81 --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/nosql-operations.md @@ -0,0 +1,389 @@ +--- +title: NoSQL Operations +--- + +# NoSQL Operations + +## Insert + +Adds one or more rows of data to a database table. Primary keys of the inserted JSON record may be supplied on insert. If a primary key is not provided, then a GUID or incremented number (depending on type) will be generated for each record. + +- `operation` _(required)_ - must always be `insert` +- `database` _(optional)_ - database where the table you are inserting records into lives. 
The default is `data`
+- `table` _(required)_ - table where you want to insert records
+- `records` _(required)_ - array of one or more records for insert
+
+### Body
+
+```json
+{
+	"operation": "insert",
+	"database": "dev",
+	"table": "dog",
+	"records": [
+		{
+			"id": 8,
+			"dog_name": "Harper",
+			"breed_id": 346,
+			"age": 7
+		},
+		{
+			"id": 9,
+			"dog_name": "Penny",
+			"breed_id": 154,
+			"age": 7
+		}
+	]
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "inserted 2 of 2 records",
+	"inserted_hashes": [8, 9],
+	"skipped_hashes": []
+}
+```
+
+---
+
+## Update
+
+Changes the values of specified attributes in one or more rows in a database table as identified by the primary key. NOTE: Primary key of the updated JSON record(s) MUST be supplied on update.
+
+- `operation` _(required)_ - must always be `update`
+- `database` _(optional)_ - database of the table you are updating records in. The default is `data`
+- `table` _(required)_ - table where you want to update records
+- `records` _(required)_ - array of one or more records for update
+
+### Body
+
+```json
+{
+	"operation": "update",
+	"database": "dev",
+	"table": "dog",
+	"records": [
+		{
+			"id": 1,
+			"weight_lbs": 55
+		},
+		{
+			"id": 2,
+			"owner": "Kyle B",
+			"weight_lbs": 35
+		}
+	]
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "updated 2 of 2 records",
+	"update_hashes": [1, 2],
+	"skipped_hashes": []
+}
+```
+
+---
+
+## Upsert
+
+Changes the values of specified attributes for rows with matching primary keys that exist in the table. Adds rows to the database table for primary keys that do not exist or are not provided.
+
+- `operation` _(required)_ - must always be `upsert`
+- `database` _(optional)_ - database of the table you are upserting records in. The default is `data`
+- `table` _(required)_ - table where you want to upsert records
+- `records` _(required)_ - array of one or more records for upsert
+
+### Body
+
+```json
+{
+	"operation": "upsert",
+	"database": "dev",
+	"table": "dog",
+	"records": [
+		{
+			"id": 8,
+			"weight_lbs": 155
+		},
+		{
+			"name": "Bill",
+			"breed": "Pit Bull",
+			"id": 10,
+			"Age": 11,
+			"weight_lbs": 155
+		},
+		{
+			"name": "Harper",
+			"breed": "Mutt",
+			"age": 5,
+			"weight_lbs": 155
+		}
+	]
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "upserted 3 of 3 records",
+	"upserted_hashes": [8, 10, "ea06fc8e-717b-4c6c-b69d-b29014054ab7"]
+}
+```
+
+---
+
+## Delete
+
+Removes one or more rows of data from a specified table.
+
+- `operation` _(required)_ - must always be `delete`
+- `database` _(optional)_ - database where the table you are deleting records from lives. The default is `data`
+- `table` _(required)_ - table where you want to delete records
+- `ids` _(required)_ - array of one or more primary key values, which identify the records to delete
+
+### Body
+
+```json
+{
+	"operation": "delete",
+	"database": "dev",
+	"table": "dog",
+	"ids": [1, 2]
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "2 of 2 records successfully deleted",
+	"deleted_hashes": [1, 2],
+	"skipped_hashes": []
+}
+```
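+
+All of the NoSQL operations above share the same request shape, so a thin client wrapper is often convenient. Here is a minimal sketch under that assumption; the URL, credentials, and `op` helper name are illustrative, not an official API:
+
+```typescript
+// Minimal sketch of a reusable Operations API caller (Node 18+ fetch).
+// The URL and credentials are placeholders.
+const url = 'https://my-harperdb-server:9925/';
+const auth = 'Basic ' + Buffer.from('user:pass').toString('base64');
+
+async function op<T = any>(body: Record<string, unknown>): Promise<T> {
+	const res = await fetch(url, {
+		method: 'POST',
+		headers: { 'Content-Type': 'application/json', Authorization: auth },
+		body: JSON.stringify(body),
+	});
+	if (!res.ok) throw new Error(`operation failed: ${res.status}`);
+	return res.json() as Promise<T>;
+}
+
+// Insert then update a record, mirroring the examples above
+await op({ operation: 'insert', database: 'dev', table: 'dog', records: [{ id: 21, dog_name: 'Fido' }] });
+await op({ operation: 'update', database: 'dev', table: 'dog', records: [{ id: 21, age: 4 }] });
+```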
+
+---
+
+## Search By ID
+
+Returns data from a table for one or more primary keys.
+
+- `operation` _(required)_ - must always be `search_by_id`
+- `database` _(optional)_ - database where the table you are searching lives. The default is `data`
+- `table` _(required)_ - table you wish to search
+- `ids` _(required)_ - array of primary keys to retrieve
+- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes
+
+### Body
+
+```json
+{
+	"operation": "search_by_id",
+	"database": "dev",
+	"table": "dog",
+	"ids": [1, 2],
+	"get_attributes": ["dog_name", "breed_id"]
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"dog_name": "Penny",
+		"breed_id": 154
+	},
+	{
+		"dog_name": "Harper",
+		"breed_id": 346
+	}
+]
+```
+
+---
+
+## Search By Value
+
+Returns data from a table for a matching value.
+
+- `operation` _(required)_ - must always be `search_by_value`
+- `database` _(optional)_ - database where the table you are searching lives. The default is `data`
+- `table` _(required)_ - table you wish to search
+- `attribute` _(required)_ - attribute you wish to search; can be any attribute
+- `search_attribute` - deprecated in favor of `attribute`
+- `value` _(required)_ - value you wish to search - wildcards are allowed
+- `search_value` - deprecated in favor of `value`
+- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes
+
+### Body
+
+```json
+{
+	"operation": "search_by_value",
+	"database": "dev",
+	"table": "dog",
+	"attribute": "owner_name",
+	"value": "Ky*",
+	"get_attributes": ["id", "dog_name"]
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"id": 1,
+		"dog_name": "Penny"
+	},
+	{
+		"id": 6,
+		"dog_name": "Kato"
+	}
+]
+```
+
+---
+
+## Search By Conditions
+
+Returns data from a table for one or more matching conditions. This supports grouping of conditions to indicate order of operations as well.
+
+- `operation` _(required)_ - must always be `search_by_conditions`
+- `database` _(optional)_ - database where the table you are searching lives. The default is `data`
+- `table` _(required)_ - table you wish to search
+- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and`
+- `offset` _(optional)_ - the number of records that the query results will skip. The default is `0`
+- `limit` _(optional)_ - the number of records that the query results will include. The default is `null`, resulting in no limit
+- `sort` _(optional)_ - an object that indicates the sort order. It has the following properties:
+	- `attribute` _(required)_ - The attribute to sort by
+	- `descending` _(optional)_ - If true, will sort in descending order (defaults to ascending order)
+	- `next` _(optional)_ - This can define the next sort object that will be used to break ties for sorting when there are multiple records with the same value for the first attribute (follows the same structure as `sort`).
+- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes
+- `conditions` _(required)_ - the array of condition objects, specified below, to filter by. Must include one or more objects in the array, each of which is a condition or a grouped set of conditions. A condition has the following properties:
+	- `attribute` _(required)_ - the attribute you wish to search, can be any attribute
+	- `search_attribute` - deprecated in favor of `attribute`
+	- `comparator` _(required)_ - the type of search to perform - `equals`, `contains`, `starts_with`, `ends_with`, `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `between`
+	- `search_type` - deprecated in favor of `comparator`
+	- `value` _(required)_ - case-sensitive value you wish to search. If the `comparator` is `between` then use an array of two values to search between
+	- `search_value` - deprecated in favor of `value`
+
+	Or a set of grouped conditions has the following properties:
+
+	- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and`
+	- `conditions` _(required)_ - the array of condition objects, as described above.
+
+### Body
+
+```json
+{
+	"operation": "search_by_conditions",
+	"database": "dev",
+	"table": "dog",
+	"operator": "and",
+	"offset": 0,
+	"limit": 10,
+	"sort": {
+		"attribute": "id",
+		"next": {
+			"attribute": "age",
+			"descending": true
+		}
+	},
+	"get_attributes": ["*"],
+	"conditions": [
+		{
+			"attribute": "age",
+			"comparator": "between",
+			"value": [5, 8]
+		},
+		{
+			"attribute": "weight_lbs",
+			"comparator": "greater_than",
+			"value": 40
+		},
+		{
+			"operator": "or",
+			"conditions": [
+				{
+					"attribute": "adorable",
+					"comparator": "equals",
+					"value": true
+				},
+				{
+					"attribute": "lovable",
+					"comparator": "equals",
+					"value": true
+				}
+			]
+		}
+	]
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"__createdtime__": 1620227719791,
+		"__updatedtime__": 1620227719791,
+		"adorable": true,
+		"age": 7,
+		"breed_id": 346,
+		"dog_name": "Harper",
+		"id": 2,
+		"owner_name": "Stephen",
+		"weight_lbs": 55
+	},
+	{
+		"__createdtime__": 1620227719792,
+		"__updatedtime__": 1620227719792,
+		"adorable": true,
+		"age": 7,
+		"breed_id": 348,
+		"dog_name": "Alby",
+		"id": 3,
+		"owner_name": "Kaylan",
+		"weight_lbs": 84
+	},
+	{
+		"__createdtime__": 1620227719792,
+		"__updatedtime__": 1620227719792,
+		"adorable": true,
+		"age": 6,
+		"breed_id": 347,
+		"dog_name": "Billy",
+		"id": 4,
+		"owner_name": "Zach",
+		"weight_lbs": 60
+	},
+	{
+		"__createdtime__": 1620227719792,
+		"__updatedtime__": 1620227719792,
+		"adorable": true,
+		"age": 5,
+		"breed_id": 250,
+		"dog_name": "Gemma",
+		"id": 8,
+		"owner_name": "Stephen",
+		"weight_lbs": 55
+	},
+	{
+		"__createdtime__": 1620227719792,
+		"__updatedtime__": 1620227719792,
+		"adorable": true,
+		"age": 8,
+		"breed_id": 104,
+		"dog_name": "Bode",
+		"id": 11,
+		"owner_name": "Margo",
+		"weight_lbs": 75
+	}
+]
+```
diff --git a/versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md b/versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md
new file mode 100644
index 00000000..a6c8f637
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md
@@ -0,0 +1,370 @@
+---
+title: Quick Start Examples
+---
+
+# Quick Start Examples
+
+Harper recommends utilizing [Harper Applications](../../developers/applications/) for defining databases, tables, and other functionality. However, this guide is a great way to get started using the Harper Operations API.
+
+## Create dog Table
+
+We first need to create a table. Since our company is named after our CEO's dog, let's create a table to store all our employees' dogs. We'll call this table `dog`.
+
+Tables in Harper are schema-less, so we don't need to add any attributes other than a primary_key (in pre-4.2 versions this was referred to as the hash_attribute) to create this table.
+
+Harper does offer a `database` parameter that can be used to hold logical groupings of tables. The parameter is optional and if not provided the operation will default to using a database named `data`.
+
+If you receive an error response, make sure your Basic Authentication user and password match those you entered during the installation process.
+ +### Body + +```json +{ + "operation": "create_table", + "table": "dog", + "primary_key": "id" +} +``` + +### Response: 200 + +```json +{ + "message": "table 'data.dog' successfully created." +} +``` + +--- + +## Create breed Table + +Now that we have a table to store our dog data, we also want to create a table to track known breeds. Just as with the dog table, the only attribute we need to specify is the `primary_key`. + +### Body + +```json +{ + "operation": "create_table", + "table": "breed", + "primary_key": "id" +} +``` + +### Response: 200 + +```json +{ + "message": "table 'data.breed' successfully created." +} +``` + +--- + +## Insert 1 Dog + +We're ready to add some dog data. Penny is our CTO's pup, so she gets ID 1 or we're all fired. We are specifying attributes in this call, but this doesn't prevent us from specifying additional attributes in subsequent calls. + +### Body + +```json +{ + "operation": "insert", + "table": "dog", + "records": [ + { + "id": 1, + "dog_name": "Penny", + "owner_name": "Kyle", + "breed_id": 154, + "age": 7, + "weight_lbs": 38 + } + ] +} +``` + +### Response: 200 + +```json +{ + "message": "inserted 1 of 1 records", + "inserted_hashes": [1], + "skipped_hashes": [] +} +``` + +--- + +## Insert Multiple Dogs + +Let's add some more Harper doggies! We can add as many dog objects as we want into the records collection. If you're adding a lot of objects, we would recommend using the .csv upload option (see the next section where we populate the breed table). + +### Body + +```json +{ + "operation": "insert", + "table": "dog", + "records": [ + { + "id": 2, + "dog_name": "Harper", + "owner_name": "Stephen", + "breed_id": 346, + "age": 7, + "weight_lbs": 55, + "adorable": true + }, + { + "id": 3, + "dog_name": "Alby", + "owner_name": "Kaylan", + "breed_id": 348, + "age": 7, + "weight_lbs": 84, + "adorable": true + }, + { + "id": 4, + "dog_name": "Billy", + "owner_name": "Zach", + "breed_id": 347, + "age": 6, + "weight_lbs": 60, + "adorable": true + }, + { + "id": 5, + "dog_name": "Rose Merry", + "owner_name": "Zach", + "breed_id": 348, + "age": 8, + "weight_lbs": 15, + "adorable": true + }, + { + "id": 6, + "dog_name": "Kato", + "owner_name": "Kyle", + "breed_id": 351, + "age": 6, + "weight_lbs": 32, + "adorable": true + }, + { + "id": 7, + "dog_name": "Simon", + "owner_name": "Fred", + "breed_id": 349, + "age": 3, + "weight_lbs": 35, + "adorable": true + }, + { + "id": 8, + "dog_name": "Gemma", + "owner_name": "Stephen", + "breed_id": 350, + "age": 5, + "weight_lbs": 55, + "adorable": true + }, + { + "id": 9, + "dog_name": "Yeti", + "owner_name": "Jaxon", + "breed_id": 200, + "age": 5, + "weight_lbs": 55, + "adorable": true + }, + { + "id": 10, + "dog_name": "Monkey", + "owner_name": "Aron", + "breed_id": 271, + "age": 7, + "weight_lbs": 35, + "adorable": true + }, + { + "id": 11, + "dog_name": "Bode", + "owner_name": "Margo", + "breed_id": 104, + "age": 8, + "weight_lbs": 75, + "adorable": true + }, + { + "id": 12, + "dog_name": "Tucker", + "owner_name": "David", + "breed_id": 346, + "age": 2, + "weight_lbs": 60, + "adorable": true + }, + { + "id": 13, + "dog_name": "Jagger", + "owner_name": "Margo", + "breed_id": 271, + "age": 7, + "weight_lbs": 35, + "adorable": true + } + ] +} +``` + +### Response: 200 + +```json +{ + "message": "inserted 12 of 12 records", + "inserted_hashes": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "skipped_hashes": [] +} +``` + +--- + +## Bulk Insert Breeds Via CSV + +We need to populate the 'breed' table with some data so we can 
reference it later. For larger data sets, we recommend using our CSV upload option. + +Each header in a column will be considered as an attribute, and each row in the file will be a row in the table. Simply specify the file path and the table to upload to, and Harper will take care of the rest. You can pull the breeds.csv file from here: [https://s3.amazonaws.com/complimentarydata/breeds.csv](https://s3.amazonaws.com/complimentarydata/breeds.csv) + +### Body + +```json +{ + "operation": "csv_url_load", + "table": "breed", + "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" +} +``` + +### Response: 200 + +```json +{ + "message": "Starting job with id e77d63b9-70d5-499c-960f-6736718a4369", + "job_id": "e77d63b9-70d5-499c-960f-6736718a4369" +} +``` + +--- + +## Update 1 Dog Using NoSQL + +Harper supports NoSQL and SQL commands. We're going to update the dog table to show Penny's last initial using our NoSQL API. + +### Body + +```json +{ + "operation": "update", + "table": "dog", + "records": [ + { + "id": 1, + "dog_name": "Penny B" + } + ] +} +``` + +### Response: 200 + +```json +{ + "message": "updated 1 of 1 records", + "update_hashes": [1], + "skipped_hashes": [] +} +``` + +--- + +## Select a Dog by ID Using SQL + +Now we're going to use a simple SQL SELECT call to pull Penny's updated data. Note we now see Penny's last initial in the dog name. + +### Body + +```json +{ + "operation": "sql", + "sql": "SELECT * FROM data.dog where id = 1" +} +``` + +### Response: 200 + +```json +[ + { + "owner_name": "Kyle", + "adorable": null, + "breed_id": 154, + "__updatedtime__": 1610749428575, + "dog_name": "Penny B", + "weight_lbs": 38, + "id": 1, + "age": 7, + "__createdtime__": 1610749386566 + } +] +``` + +--- + +## Select Dogs and Join Breed + +Here's a more complex SQL command joining the breed table with the dog table. We will also pull only the pups belonging to Kyle, Zach, and Stephen. + +### Body + +```json +{ + "operation": "sql", + "sql": "SELECT d.id, d.dog_name, d.owner_name, b.name, b.section FROM data.dog AS d INNER JOIN data.breed AS b ON d.breed_id = b.id WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') AND b.section = 'Mutt' ORDER BY d.dog_name" +} +``` + +### Response: 200 + +```json +[ + { + "id": 4, + "dog_name": "Billy", + "owner_name": "Zach", + "name": "LABRADOR / GREAT DANE MIX", + "section": "Mutt" + }, + { + "id": 8, + "dog_name": "Gemma", + "owner_name": "Stephen", + "name": "SHORT HAIRED SETTER MIX", + "section": "Mutt" + }, + { + "id": 2, + "dog_name": "Harper", + "owner_name": "Stephen", + "name": "HUSKY MIX", + "section": "Mutt" + }, + { + "id": 5, + "dog_name": "Rose Merry", + "owner_name": "Zach", + "name": "TERRIER MIX", + "section": "Mutt" + } +] +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/registration.md b/versioned_docs/version-4.7/developers/operations-api/registration.md new file mode 100644 index 00000000..d5d278c5 --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/registration.md @@ -0,0 +1,231 @@ +--- +title: Registration +--- + +# Registration + +## Registration Info + +Returns the registration data of the Harper instance. + +- `operation` _(required)_ - must always be `registration_info` + +### Body + +```json +{ + "operation": "registration_info" +} +``` + +### Response: 200 + +```json +{ + "registered": true, + "version": "4.2.0", + "ram_allocation": 2048, + "license_expiration_date": "2022-01-15" +} +``` + +--- + +## Install Usage License + +Install a Harper license for a block of usage. 
Multiple usage blocks may be installed, and they will be used up sequentially, with the earliest installed blocks used first. A license is installed
+by creating a string that consists of three base64url encoded blocks, separated by dots. The three blocks consist of:
+
+- `header`: This is a JSON object with two properties:
+	- `typ`: should be "Harper-License"
+	- `alg`: should be "EdDSA"
+
+This JSON object should be converted to base64url (conversion from utf-8 to base64url) and is the first base64url block.
+
+- license payload: This is a JSON object with properties:
+	- `id` _(required)_ - A unique id for the license
+	- `level` _(required)_ - Usage level number
+	- `region` _(required)_ - The region id where this license can be used
+	- `reads` _(required)_ - The number of allowed reads
+	- `readBytes` _(required)_ - The number of allowed read bytes
+	- `writes` _(required)_ - The number of allowed writes
+	- `writeBytes` _(required)_ - The number of allowed write bytes
+	- `realTimeMessages` _(required)_ - The number of allowed real-time messages
+	- `realTimeBytes` _(required)_ - The number of allowed real-time message bytes
+	- `cpuTime` _(optional)_ - The allowed amount of CPU time consumed by application code
+	- `storage` _(optional)_ - The maximum amount of storage that may be used
+	- `expiration` _(required)_ - The date when this block expires, as an ISO date
+
+This JSON object should be converted to base64url (conversion from utf-8 to base64url) and is the second base64url block.
+
+For example:
+
+```json
+{
+	"id": "license-717b-4c6c-b69d-b29014054ab7",
+	"level": 2,
+	"region": "us-nw-2",
+	"reads": 2000000000,
+	"readBytes": 8000000000000,
+	"writes": 500000000,
+	"writeBytes": 1000000000000,
+	"realTimeMessages": 10000000000,
+	"realTimeBytes": 40000000000000,
+	"cpuTime": 108000,
+	"storage": 400000000000000,
+	"expiration": "2025-07-25T21:17:21.248Z"
+}
+```
+
+- `signature`: This is the cryptographic signature, signed by Harper, of the first two blocks, separated by a dot, `header.payload`. This is also converted to base64url.
+
+The three base64url blocks are combined to form the `license` property value in the operation.
+
+- `operation` _(required)_ - must always be `install_usage_license`
+- `license` _(required)_ - This is the combination of the three blocks in the form `header.payload.signature`
+
+### Body
+
+```json
+{
+	"operation": "install_usage_license",
+	"license": "abc...0123.abc...0123.abc...0123"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "Successfully installed usage license"
+}
+```
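+
+To illustrate the encoding, here is a minimal sketch that assembles the `license` string from a header, a payload, and an already-issued signature. The payload values are placeholders, and the signature itself must be produced by Harper; it cannot be generated client-side:
+
+```typescript
+// Minimal sketch: assemble the `license` value as header.payload.signature.
+// Payload values are placeholders; the signature must come from Harper.
+const b64url = (obj: object) => Buffer.from(JSON.stringify(obj)).toString('base64url');
+
+const header = { typ: 'Harper-License', alg: 'EdDSA' };
+const payload = {
+	id: 'license-717b-4c6c-b69d-b29014054ab7',
+	level: 2,
+	region: 'us-nw-2',
+	reads: 2000000000,
+	readBytes: 8000000000000,
+	writes: 500000000,
+	writeBytes: 1000000000000,
+	realTimeMessages: 10000000000,
+	realTimeBytes: 40000000000000,
+	expiration: '2025-07-25T21:17:21.248Z',
+};
+const signature = 'abc...0123'; // base64url signature issued by Harper
+
+const license = `${b64url(header)}.${b64url(payload)}.${signature}`;
+// Send as: { "operation": "install_usage_license", "license": license }
+```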
+
+---
+
+## Get Usage Licenses
+
+This will retrieve and return _all_ usage licenses (including expired, exhausted, and licenses in any other state), with counts of how much of the limits have been consumed.
+
+- `operation` _(required)_ - must always be `get_usage_licenses`
+- `region` _(optional)_ - will filter by region when supplied
+
+### Body
+
+```json
+{
+	"operation": "get_usage_licenses"
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"id": "license-717b-4c6c-b69d-b29014054ab7",
+		"level": 2,
+		"region": "us-nw-2",
+		"reads": 2000000000,
+		"usedReads": 1100000000,
+		"readBytes": 8000000000000,
+		"usedReadBytes": 3000000000000,
+		"writes": 500000000,
+		"usedWrites": 300000000,
+		"writeBytes": 1000000000000,
+		"usedWriteBytes": 4300000000000,
+		"realTimeMessages": 10000000000,
+		"usedRealTimeMessages": 2000000000,
+		"realTimeBytes": 40000000000000,
+		"usedRealTimeBytes": 13000000000000,
+		"cpuTime": 108000,
+		"usedCpuTime": 41000,
+		"storage": 400000000000000,
+		"expiration": "2025-07-25T21:17:21.248Z"
+	},
+	{
+		"id": "license-4c6c-b69d-b29014054ab7-717b",
+		"level": 2,
+		"region": "us-nw-2",
+		"reads": 2000000000,
+		"usedReads": 0,
+		"readBytes": 8000000000000,
+		"usedReadBytes": 0,
+		"writes": 500000000,
+		"usedWrites": 0,
+		"writeBytes": 1000000000000,
+		"usedWriteBytes": 0,
+		"realTimeMessages": 10000000000,
+		"usedRealTimeMessages": 0,
+		"realTimeBytes": 40000000000000,
+		"usedRealTimeBytes": 0,
+		"cpuTime": 108000,
+		"usedCpuTime": 0,
+		"storage": 400000000000000,
+		"expiration": "2025-09-25T21:17:21.248Z"
+	},
+	{
+		"id": "license-4c6c-b69d-b29014054ab7-717b",
+		"level": 2,
+		"region": "us-se-2",
+		"reads": 2000000000,
+		"usedReads": 0,
+		"readBytes": 8000000000000,
+		"usedReadBytes": 0,
+		"writes": 500000000,
+		"usedWrites": 0,
+		"writeBytes": 1000000000000,
+		"usedWriteBytes": 0,
+		"realTimeMessages": 10000000000,
+		"usedRealTimeMessages": 0,
+		"realTimeBytes": 40000000000000,
+		"usedRealTimeBytes": 0,
+		"cpuTime": 108000,
+		"usedCpuTime": 0,
+		"storage": 400000000000000,
+		"expiration": "2025-11-25T21:17:21.248Z"
+	}
+]
+```
+
+---
+
+## Get Fingerprint
+
+(Deprecated)
+Returns the Harper fingerprint, uniquely generated based on the machine, for licensing purposes.
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `get_fingerprint`
+
+### Body
+
+```json
+{
+	"operation": "get_fingerprint"
+}
+```
+
+---
+
+## Set License
+
+(Deprecated)
+Sets the Harper license as generated by Harper License Management software.
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `set_license`
+- `key` _(required)_ - your license key
+- `company` _(required)_ - the company that was used in the license
+
+### Body
+
+```json
+{
+	"operation": "set_license",
+	"key": "",
+	"company": ""
+}
+```
diff --git a/versioned_docs/version-4.7/developers/operations-api/sql-operations.md b/versioned_docs/version-4.7/developers/operations-api/sql-operations.md
new file mode 100644
index 00000000..4b7076bb
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/operations-api/sql-operations.md
@@ -0,0 +1,127 @@
+---
+title: SQL Operations
+---
+
+:::warning
+Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future.
+:::
+
+# SQL Operations
+
+## Select
+
+Executes the provided SQL statement. The SELECT statement is used to query data from the database.
+ +- `operation` _(required)_ - must always be `sql` +- `sql` _(required)_ - use standard SQL + +### Body + +```json +{ + "operation": "sql", + "sql": "SELECT * FROM dev.dog WHERE id = 1" +} +``` + +### Response: 200 + +```json +[ + { + "id": 1, + "age": 7, + "dog_name": "Penny", + "weight_lbs": 38, + "breed_id": 154, + "owner_name": "Kyle", + "adorable": true, + "__createdtime__": 1611614106043, + "__updatedtime__": 1611614119507 + } +] +``` + +--- + +## Insert + +Executes the provided SQL statement. The INSERT statement is used to add one or more rows to a database table. + +- `operation` _(required)_ - must always be `sql` +- `sql` _(required)_ - use standard SQL + +### Body + +```json +{ + "operation": "sql", + "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" +} +``` + +### Response: 200 + +```json +{ + "message": "inserted 1 of 1 records", + "inserted_hashes": [22], + "skipped_hashes": [] +} +``` + +--- + +## Update + +Executes the provided SQL statement. The UPDATE statement is used to change the values of specified attributes in one or more rows in a database table. + +- `operation` _(required)_ - must always be `sql` +- `sql` _(required)_ - use standard SQL + +### Body + +```json +{ + "operation": "sql", + "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" +} +``` + +### Response: 200 + +```json +{ + "message": "updated 1 of 1 records", + "update_hashes": [1], + "skipped_hashes": [] +} +``` + +--- + +## Delete + +Executes the provided SQL statement. The DELETE statement is used to remove one or more rows of data from a database table. + +- `operation` _(required)_ - must always be `sql` +- `sql` _(required)_ - use standard SQL + +### Body + +```json +{ + "operation": "sql", + "sql": "DELETE FROM dev.dog WHERE id = 1" +} +``` + +### Response: 200 + +```json +{ + "message": "1 of 1 record successfully deleted", + "deleted_hashes": [1], + "skipped_hashes": [] +} +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/system-operations.md b/versioned_docs/version-4.7/developers/operations-api/system-operations.md new file mode 100644 index 00000000..d39e93cb --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/system-operations.md @@ -0,0 +1,195 @@ +--- +title: System Operations +--- + +# System Operations + +## Restart + +Restarts the Harper instance. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `restart` + +### Body + +```json +{ + "operation": "restart" +} +``` + +### Response: 200 + +```json +{ + "message": "Restarting HarperDB. This may take up to 60 seconds." +} +``` + +--- + +## Restart Service + +Restarts servers for the specified Harper service. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `restart_service` +- `service` _(required)_ - must be one of: `http_workers`, `clustering_config` or `clustering` +- `replicated` _(optional)_ - must be a boolean. If set to `true`, Harper will replicate the restart service operation across all nodes in the cluster. The restart will occur as a rolling restart, ensuring that each node is fully restarted before the next node begins restarting. + +### Body + +```json +{ + "operation": "restart_service", + "service": "http_workers" +} +``` + +### Response: 200 + +```json +{ + "message": "Restarting http_workers" +} +``` + +--- + +## System Information + +Returns detailed metrics on the host system. 
+ +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `system_information` +- `attributes` _(optional)_ - string array of top level attributes desired in the response, if no value is supplied all attributes will be returned. Available attributes are: ['system', 'time', 'cpu', 'memory', 'disk', 'network', 'harperdb_processes', 'table_size', 'metrics', 'threads', 'replication'] + +### Body + +```json +{ + "operation": "system_information" +} +``` + +--- + +## Set Status + +Sets a status value that can be used for application-specific status tracking. Status values are stored in memory and are not persisted across restarts. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `set_status` +- `id` _(required)_ - the key identifier for the status +- `status` _(required)_ - the status value to set (string between 1-512 characters) + +### Body + +```json +{ + "operation": "set_status", + "id": "primary", + "status": "active" +} +``` + +### Response: 200 + +```json +{ + "id": "primary", + "status": "active", + "__createdtime__": 1621364589543, + "__updatedtime__": 1621364589543 +} +``` + +### Notes + +- The `id` parameter must be one of the allowed status types: 'primary', 'maintenance', or 'availability' +- If no `id` is specified, it defaults to 'primary' +- For 'availability' status, only 'Available' or 'Unavailable' values are accepted +- For other status types, any string value is accepted + +--- + +## Get Status + +Retrieves a status value previously set with the set_status operation. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `get_status` +- `id` _(optional)_ - the key identifier for the status to retrieve (defaults to all statuses if not provided) + +### Body + +```json +{ + "operation": "get_status", + "id": "primary" +} +``` + +### Response: 200 + +```json +{ + "id": "primary", + "status": "active", + "__createdtime__": 1621364589543, + "__updatedtime__": 1621364589543 +} +``` + +If no id parameter is provided, all status values will be returned: + +```json +[ + { + "id": "primary", + "status": "active", + "__createdtime__": 1621364589543, + "__updatedtime__": 1621364589543 + }, + { + "id": "maintenance", + "status": "scheduled", + "__createdtime__": 1621364600123, + "__updatedtime__": 1621364600123 + } +] +``` + +--- + +## Clear Status + +Removes a status entry by its ID. + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `clear_status` +- `id` _(required)_ - the key identifier for the status to remove + +### Body + +```json +{ + "operation": "clear_status", + "id": "primary" +} +``` + +### Response: 200 + +```json +{ + "message": "Status successfully cleared" +} +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/token-authentication.md b/versioned_docs/version-4.7/developers/operations-api/token-authentication.md new file mode 100644 index 00000000..178db842 --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/token-authentication.md @@ -0,0 +1,60 @@ +--- +title: Token Authentication +--- + +# Token Authentication + +## Create Authentication Tokens + +Creates the tokens needed for authentication: operation & refresh token. 
+ +_Note - this operation does not require authorization to be set_ + +- `operation` _(required)_ - must always be `create_authentication_tokens` +- `username` _(required)_ - username of user to generate tokens for +- `password` _(required)_ - password of user to generate tokens for + +### Body + +```json +{ + "operation": "create_authentication_tokens", + "username": "", + "password": "" +} +``` + +### Response: 200 + +```json +{ + "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA1MTUwMzQ5LCJzdWIiOiJvcGVyYXRpb24ifQ.TlV93BqavQVQntXTt_WeY5IjAuCshfd6RzhihLWFWhu1qEKLHdwg9o5Z4ASaNmfuyKBqbFw65IbOYKd348EXeC_T6d0GO3yUhICYWXkqhQnxVW_T-ECKc7m5Bty9HTgfeaJ2e2yW55nbZYWG_gLtNgObUjCziX20-gGGR25sNTRm78mLQPYQkBJph6WXwAuyQrX704h0NfvNqyAZSwjxgtjuuEftTJ7FutLrQSLGIBIYq9nsHrFkheiDSn-C8_WKJ_zATa4YIofjqn9g5wA6o_7kSNaU2-gWnCm_jbcAcfvOmXh6rd89z8pwPqnC0f131qHIBps9UHaC1oozzmu_C6bsg7905OoAdFFY42Vojs98SMbfRApRvwaS4SprBsam3izODNI64ZUBREu3l4SZDalUf2kN8XPVWkI1LKq_mZsdtqr1r11Z9xslI1wVdxjunYeanjBhs7_j2HTX7ieVGn1a23cWceUk8F1HDGe_KEuPQs03R73V8acq_freh-kPhIa4eLqmcHeBw3WcyNGW8GuP8kyQRkGuO5sQSzZqbr_YSbZdSShZWTWDE6RYYC9ZV9KJtHVxhs0hexUpcoqO8OtJocyltRjtDjhSm9oUxszYRaALu-h8YadZT9dEKzsyQIt30d7LS9ETmmGWx4nKSTME2bV21PnDv_rEc5R6gnE", + "refresh_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA3NjU1OTQ5LCJzdWIiOiJyZWZyZXNoIn0.znhJhkdSROBPP_GLRzAxYdjgQ3BuqpAbQB7zMSSOQJ3s83HnmZ10Bnpw_3L2aF-tOFgz_t6HUAvn26fNOLsspJD2aOvHPcVS4yLKS5nagpA6ar_pqng9f6Ebfs8ohguLCfHnHRJ8poLxuWRvWW9_9pIlDiwsj4yo3Mbxi3mW8Bbtnk2MwiNHFxTksD12Ne8EWz8q2jic5MjArqBBgR373oYoWU1oxpTM6gIsZCBRowXcc9XFy2vyRoggEUU4ISRFQ4ZY9ayJ-_jleSDCUamJSNQsdb1OUTvc6CxeYlLjCoV0ijRUB6p2XWNVezFhDu8yGqOeyGFJzArhxbVc_pl4UYd5aUVxhrO9DdhG29cY_mHV0FqfXphR9QllK--LJFTP4aFqkCxnVr7HSa17hL0ZVK1HaKrx21PAdCkVNZpD6J3RtRbTkfnIB_C3Be9jhOV3vpTf7ZGn_Bs3CPJi_sL313Z1yKSDAS5rXTPceEOcTPHjzkMP9Wz19KfFq_0kuiZdDmeYNqJeFPAgGJ-S0tO51krzyGqLyCCA32_W104GR8OoQi2gEED6HIx2G0-1rnLnefN6eHQiY5r-Q3Oj9e2y3EvqqgWOmEDw88-SjPTwQVnMbBHYN2RfluU7EmvDh6Saoe79Lhlu8ZeSJ1x6ZgA8-Cirraz1_526Tn8v5FGDfrc" +} +``` + +--- + +## Refresh Operation Token + +This operation creates a new operation token. 
+ +- `operation` _(required)_ - must always be `refresh_operation_token` +- `refresh_token` _(required)_ - the refresh token that was provided when tokens were created + +### Body + +```json +{ + "operation": "refresh_operation_token", + "refresh_token": "EXISTING_REFRESH_TOKEN" +} +``` + +### Response: 200 + +```json +{ + "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ1MTc4Nzk1MjMsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMywiYWN0aXZlIjp0cnVlLCJhdXRoX3Rva2VuIjpudWxsLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMSwiX191cGRhdGVkdGltZV9fIjoxNjA0NTE3ODc5NTIxLCJpZCI6IjZhYmRjNGJhLWU5MjQtNDlhNi1iOGY0LWM1NWUxYmQ0OTYzZCIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6IkhEQl9BRE1JTiJ9LCJpYXQiOjE2MDUwNjQ0MjMsImV4cCI6MTYwNTE1MDgyMywic3ViIjoib3BlcmF0aW9uIn0.VVZdhlh7_xFEaGPwhAh6VJ1d7eisiF3ok3ZwLTQAMWZB6umb2S7pPSTbXAmqAGHRlFAK3BYfnwT3YWt0gZbHvk24_0x3s_dej3PYJ8khIxzMjqpkR6qSjQIC2dhKqpwRPNtoqW_xnep9L-qf5iPtqkwsqWhF1c5VSN8nFouLWMZSuJ6Mag04soNhFvY0AF6QiTyzajMTb6uurRMWOnxk8hwMrY_5xtupabqtZheXP_0DV8l10B7GFi_oWf_lDLmwRmNbeUfW8ZyCIJMj36bjN3PsfVIxog87SWKKCwbWZWfJWw0KEph-HvU0ay35deyGWPIaDQmujuh2vtz-B0GoIAC58PJdXNyQRzES_nSb6Oqc_wGZsLM6EsNn_lrIp3mK_3a5jirZ8s6Z2SfcYKaLF2hCevdm05gRjFJ6ijxZrUSOR2S415wLxmqCCWCp_-sEUz8erUrf07_aj-Bv99GUub4b_znOsQF3uABKd4KKff2cNSMhAa-6sro5GDRRJg376dcLi2_9HOZbnSo90zrpVq8RNV900aydyzDdlXkZja8jdHBk4mxSSewYBvM7up6I0G4X-ZlzFOp30T7kjdLa6480Qp34iYRMMtq0Htpb5k2jPt8dNFnzW-Q2eRy1wNBbH3cCH0rd7_BIGuTCrl4hGU8QjlBiF7Gj0_-uJYhKnhg" +} +``` diff --git a/versioned_docs/version-4.7/developers/operations-api/users-and-roles.md b/versioned_docs/version-4.7/developers/operations-api/users-and-roles.md new file mode 100644 index 00000000..91f222b9 --- /dev/null +++ b/versioned_docs/version-4.7/developers/operations-api/users-and-roles.md @@ -0,0 +1,508 @@ +--- +title: Users and Roles +--- + +# Users and Roles + +## List Roles + +Returns a list of all roles. 
[Learn more about Harper roles here.](../security/users-and-roles)
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `list_roles`
+
+### Body
+
+```json
+{
+	"operation": "list_roles"
+}
+```
+
+### Response: 200
+
+```json
+[
+	{
+		"__createdtime__": 1611615061106,
+		"__updatedtime__": 1611615061106,
+		"id": "05c2ffcd-f780-40b1-9432-cfe8ba5ad890",
+		"permission": {
+			"super_user": false,
+			"dev": {
+				"tables": {
+					"dog": {
+						"read": true,
+						"insert": true,
+						"update": true,
+						"delete": false,
+						"attribute_permissions": [
+							{
+								"attribute_name": "name",
+								"read": true,
+								"insert": true,
+								"update": true
+							}
+						]
+					}
+				}
+			}
+		},
+		"role": "developer"
+	},
+	{
+		"__createdtime__": 1610749235614,
+		"__updatedtime__": 1610749235614,
+		"id": "136f03fa-a0e9-46c3-bd5d-7f3e7dd5b564",
+		"permission": {
+			"cluster_user": true
+		},
+		"role": "cluster_user"
+	},
+	{
+		"__createdtime__": 1610749235609,
+		"__updatedtime__": 1610749235609,
+		"id": "745b3138-a7cf-455a-8256-ac03722eef12",
+		"permission": {
+			"super_user": true
+		},
+		"role": "super_user"
+	}
+]
+```
+
+---
+
+## Add Role
+
+Creates a new role with the specified permissions. [Learn more about Harper roles here.](../security/users-and-roles)
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `add_role`
+- `role` _(required)_ - name of role you are defining
+- `permission` _(required)_ - object defining permissions for users associated with this role:
+	- `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false.
+	- `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. This overrides any individual table permissions for specified databases, or for all databases if the value is true.
+
+### Body
+
+```json
+{
+	"operation": "add_role",
+	"role": "developer",
+	"permission": {
+		"super_user": false,
+		"structure_user": false,
+		"dev": {
+			"tables": {
+				"dog": {
+					"read": true,
+					"insert": true,
+					"update": true,
+					"delete": false,
+					"attribute_permissions": [
+						{
+							"attribute_name": "name",
+							"read": true,
+							"insert": true,
+							"update": true
+						}
+					]
+				}
+			}
+		}
+	}
+}
+```
+
+### Response: 200
+
+```json
+{
+	"role": "developer",
+	"permission": {
+		"super_user": false,
+		"structure_user": false,
+		"dev": {
+			"tables": {
+				"dog": {
+					"read": true,
+					"insert": true,
+					"update": true,
+					"delete": false,
+					"attribute_permissions": [
+						{
+							"attribute_name": "name",
+							"read": true,
+							"insert": true,
+							"update": true
+						}
+					]
+				}
+			}
+		}
+	},
+	"id": "0a9368b0-bd81-482f-9f5a-8722e3582f96",
+	"__updatedtime__": 1598549532897,
+	"__createdtime__": 1598549532897
+}
+```
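+
+Roles and users are typically provisioned together. Here is a minimal sketch that creates a restricted role and then a user assigned to it; the URL, credentials, and names are placeholders, and `add_user` is documented later on this page:
+
+```typescript
+// Minimal sketch: create a role, then a user assigned to that role.
+// URL, credentials, role, and user names are placeholders.
+const url = 'https://my-harperdb-server:9925/';
+const auth = 'Basic ' + Buffer.from('admin:password').toString('base64');
+
+async function op(body: object): Promise<any> {
+	const res = await fetch(url, {
+		method: 'POST',
+		headers: { 'Content-Type': 'application/json', Authorization: auth },
+		body: JSON.stringify(body),
+	});
+	return res.json();
+}
+
+await op({
+	operation: 'add_role',
+	role: 'read_only_dev',
+	permission: { super_user: false, dev: { tables: { dog: { read: true, insert: false, update: false, delete: false, attribute_permissions: [] } } } },
+});
+await op({ operation: 'add_user', role: 'read_only_dev', username: 'report_user', password: 'a-strong-password', active: true });
+```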
+
+---
+
+## Alter Role
+
+Modifies an existing role with the specified permissions. [Learn more about Harper roles here.](../security/users-and-roles)
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `alter_role`
+- `id` _(required)_ - the id value for the role you are altering
+- `role` _(optional)_ - name value to update on the role you are altering
+- `permission` _(required)_ - object defining permissions for users associated with this role:
+	- `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false.
+	- `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. This overrides any individual table permissions for specified databases, or for all databases if the value is true.
+
+### Body
+
+```json
+{
+	"operation": "alter_role",
+	"id": "f92162e2-cd17-450c-aae0-372a76859038",
+	"role": "another_developer",
+	"permission": {
+		"super_user": false,
+		"structure_user": false,
+		"dev": {
+			"tables": {
+				"dog": {
+					"read": true,
+					"insert": true,
+					"update": true,
+					"delete": false,
+					"attribute_permissions": [
+						{
+							"attribute_name": "name",
+							"read": false,
+							"insert": true,
+							"update": true
+						}
+					]
+				}
+			}
+		}
+	}
+}
+```
+
+### Response: 200
+
+```json
+{
+	"id": "a7cb91e9-32e4-4dbf-a327-fab4fa9191ea",
+	"role": "developer",
+	"permission": {
+		"super_user": false,
+		"structure_user": false,
+		"dev": {
+			"tables": {
+				"dog": {
+					"read": true,
+					"insert": true,
+					"update": true,
+					"delete": false,
+					"attribute_permissions": [
+						{
+							"attribute_name": "name",
+							"read": false,
+							"insert": true,
+							"update": true
+						}
+					]
+				}
+			}
+		}
+	},
+	"__updatedtime__": 1598549996106
+}
+```
+
+---
+
+## Drop Role
+
+Deletes an existing role from the database. Note: a role with associated users cannot be dropped. [Learn more about Harper roles here.](../security/users-and-roles)
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `drop_role`
+- `id` _(required)_ - the id of the role you are dropping
+
+### Body
+
+```json
+{
+	"operation": "drop_role",
+	"id": "developer"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "developer successfully deleted"
+}
+```
+
+---
+
+## List Users
+
+Returns a list of all users. 
[Learn more about Harper roles here.](../security/users-and-roles) + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `list_users` + +### Body + +```json +{ + "operation": "list_users" +} +``` + +### Response: 200 + +```json +[ + { + "__createdtime__": 1635520961165, + "__updatedtime__": 1635520961165, + "active": true, + "role": { + "__createdtime__": 1635520961161, + "__updatedtime__": 1635520961161, + "id": "7c78ef13-c1f3-4063-8ea3-725127a78279", + "permission": { + "super_user": true, + "system": { + "tables": { + "hdb_table": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_attribute": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_schema": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_user": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_role": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_job": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_license": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_info": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_nodes": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + }, + "hdb_temp": { + "read": true, + "insert": false, + "update": false, + "delete": false, + "attribute_permissions": [] + } + } + } + }, + "role": "super_user" + }, + "username": "HDB_ADMIN" + } +] +``` + +--- + +## User Info + +Returns user data for the associated user credentials. + +- `operation` _(required)_ - must always be `user_info` + +### Body + +```json +{ + "operation": "user_info" +} +``` + +### Response: 200 + +```json +{ + "__createdtime__": 1610749235611, + "__updatedtime__": 1610749235611, + "active": true, + "role": { + "__createdtime__": 1610749235609, + "__updatedtime__": 1610749235609, + "id": "745b3138-a7cf-455a-8256-ac03722eef12", + "permission": { + "super_user": true + }, + "role": "super_user" + }, + "username": "HDB_ADMIN" +} +``` + +--- + +## Add User + +Creates a new user with the specified role and credentials. [Learn more about Harper roles here.](../security/users-and-roles) + +_Operation is restricted to super_user roles only_ + +- `operation` _(required)_ - must always be `add_user` +- `role` _(required)_ - 'role' name value of the role you wish to assign to the user. See `add_role` for more detail +- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash +- `password` _(required)_ - clear text for password. Harper will encrypt the password upon receipt +- `active` _(required)_ - boolean value for status of user's access to your Harper instance. If set to false, user will not be able to access your instance of Harper. 
+
+### Body
+
+```json
+{
+	"operation": "add_user",
+	"role": "role_name",
+	"username": "hdb_user",
+	"password": "password",
+	"active": true
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "hdb_user successfully added"
+}
+```
+
+---
+
+## Alter User
+
+Modifies an existing user's role and/or credentials. [Learn more about Harper roles here.](../security/users-and-roles)
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `alter_user`
+- `username` _(required)_ - username assigned to the user. It cannot be altered after adding the user, as it serves as the hash (primary key).
+- `password` _(optional)_ - cleartext password; Harper will encrypt the password upon receipt
+- `role` _(optional)_ - `role` name value of the role you wish to assign to the user. See `add_role` for more detail
+- `active` _(optional)_ - status of the user's access to your Harper instance. See `add_user` for more detail
+
+### Body
+
+```json
+{
+	"operation": "alter_user",
+	"role": "role_name",
+	"username": "hdb_user",
+	"password": "password",
+	"active": true
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "updated 1 of 1 records",
+	"new_attributes": [],
+	"txn_time": 1611615114397.988,
+	"update_hashes": ["hdb_user"],
+	"skipped_hashes": []
+}
+```
+
+---
+
+## Drop User
+
+Deletes an existing user by username. [Learn more about Harper roles here.](../security/users-and-roles)
+
+_Operation is restricted to super_user roles only_
+
+- `operation` _(required)_ - must always be `drop_user`
+- `username` _(required)_ - username assigned to the user
+
+### Body
+
+```json
+{
+	"operation": "drop_user",
+	"username": "sgoldberg"
+}
+```
+
+### Response: 200
+
+```json
+{
+	"message": "sgoldberg successfully deleted"
+}
+```
diff --git a/versioned_docs/version-4.7/developers/real-time.md b/versioned_docs/version-4.7/developers/real-time.md
new file mode 100644
index 00000000..9c5c79e4
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/real-time.md
@@ -0,0 +1,180 @@
+---
+title: Real-Time
+---
+
+# Real-Time
+
+Harper provides real-time access to data and messaging. This allows clients to monitor and subscribe to data changes in real time, as well as handle data-oriented messaging. Harper supports multiple standardized protocols to facilitate diverse standards-based client interaction.
+
+Harper real-time communication is based around database tables. Declared tables are the basis for monitoring data and for defining "topics" for publishing and subscribing to messages. Declaring a table that establishes a topic can be as simple as adding a table with no attributes to your [schema.graphql in a Harper application folder](./applications/):
+
+```graphql
+type MyTopic @table @export
+```
+
+You can then subscribe to records or sub-topics in this topic/namespace, as well as save data and publish messages, with the protocols discussed below.
+
+### Content Negotiation
+
+Harper is a database, not a generic broker, and is therefore highly adept at handling _structured_ data. Data can be published and subscribed in all supported structured/object formats, including JSON, CBOR, and MessagePack, and the data will be stored and handled as structured data. This means that different clients can individually choose which format they prefer, both for inbound and outbound messages. One client could publish in JSON, and another client could choose to receive messages in CBOR.
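+
+Because topics are backed by tables, format selection uses the formats' standard media types. As a sketch over the HTTP interface (the `MyTopic` table comes from the example above; the id and payload are illustrative), a client could store/publish a message as JSON while requesting any response in CBOR:
+
+```http
+PUT /MyTopic/some-id
+Content-Type: application/json
+Accept: application/cbor
+
+{ "greeting": "hello subscribers" }
+```
+
+Subscribers that negotiated a different format would receive the same structured message encoded in their preferred format.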
+
+## Protocols
+
+### MQTT
+
+Harper supports MQTT as an interface to this real-time data delivery. It is important to note that MQTT in Harper is not just a generic pub/sub hub, but is deeply integrated with the database, providing subscriptions directly to database records and publishing to these records. In this document we will explain how MQTT pub/sub concepts are aligned and integrated with database functionality.
+
+#### Configuration
+
+Harper supports MQTT through its `mqtt` server module, over both standard TCP sockets and WebSockets. This is enabled by default, but can be configured in your `harperdb-config.yaml` configuration, allowing you to change which ports it listens on, whether secure TLS connections are used, and whether MQTT is accepted over WebSockets:
+
+```yaml
+mqtt:
+  network:
+    port: 1883
+    securePort: 8883 # for TLS
+    webSocket: true # will also enable WS support through the default HTTP interface/port
+    mTLS: false
+    requireAuthentication: true
+```
+
+Note that if you are using WebSockets for MQTT, the sub-protocol should be set to "mqtt" (this is required by the MQTT specification, and should be included by any conformant client): `Sec-WebSocket-Protocol: mqtt`. mTLS is also supported by enabling it in the configuration and using the certificate authority from the TLS section of the configuration. See the [configuration documentation for more information](../deployments/configuration).
+
+#### Capabilities
+
+Harper's MQTT capabilities include support for MQTT versions v3.1 and v5 with standard publish and subscription capabilities, multi-level topics, QoS 0 and 1 levels, and durable (non-clean) sessions. Harper supports the QoS 2 interaction, but doesn't guarantee exactly-once delivery (any guarantee of exactly-once delivery over unstable networks is a fictional aspiration). Harper's MQTT implementation doesn't currently support last will, nor single-level wildcards (only multi-level wildcards).
+
+#### Topics
+
+In MQTT, messages are published to, and subscribed from, topics. In Harper, topics are aligned with resource endpoint paths in exactly the same way as the REST endpoints. If you define a table or resource in your schema, with a path/endpoint of "my-resource", that means that this can be addressed as a topic just like a URL path. So a topic of "my-resource/some-id" would correspond to the record in the my-resource table (or custom resource) with a record id of "some-id".
+
+This means you can subscribe to "my-resource/some-id", and this subscription will receive notification messages for any updates to this record. If this record is modified or deleted, a message will be sent to listeners of this subscription.
+
+The current value of this record is also treated as the "retained" message for this topic. When you subscribe to "my-resource/some-id", you will immediately receive the record for this id, through a "publish" command from the server, as the initial "retained" message that is first delivered. This provides a simple and effective way to get the current state of a record and future updates to that record without having to worry about timing issues of aligning a retrieval and subscription separately.
+
+Similarly, publishing a message to a "topic" also interacts with the database. Publishing a message with the "retain" flag enabled is interpreted as an update or put to that record. The published message will replace the current record with the contents of the published message.
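+
+For example, publishing and subscribing might look like this with a standard client library such as [MQTT.js](https://github.com/mqttjs/MQTT.js) (a sketch; the host, credentials, and `MyTopic` table are assumptions for illustration, not part of Harper's API):
+
+```javascript
+import mqtt from 'mqtt';
+
+// connect over standard TCP (port 1883 per the configuration above)
+const client = mqtt.connect('mqtt://server:1883', { username: 'user', password: 'password' });
+
+client.on('connect', () => {
+	// subscribing immediately delivers the current record as the retained message
+	client.subscribe('MyTopic/some-id');
+	// publishing with retain is interpreted as a put, replacing the record's contents
+	client.publish('MyTopic/some-id', JSON.stringify({ status: 'updated' }), { retain: true, qos: 1 });
+});
+
+client.on('message', (topic, payload) => {
+	console.log(topic, JSON.parse(payload.toString()));
+});
+```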
+
+If a message is published without a `retain` flag, the message will not alter the record at all, but will still be published to any subscribers to that record.
+
+Harper supports QoS 0 and 1 for publishing and subscribing.
+
+Harper supports multi-level topics, both for subscribing and publishing. Harper also supports multi-level wildcards, so you can subscribe to `my-resource/#` to receive notifications for `my-resource/some-id` as well as `my-resource/nested/id`, or you can subscribe to `my-resource/nested/#` and receive the latter, but not the former, topic messages. Harper currently only supports trailing multi-level wildcards (no single-level wildcards with '+').
+
+#### Events
+
+JavaScript components can also listen for MQTT events. This is available on the `server.mqtt.events` object. For example, to set up a listener/callback for when MQTT clients connect and authorize, we can do:
+
+```javascript
+server.mqtt.events.on('connected', (session, socket) => {
+	console.log('client connected with id', session.clientId);
+});
+```
+
+The following MQTT events are available:
+
+- `connection` - When a client initially establishes a TCP or WS connection to the server
+- `connected` - When a client establishes an authorized MQTT connection
+- `auth-failed` - When a client fails to authenticate
+- `disconnected` - When a client disconnects from the server
+
+### Ordering
+
+Harper is designed to be a distributed database, and an intrinsic characteristic of distributed servers is that messages may take different amounts of time to traverse the network and may arrive in a different order depending on server location and network topology. Harper is designed for distributed data with minimal latency, so messages are delivered to subscribers immediately when they arrive. Harper does not delay messages to coordinate confirmation or consensus among other nodes, which would significantly increase latency; messages are delivered as quickly as possible.
+
+As an example, let's consider a case where message #1 is published to node A, which then sends the message to node B and node C, but the message takes a while to get there. Slightly later, while the first message is still in transit, message #2 is published to node B, which then replicates it to A and C, and because of network conditions, message #2 arrives at node C before message #1. Because Harper prioritizes low latency, when node C receives message #2, it immediately publishes it to all its local subscribers (it has no knowledge that message #1 is in transit).
+
+When message #1 is received by node C, the behavior of what it does with this message depends on whether the message is a "retained" message (was published with a retain flag set to true, or was put/update/upsert/inserted into the database) or a non-retained message. In the case of a non-retained message, this message will be delivered to all local subscribers (even though it had been published earlier), thereby prioritizing the delivery of every message. On the other hand, a retained message will not deliver the earlier out-of-order message to clients, and Harper will keep the message with the latest timestamp as the "winning" record state (and it will be the retained message for any subsequent subscriptions). Retained messages maintain (eventual) consistency across the entire cluster of servers: all nodes will converge on the same message as the latest, retained message (#2 in this case).
+
+Non-retained messages are generally a good choice for applications like chat, where every message needs to be delivered even if messages might arrive out of order (the order may not be consistent across all servers). Retained messages can be thought of as "superseding" messages, and are a good fit for applications like instrument measurements (for example, temperature readings), where the priority is to provide the _latest_ reading, older readings are not worth publishing after a newer one, and consistency of the most recent record across the network is important.
+
+### WebSockets
+
+WebSockets are supported through the REST interface and go through the `connect(incomingMessages)` method on resources. By default, making a WebSocket connection to a URL will subscribe to the referenced resource. For example, making a WebSocket connection to `new WebSocket('wss://server/my-resource/341')` will access the resource defined for 'my-resource' and the resource id of 341 and connect to it. On the web platform this could be:
+
+```javascript
+let ws = new WebSocket('wss://server/my-resource/341');
+ws.onmessage = (event) => {
+	// received a notification from the server
+	let data = JSON.parse(event.data);
+};
+```
+
+By default, the resource will make a subscription to that resource, monitoring any changes to the records or messages published to it, and will return events on the WebSocket connection. You can also override `connect(incomingMessages)` with your own handler. The `connect` method simply needs to return an iterable (asynchronous iterable) that represents the stream of messages to be sent to the client. One easy way to create an iterable stream is to define the `connect` method as a generator and `yield` messages as they become available. For example, a simple WebSockets echo server for a resource could be written:
+
+```javascript
+export class Echo extends Resource {
+	async *connect(incomingMessages) {
+		for await (let message of incomingMessages) { // wait for each incoming message from the client
+			// and send the message back to the client
+			yield message;
+		}
+	}
+}
+```
+
+You can also call the default `connect` and it will provide a convenient streaming iterable with events for the outgoing messages, with a `send` method that you can call to send messages on the iterable, and a `close` event for determining when the connection is closed. The incoming messages iterable is also an event emitter, and you can listen for `data` events to get the incoming messages using event style:
+
+```javascript
+export class Example extends Resource {
+	connect(incomingMessages) {
+		let outgoingMessages = super.connect();
+		let timer = setInterval(() => {
+			outgoingMessages.send({ greeting: 'hi again!' });
+		}, 1000); // send a message once a second
+		incomingMessages.on('data', (message) => {
+			// another way of echo-ing the data back to the client
+			outgoingMessages.send(message);
+		});
+		outgoingMessages.on('close', () => {
+			// make sure we end the timer once the connection is closed
+			clearInterval(timer);
+		});
+		return outgoingMessages;
+	}
+}
+```
+
+### Server-Sent Events
+
+Server-Sent Events (SSE) are also supported through the REST server interface, and provide a simple and efficient mechanism for web-based applications to receive real-time updates. For consistency of push delivery, SSE connections go through the `connect()` method on resources, much like WebSockets.
The primary difference is that `connect` is called without any `incomingMessages` argument, since SSE is a one-directional transport mechanism. This can be used much like WebSockets: specifying a resource URL path will connect to that resource and, by default, provide a stream of messages for changes to, and messages for, that resource. For example, you can connect to receive notifications in a browser for a resource like:
+
+```javascript
+let eventSource = new EventSource('https://server/my-resource/341', { withCredentials: true });
+eventSource.onmessage = (event) => {
+	// received a notification from the server
+	let data = JSON.parse(event.data);
+};
+```
+
+### MQTT Feature Support Matrix
+
+| Feature                                                             | Support                                                                                     |
+| ------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- |
+| Connections, protocol negotiation, and acknowledgement with v3.1.1  | :heavy_check_mark:                                                                          |
+| Connections, protocol negotiation, and acknowledgement with v5      | :heavy_check_mark:                                                                          |
+| Secure MQTTS                                                        | :heavy_check_mark:                                                                          |
+| MQTTS over WebSockets                                               | :heavy_check_mark:                                                                          |
+| MQTT authentication via user/pass                                   | :heavy_check_mark:                                                                          |
+| MQTT authentication via mTLS                                        | :heavy_check_mark:                                                                          |
+| Publish                                                             | :heavy_check_mark:                                                                          |
+| Subscribe                                                           | :heavy_check_mark:                                                                          |
+| Multi-level wildcard                                                | :heavy_check_mark:                                                                          |
+| Single-level wildcard                                               |                                                                                             |
+| QoS 0                                                               | :heavy_check_mark:                                                                          |
+| QoS 1                                                               | :heavy_check_mark:                                                                          |
+| QoS 2                                                               | Not fully supported; the QoS 2 conversation is performed, but exactly-once delivery is not guaranteed |
+| Keep-Alive monitoring                                               | :heavy_check_mark:                                                                          |
+| Clean session                                                       | :heavy_check_mark:                                                                          |
+| Durable session                                                     | :heavy_check_mark:                                                                          |
+| Distributed durable session                                         |                                                                                             |
+| Will                                                                |                                                                                             |
+| MQTT V5 User properties                                             |                                                                                             |
+| MQTT V5 Will properties                                             |                                                                                             |
+| MQTT V5 Connection properties                                       |                                                                                             |
+| MQTT V5 Connection acknowledgement properties                       |                                                                                             |
+| MQTT V5 Publish properties                                          |                                                                                             |
+| MQTT V5 Subscribe properties retain handling                        | :heavy_check_mark:                                                                          |
+| MQTT V5 Subscribe properties                                        |                                                                                             |
+| MQTT V5 Ack properties                                              |                                                                                             |
+| MQTT V5 AUTH command                                                |                                                                                             |
+| MQTT V5 Shared Subscriptions                                        |                                                                                             |
diff --git a/versioned_docs/version-4.7/developers/replication/index.md b/versioned_docs/version-4.7/developers/replication/index.md
new file mode 100644
index 00000000..a71fa803
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/replication/index.md
@@ -0,0 +1,285 @@
+---
+title: Replication/Clustering
+---
+
+# Replication/Clustering
+
+Harper’s replication system is designed to make distributed data replication fast and reliable across multiple nodes. This means you can easily build a distributed database that ensures high availability, disaster recovery, and data localization. The best part? It’s simple to set up, configure, and manage. You can easily add or remove nodes, choose which data to replicate, and monitor the system’s health without jumping through hoops.
+
+### Replication Overview
+
+Harper replication uses a peer-to-peer model where every node in your cluster can send and subscribe to data. Each node connects through WebSockets, allowing data to flow seamlessly in both directions. By default, Harper takes care of managing these connections and subscriptions, so you don’t have to worry about data consistency. The system is designed to maintain secure, reliable connections between nodes, ensuring that your data is always safe.
+
+### Replication Configuration
+
+To connect your nodes, you need to provide hostnames or URLs so the nodes can connect to each other.
This can be done via configuration or through operations. To configure replication, you can specify connection information in the `replication` section of the [harperdb-config.yaml](../deployments/configuration). Here, you can specify the hostname of the current node, and routes to connect to other nodes, for example:
+
+```yaml
+replication:
+  hostname: server-one
+  routes:
+    - server-two
+    - server-three
+```
+
+In this example, the current node is `server-one`, and it will connect to `server-two` and `server-three`. Routes to other nodes can also be configured with URLs or ports:
+
+```yaml
+replication:
+  hostname: server-one
+  routes:
+    - wss://server-two:9933 # URL based route
+    - hostname: server-three # define a hostname and port
+      port: 9933
+```
+
+You can also use the [operations API](./operations-api/clustering) to dynamically add and remove nodes from the cluster. This is useful for adding new nodes to a running cluster or removing nodes that are no longer needed. For example (note this is the basic form; you would also need to provide the necessary credentials for the operation, as described in the section on securing connections):
+
+```json
+{
+	"operation": "add_node",
+	"hostname": "server-two"
+}
+```
+
+These operations will also dynamically generate certificates as needed, if there are no existing signed certificates, or if the existing certificates are not valid for the new node.
+
+Harper will also automatically replicate node information to other nodes in a cluster ([gossip-style discovery](https://highscalability.com/gossip-protocol-explained/)). This means that you only need to connect to one node in an existing cluster, and Harper will automatically detect and connect to other nodes in the cluster (bidirectionally).
+
+By default, Harper will replicate all the data in all the databases. You can configure which databases are replicated, and then override this behavior on a per-table basis. For example, you can indicate which databases should be replicated by default, here indicating you want to replicate the `data` and `system` databases:
+
+```yaml
+replication:
+  databases:
+    - data
+    - system
+```
+
+By default, all tables within a replicated database will be replicated. Transactions are replicated atomically, which may involve data across multiple tables. However, you can also configure replication for individual tables, and disable replication for specific tables in a database by setting `replicate` to `false` in the table definition:
+
+```graphql
+type LocalTableForNode @table(replicate: false) {
+	id: ID!
+	name: String!
+}
+```
+
+You can also control which nodes data is replicated to, and how many. By default, Harper will replicate data to all nodes in the cluster, but you can control where data is replicated with the [sharding configuration and APIs](replication/sharding).
+
+By default, replication connects to the secure port 9933. You can configure the replication port in the `replication` section:
+
+```yaml
+replication:
+  securePort: 9933
+```
+
+### Securing Connections
+
+Harper supports the highest levels of security through public key infrastructure (PKI) based authentication and authorization.
Replication connections use the WebSocket protocol and support multiple authentication methods depending on your security configuration:
+
+- **Certificate-based authentication** (recommended for production): Nodes are identified by the certificate's common name (CN) or Subject Alternative Names (SANs)
+- **IP-based authentication** (for development/testing): Nodes are identified by their IP address when using insecure connections (see [Insecure Connection IP-based Authentication](#insecure-connection-ip-based-authentication) below)
+
+When using certificate-based authentication, Harper can automatically perform CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) verification to check if certificates have been revoked. This ensures that compromised certificates cannot be used for replication connections. OCSP and CRL verification works automatically with certificates from public certificate authorities (like Let's Encrypt or DigiCert) when `enableRootCAs` is enabled, as these certificates include the necessary OCSP responder URLs and CRL distribution points. For self-signed certificates or private CAs that don't support OCSP/CRL, you can use Harper's manual certificate revocation feature (see [Revoking Certificates](#revoking-certificates) below). Certificate verification settings follow the same configuration as HTTP mTLS connections (see [certificate verification configuration](../deployments/configuration#http)).
+
+#### Provide your own certificates
+
+If you want to secure your Harper connections with your own signed certificates, you can easily do so. Whether you have certificates from a public authority (like Let's Encrypt or DigiCert) or a corporate certificate authority, you can use them to authenticate nodes securely. You can then allow nodes to authorize each other by checking the certificate against the standard list of root certificate authorities by enabling the `enableRootCAs` option in the config:
+
+```yaml
+replication:
+  enableRootCAs: true
+```
+
+And then just make sure the certificate’s common name (CN) matches the node's hostname.
+
+#### Setting Up Custom Certificates
+
+There are two ways to configure Harper with your own certificates:
+
+1. Use the `add_certificate` operation to upload them.
+2. Specify the certificate paths directly in the `replication` section of the `harperdb-config.yaml` file.
+
+If your certificate is signed by a trusted public authority, just provide the path to the certificate and private key. If you're using self-signed certificates or a private certificate authority, you’ll also need to provide the certificate authority (CA) details to complete the setup.
+
+Example configuration:
+
+```yaml
+tls:
+  certificate: /path/to/certificate.pem
+  certificateAuthority: /path/to/ca.pem
+  privateKey: /path/to/privateKey.pem
+```
+
+With this in place, Harper will load the provided certificates into the certificate table and use these to secure and authenticate connections between nodes.
+
+You have the option to skip providing a specific certificate authority (CA) and instead verify your certificate against the root certificates included in the bundled Mozilla CA store. This bundled CA store, provided by Node.js, is a snapshot of Mozilla's CA certificates that is fixed at the time of each Node.js release.
+
+To enable the root certificates, set `replication.enableRootCAs` to `true` in the `harperdb-config.yaml` file:
+
+```yaml
+replication:
+  enableRootCAs: true
+```
+
+#### Cross-generated certificates
+
+Harper can also generate its own certificates for secure connections. This is useful for setting up secure connections between nodes when no existing certificates are available, and can be used in development, testing, or production environments. Certificates will be automatically requested and signed between nodes to support a form of distributed certificate generation and signing. To establish secure connections between nodes using cross-generated certificates, you simply use the [`add_node` operation](./operations-api/clustering) over SSL, and specify the temporary authentication credentials to use for connecting and authorizing the certificate generation and signing.
+
+Example configuration:
+
+```json
+{
+	"operation": "add_node",
+	"hostname": "server-two",
+	"verify_tls": false,
+	"authorization": {
+		"username": "admin",
+		"password": "password"
+	}
+}
+```
+
+When you connect to another node (e.g., `server-two`), Harper uses secure WebSockets and the provided credentials to establish the connection.
+
+If you’re working with a fresh install, you’ll need to set `verify_tls` to `false` temporarily, so the self-signed certificate is accepted. Once the connection is made, Harper will automatically handle the certificate signing process:
+
+- It creates a certificate signing request (CSR) and sends it to `server-two`, which then signs it and returns the signed certificate along with the certificate authority (CA).
+- The signed certificate is stored for future connections between the nodes, ensuring secure communication.
+
+**Important:** Your credentials are not stored; they are discarded immediately after use.
+
+You can also provide credentials in HTTP Authorization format (Basic auth, Token auth, or JWT). This is helpful for handling authentication with the required permissions to generate and sign certificates.
+
+Additionally, you can use `set_node` as an alias for the `add_node` operation if you prefer.
+
+#### Revoking Certificates
+
+Certificates used in replication can be revoked by using the certificate serial number and either the `revoked_certificates` attribute in the `hdb_nodes` system table or the route config in `harperdb-config.yaml`.
+
+To utilize the `revoked_certificates` attribute in the `hdb_nodes` table, you can use the `add_node` or `update_node` operation to add the certificate serial number to the `revoked_certificates` array. For example:
+
+```json
+{
+	"operation": "update_node",
+	"hostname": "server-two",
+	"revoked_certificates": ["1769F7D6A"]
+}
+```
+
+To utilize the replication route config in `harperdb-config.yaml`, you can add the certificate serial number to the `revokedCertificates` array. For example:
+
+```yaml
+replication:
+  routes:
+    - hostname: server-three
+      port: 9930
+      revokedCertificates:
+        - 1769F7D6A
+        - QA69C7E2S
+```
+
+#### Removing Nodes
+
+Nodes can be removed from the cluster using the [`remove_node` operation](./operations-api/clustering). This will remove the node from the cluster, and stop replication to and from the node. For example:
+
+```json
+{
+	"operation": "remove_node",
+	"hostname": "server-two"
+}
+```
+
+#### Insecure Connection IP-based Authentication
+
+You can completely disable secure connections and use IP addresses to authenticate nodes with each other.
This can be useful for development and testing, or within a secure private network, but should never be used for production with publicly accessible servers. To disable secure connections, simply configure replication with an insecure port, either by [configuring the operations API](../deployments/configuration) to run on an insecure port or by configuring replication to run on an insecure port, and then set up IP-based routes to connect to other nodes:
+
+```yaml
+replication:
+  port: 9933
+  routes:
+    - 127.0.0.2
+    - 127.0.0.3
+```
+
+Note that in this example, we are using loopback addresses, which can be a convenient way to run multiple nodes on a single machine for testing and development.
+
+#### Explicit Subscriptions
+
+By default, Harper automatically handles connections and subscriptions between nodes, ensuring data consistency across your cluster. It even uses data routing to manage node failures. But if you want more control, you can manage these connections manually by explicitly subscribing to nodes. This is useful for advanced configurations, testing, or debugging.
+
+#### Important Notes on Explicit Subscriptions
+
+If you choose to manage subscriptions manually, Harper will no longer handle data consistency for you. This means there’s no guarantee that all nodes will have consistent data if subscriptions don’t fully replicate in all directions. If a node goes down, it’s possible that some data wasn’t replicated before the failure.
+
+#### How to Subscribe to Nodes
+
+To explicitly subscribe to a node, you can use operations like `add_node` and define the subscriptions. For example, you can configure a node (e.g., `server-two`) to publish transactions on a specific table (e.g., `dev.my-table`) without receiving data from that node.
+
+Example configuration:
+
+```json
+{
+	"operation": "add_node",
+	"hostname": "server-two",
+	"subscriptions": [
+		{
+			"database": "dev",
+			"table": "my-table",
+			"publish": true,
+			"subscribe": false
+		}
+	]
+}
+```
+
+To update an explicit subscription, you can use the [`update_node` operation](./operations-api/clustering). Here we are updating the subscription to also receive transactions on the `dev.my-table` table from the `server-two` node:
+
+```json
+{
+	"operation": "update_node",
+	"hostname": "server-two",
+	"subscriptions": [
+		{
+			"database": "dev",
+			"table": "my-table",
+			"publish": true,
+			"subscribe": true
+		}
+	]
+}
+```
+
+#### Monitoring Replication
+
+You can monitor the status of replication through the operations API, using the [`cluster_status` operation](./operations-api/clustering). For example:
+
+```json
+{
+	"operation": "cluster_status"
+}
+```
+
+#### Database Initial Synchronization and Resynchronization
+
+When a new node is added to the cluster, if its database has not previously been synced, it will initially download the database from the first node it connects to. This will copy every record from the source database to the new node. Once the initial synchronization is complete, the new node will enter replication mode and receive records from each node as they are created, updated, or deleted. If a node goes down and comes back up, it will also resynchronize with the other nodes in the cluster, to ensure that it has the most up-to-date data.
+
+You may also specify a `start_time` in the `add_node` operation to indicate that, when a database connects, it should not download the entire database, but only the data since the given starting time.
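+
+For example (a sketch; the timestamp format shown, an ISO 8601 string, is an assumption for illustration):
+
+```json
+{
+	"operation": "add_node",
+	"hostname": "server-two",
+	"start_time": "2024-01-01T00:00:00.000Z"
+}
+```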
+
+**Advanced Configuration**
+
+You can also check the configuration of the replication system, including the currently known nodes and certificates, by querying the `hdb_nodes` and `hdb_certificate` tables:
+
+```json
+{
+	"operation": "search_by_value",
+	"database": "system",
+	"table": "hdb_nodes",
+	"attribute": "name",
+	"value": "*"
+}
+```
diff --git a/versioned_docs/version-4.7/developers/replication/sharding.md b/versioned_docs/version-4.7/developers/replication/sharding.md
new file mode 100644
index 00000000..307e38f1
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/replication/sharding.md
@@ -0,0 +1,167 @@
+---
+title: Sharding
+---
+
+Harper's replication system supports various levels of replication or sharding. Harper can be configured to replicate different data to different subsets of nodes. This can be used to facilitate horizontal scalability of storage and write performance, while maintaining optimal strategies for data locality and data consistency. When sharding is configured, Harper will replicate data to only a subset of nodes, based on the sharding configuration, and can then retrieve data from the appropriate nodes as needed to fulfill requests for data.
+
+There are two main ways to set up sharding in Harper. The first approach is to use dynamic sharding, where the location or residency of records is determined dynamically based on where the record was written and on the record's data, and records can be dynamically relocated based on where they are accessed. This residency information can be specific to each record, and can vary based on the computed residency and where the data is written and accessed.
+
+The second approach is to define specific shards, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key, regardless of where the data was written or accessed, or its content. This approach is more static, but can be more efficient for certain use cases, and means that the location of data can always be predictably determined based on the primary key.
+
+## Configuration For Dynamic Sharding
+
+By default, Harper will replicate all data to all nodes. However, replication can easily be configured for "sharding", or storing different data in different locations or nodes. The simplest way to configure sharding and limit replication to improve performance and efficiency is to configure a replication-to count. This will limit the number of nodes that data is replicated to. For example, to specify that writes should replicate to 2 other nodes besides the node that first stored the data, you can set `replicateTo` to 2 in the `replication` section of the `harperdb-config.yaml` file:
+
+```yaml
+replication:
+  replicateTo: 2
+```
+
+This will ensure that data is replicated to two other nodes, so that each record will be stored on three nodes in total.
+
+With a sharding configuration (or customization below) in place, requests for records that don't reside on the server handling the request will automatically be forwarded to the appropriate node. This is done transparently, so that the client does not need to know where the data is stored.
+
+## Replication Control with Headers
+
+With the REST interface, replication levels and destinations can also be specified with the `X-Replicate-To` header.
This can be used to indicate the number of additional nodes that data should be replicated to, or to specify the nodes that data should be replicated to. The `X-Replicate-To` header can be used with the `POST` and `PUT` methods. This header can also specify whether the response should wait for confirmation from other nodes, and from how many, with the `confirm` parameter. For example, to specify that data should be replicated to two other nodes, and that the response should be returned once confirmation is received from one other node, you can use the following header:
+
+```http
+PUT /MyTable/3
+X-Replicate-To: 2;confirm=1
+
+...
+```
+
+You can also explicitly specify destination nodes by providing a comma-separated list of node hostnames. For example, to specify that data should be replicated to nodes `node1` and `node2`, you can use the following header:
+
+```http
+PUT /MyTable/3
+X-Replicate-To: node1,node2
+```
+
+(This can also be used with the `confirm` parameter.)
+
+## Replication Control with Operations
+
+Likewise, you can specify `replicateTo` and `replicatedConfirmation` parameters in the operation object when using the Harper API. For example, to specify that data should be replicated to two other nodes, and that the response should be returned once confirmation is received from one other node, you can use the following operation object:
+
+```json
+{
+	"operation": "update",
+	"schema": "dev",
+	"table": "MyTable",
+	"hashValues": [3],
+	"record": {
+		"name": "John Doe"
+	},
+	"replicateTo": 2,
+	"replicatedConfirmation": 1
+}
+```
+
+or you can specify nodes:
+
+```jsonc
+{
+	// ...
+	"replicateTo": ["node-1", "node-2"],
+	// ...
+}
+```
+
+## Programmatic Replication Control
+
+Additionally, you can specify `replicateTo` and `replicatedConfirmation` parameters programmatically in the context of a resource. For example, you can define a put method:
+
+```javascript
+class MyTable extends tables.MyTable {
+	put(record) {
+		const context = this.getContext();
+		context.replicateTo = 2; // or an array of node names
+		context.replicatedConfirmation = 1;
+		return super.put(record);
+	}
+}
+```
+
+## Configuration for Static Sharding
+
+Alternatively, you can configure static sharding, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key. The `shard` is identified by a number. To configure the shard for each node, you can specify the shard number in the `shard` property of the `replication` section of the configuration:
+
+```yaml
+replication:
+  shard: 1
+```
+
+Alternatively, you can configure the `shard` under the `replication` `routes`. This allows you to assign a specific shard id based on the routing configuration:
+
+```yaml
+replication:
+  routes:
+    - hostname: node1
+      shard: 1
+    - hostname: node2
+      shard: 2
+```
+
+Or you can specify a `shard` number by including that property in an `add_node` operation or `set_node` operation, to dynamically assign a node to a shard.
+
+You can then return a shard number from the `setResidency` or `setResidencyById` functions described below.
+
+## Custom Sharding
+
+You can also define a custom sharding strategy by specifying a function to compute the "residency", or location, of where records should be stored and reside. To do this, use the `setResidency` method, providing a function that will determine the residency of each record. The function you provide will be called with the record entry, and should return an array of nodes that the record should be replicated to (using their hostnames).
For example, to shard records based on the value of the `id` field, you can use the following code:
+
+```javascript
+MyTable.setResidency((record) => {
+	return record.id % 2 === 0 ? ['node1'] : ['node2'];
+});
+```
+
+With this approach, the record metadata, which includes the residency information and any indexed properties, will be replicated to all nodes, but the full record will only be replicated to the nodes specified by the residency function.
+
+The `setResidency` function can alternately return a shard number, which will replicate the data to all the nodes in that shard:
+
+```javascript
+MyTable.setResidency((record) => {
+	return record.id % 2 === 0 ? 1 : 2;
+});
+```
+
+### Custom Sharding By Primary Key
+
+Alternately, you can define a custom sharding strategy based on the primary key alone. This allows records to be retrieved without needing access to the record data or metadata. With this approach, data will only be replicated to the nodes specified by the residency function (the record metadata doesn't need to be replicated to all nodes). To do this, you can use the `setResidencyById` method, providing a function that will determine the residency or shard of each record based on the primary key. The function you provide will be called with the primary key, and should return a `shard` number or an array of nodes that the record should be replicated to (using their hostnames). For example, to shard records based on the value of the primary key, you can use the following code:
+
+```javascript
+MyTable.setResidencyById((id) => {
+	return id % 2 === 0 ? 1 : 2; // return shard number
+});
+```
+
+or
+
+```javascript
+MyTable.setResidencyById((id) => {
+	return id % 2 === 0 ? ['node1'] : ['node2']; // return array of node hostnames
+});
+```
+
+### Disabling Cross-Node Access
+
+Normally sharding allows data to be stored on specific nodes while still allowing access to the data from any node. However, you can also disable cross-node access so that data is only returned if it is stored on the node where it is accessed. To do this, you can set the `replicateFrom` property on the context of the operation to `false`:
+
+```json
+{
+	"operation": "search_by_id",
+	"table": "MyTable",
+	"ids": [3],
+	"replicateFrom": false
+}
+```
+
+Or use a header with the REST API:
+
+```http
+GET /MyTable/3
+X-Replicate-From: none
+```
diff --git a/versioned_docs/version-4.7/developers/rest.md b/versioned_docs/version-4.7/developers/rest.md
new file mode 100644
index 00000000..7e085d8e
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/rest.md
@@ -0,0 +1,403 @@
+---
+title: REST
+---
+
+# REST
+
+Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation (for HTTP interactions), providing the best performance and HTTP interoperability with different clients.
+
+Resources, including tables, can be configured as RESTful endpoints. Make sure you review the [application introduction](applications/) and [defining schemas](applications/defining-schemas) to properly define your schemas and select which tables are exported and available through the REST interface, as tables are not exported by default. The name of the [exported](applications/defining-schemas#export) resource defines the basis of the endpoint path available at the application HTTP server port [configured here](../deployments/configuration#http) (the default being `9926`).
From there, a record id or query can be appended. Following uniform interface principles, HTTP methods define different actions with resources. For each method, this section describes the default action.
+
+The default path structure provides access to resources at several levels:
+
+- `/my-resource` - The root path of a resource usually has a description of the resource (like a describe operation for a table).
+- `/my-resource/` - The trailing slash in a path indicates it is a collection of records. The root collection for a table represents all the records in a table, and usually you will append query parameters to query and search for more specific records.
+- `/my-resource/record-id` - This resource locator represents a specific record, referenced by its id. This is typically how you can retrieve, update, and delete individual records.
+- `/my-resource/record-id/` - Again, a trailing slash indicates a collection; here it is the collection of the records that begin with the specified id prefix.
+- `/my-resource/record-id/with/multiple/parts` - A record id can consist of multiple path segments.
+
+### GET
+
+GET requests can be used to retrieve individual records or perform searches. This is handled by the Resource method `get()` (and can be overridden).
+
+#### `GET /my-resource/<record-id>`
+
+This can be used to retrieve a record by its primary key. The response will include the record as the body.
+
+##### Caching/Conditional Requests
+
+A `GET` response for a record will include an encoded version (a timestamp of the last modification) of this record in the `ETag` response header (or of any accessed record when used in a custom get method). On subsequent requests, a client (that has a cached copy) may include an `If-None-Match` request header with this tag. If the record has not been updated since that version, the response will have a 304 status and no body. This facilitates significant performance gains since the response data doesn't need to be serialized and transferred over the network.
+
+#### `GET /my-resource/?property=value`
+
+This can be used to search for records by the specified property name and value. See the querying section for more information.
+
+#### `GET /my-resource/<record-id>.property`
+
+This can be used to retrieve the specified property of the specified record. Note that this will only work for properties that are declared in the schema.
+
+### PUT
+
+This can be used to create or update a record with the provided object/data (similar to an "upsert") at a specified key. This is handled by the Resource method `put(record)`.
+
+#### `PUT /my-resource/<record-id>`
+
+This will create or update the record with the URL path that maps to the record's primary key. The record will be replaced with the contents of the data in the request body. The new record will exactly match the data that was sent (this will remove any properties that were present in the previous record and not included in the body). Future GETs will return the exact data that was provided by PUT (what you PUT is what you GET). For example:
+
+```http
+PUT /MyTable/123
+Content-Type: application/json
+
+{ "name": "some data" }
+```
+
+This will create or replace the record with a primary key of "123" with the object defined by the JSON in the body. This is handled by the Resource method `put()`.
+
+### DELETE
+
+This can be used to delete a record or records.
+
+#### `DELETE /my-resource/<record-id>`
+
+This will delete a record with the given primary key. This is handled by the Resource's `delete` method.
For example:
+
+```http
+DELETE /MyTable/123
+```
+
+This will delete the record with the primary key of "123".
+
+#### `DELETE /my-resource/?property=value`
+
+This will delete all the records that match the provided query.
+
+### POST
+
+Generally the POST method can be used for custom actions, since POST has the broadest semantics. For tables that are exposed as endpoints, this can also be used to create new records.
+
+#### `POST /my-resource/`
+
+This is handled by the Resource method `post(data)`, which is a good method to extend to make various other types of modifications. Also, with a table you can create a new record without specifying a primary key, for example:
+
+```http
+POST /MyTable/
+Content-Type: application/json
+
+{ "name": "some data" }
+```
+
+This will create a new record, auto-assigning a primary key, which will be returned in the `Location` header.
+
+### Querying through URL query parameters
+
+URL query parameters provide a powerful language for specifying database queries in Harper. This can be used to search by a single attribute name and value, to find all records with the given value for the specified property/attribute. It is important to note that an attribute must be configured to be indexed in order to search on it. For example:
+
+```http
+GET /my-resource/?property=value
+```
+
+We can specify multiple properties that must match:
+
+```http
+GET /my-resource/?property=value&property2=another-value
+```
+
+Note that only one of the attributes needs to be indexed for this query to execute.
+
+We can also specify different comparators, such as less than and greater than queries, using [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax. If we want to specify records with an `age` value greater than 20:
+
+```http
+GET /my-resource/?age=gt=20
+```
+
+Or less than or equal to 20:
+
+```http
+GET /my-resource/?age=le=20
+```
+
+The comparison operators include standard FIQL operators: `lt` (less than), `le` (less than or equal), `gt` (greater than), `ge` (greater than or equal), and `ne` (not equal). These comparison operators can also be combined with other query parameters with `&`. For example, if we wanted products with a category of software and a price between 100 and 200, we could write:
+
+```http
+GET /Product/?category=software&price=gt=100&price=lt=200
+```
+
+Comparison operators can also be used on Date fields; however, we have to ensure that the date format is properly escaped. For example, if we are looking for a listing date greater than `2017-03-08T09:30:00.000Z`, we must escape the colons as `%3A`:
+
+```http
+GET /Product/?listDate=gt=2017-03-08T09%3A30%3A00.000Z
+```
+
+You can also search for attributes that start with a specific string, by using the `==` comparator and appending a `*` to the attribute value:
+
+```http
+GET /Product/?name==Keyboard*
+```
+
+**Chained Conditions**
+
+You can also specify that a range condition must be met for a single attribute value by chaining conditions. This is done by omitting the name in the name-value pair. For example, to find products with a price between 100 and 200, you could write:
+
+```http
+GET /Product/?price=gt=100<=200
+```
+
+Chaining can be used to combine `gt` or `ge` with `lt` or `le` to specify a range of values. Currently, no other types of chaining are supported.
+
+Note that some HTTP clients may be overly aggressive in encoding query parameters, and you may need to disable extra encoding of query parameters to ensure operators are passed through without manipulation.
+
+Here is a full list of the supported FIQL-style operators/comparators:
+
+- `==`: equal
+- `=lt=`: less than
+- `=le=`: less than or equal
+- `=gt=`: greater than
+- `=ge=`: greater than or equal
+- `=ne=`, `!=`: not equal
+- `=ct=`: contains the value (for strings)
+- `=sw=`, `==*`: starts with the value (for strings)
+- `=ew=`: ends with the value (for strings)
+- `=`, `===`: strict equality (no type conversion)
+- `!==`: strict inequality (no type conversion)
+
+#### Unions
+
+Conditions can also be applied with `OR` logic, returning the union of records that match either condition. This can be specified by using the `|` operator instead of `&`. For example, to return any product with a rating of `5` _or_ a `featured` attribute that is `true`, we could write:
+
+```http
+GET /Product/?rating=5|featured=true
+```
+
+#### Grouping of Operators
+
+Multiple conditions with different operators can be combined with grouping of conditions to indicate the order of operation. Grouping conditions can be done with parentheses, with standard grouping conventions as used in query and mathematical expressions. For example, a query to find products with a rating of 5 OR a price between 100 and 200 could be written:
+
+```http
+GET /Product/?rating=5|(price=gt=100&price=lt=200)
+```
+
+Grouping conditions can also be done with square brackets, which function the same as parentheses for grouping conditions. The advantage of using square brackets is that you can include user-provided values that might have parentheses in them, and use standard URI component encoding functionality, which will safely escape/encode square brackets, but not parentheses. For example, if we were constructing a query for products with a rating of 5 and matching one of a set of user-provided tags, a query could be built like:
+
+```http
+GET /Product/?rating=5&[tag=fast|tag=scalable|tag=efficient]
+```
+
+And the tags could be safely generated from user inputs in a tag array like:
+
+```javascript
+// e.g. tags = ['fast', 'scalable'] produces /Product/?rating=5&[tag=fast|tag=scalable]
+let url = `/Product/?rating=5&[${tags.map((tag) => 'tag=' + encodeURIComponent(tag)).join('|')}]`;
+```
+
+More complex queries can be created by further nesting groups:
+
+```http
+GET /Product/?price=lt=100|[rating=5&[tag=fast|tag=scalable|tag=efficient]&inStock=true]
+```
+
+### Query Calls
+
+Harper has several special query functions that use "call" syntax. Each can be included in the query string as its own query entry (separated from other query conditions with an `&`). These include:
+
+#### `select(properties)`
+
+This function allows you to specify which properties should be included in the responses. This takes several forms:
+
+- `?select(property)`: This will return the values of the specified property directly in the response (they will not be put in an object).
+- `?select(property1,property2)`: This returns the records as objects, but limited to the specified properties.
+- `?select([property1,property2,...])`: This returns the records as arrays of the property values in the specified properties.
+- `?select(property1,)`: This can be used to specify that objects should be returned with the single specified property.
+- `?select(property{subProperty1,subProperty2{subSubProperty,..}},...)`: This can be used to specify which sub-properties should be included in nested objects and joined/referenced records.
+
+To get a list of product names with a category of software:
+
+```http
+GET /Product/?category=software&select(name)
+```
+
+#### `limit(start,end)` or `limit(end)`
+
+This function specifies a limit on the number of records returned, optionally providing a starting offset.
+
+For example, to find the first twenty records with a `rating` greater than 3 and `inStock` equal to true, returning only the `rating` and `name` properties, you could use:
+
+```http
+GET /Product/?rating=gt=3&inStock=true&select(rating,name)&limit(20)
+```
+
+#### `sort(property)`, `sort(+property,-property,...)`
+
+This function allows you to indicate the sort order for the returned results. The argument for `sort()` is one or more properties that should be used to sort. If the property is prefixed with '+' or no prefix, the sort will be performed in ascending order by the indicated attribute/property. If the property is prefixed with '-', it will be sorted in descending order. If multiple properties are specified, the sort will be performed on the first property, and for records with the same value for that property, the next property will be used to break the tie and sort results. This tie breaking will continue through any provided properties.
+
+For example, to sort by product name (in ascending order):
+
+```http
+GET /Product?rating=gt=3&sort(+name)
+```
+
+To sort by rating in ascending order, then by price in descending order for products with the same rating:
+
+```http
+GET /Product?sort(+rating,-price)
+```
+
+## Relationships
+
+Harper supports relationships in its data models, allowing tables to define a relationship with data from other tables (or even themselves) through foreign keys. These relationships can be one-to-many, many-to-one, or many-to-many (and even ordered relationships). These relationships are defined in the schema, and can then easily be queried through chained attributes that act as "join" queries, allowing related attributes to be referenced in conditions and selected for returned results.
+
+### Chained Attributes and Joins
+
+To support relationships and hierarchical data structures, in addition to querying on top-level attributes, you can also query on chained attributes. Most importantly, this provides Harper's "join" functionality, allowing related tables to be queried and joined in the results. Chained properties are specified by using dot syntax. In order to effectively leverage join functionality, you need to define a relationship in your schema:
+
+```graphql
+type Product @table @export {
+	id: ID @primaryKey
+	name: String
+	brandId: ID @indexed
+	brand: Brand @relationship(from: "brandId")
+}
+type Brand @table @export {
+	id: ID @primaryKey
+	name: String
+	products: [Product] @relationship(to: "brandId")
+}
+```
+
+And then you could query a product by brand name:
+
+```http
+GET /Product/?brand.name=Microsoft
+```
+
+This will query for products for which the `brandId` references a `Brand` record with a `name` of `"Microsoft"`.
+
+The `brand` attribute in `Product` is a "computed" attribute from the foreign key (`brandId`), for the many-to-one relationship to the `Brand`. In the schema above, we also defined the reverse one-to-many relationship from a `Brand` to a `Product`, and we could likewise query that:
+
+```http
+GET /Brand/?products.name=Keyboard
+```
+
+This would return any `Brand` with at least one product with a name of `"Keyboard"`. Note that both of these queries effectively act as an "INNER JOIN".
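+
+For illustration, the first query above might return something like the following (hypothetical records):
+
+```json
+[
+	{ "id": "p1", "name": "Ergonomic Keyboard", "brandId": "ms-1" },
+	{ "id": "p2", "name": "Optical Mouse", "brandId": "ms-1" }
+]
+```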
+
+#### Chained/Nested Select
+
+Computed relationship attributes are not included by default in query results. However, we can include them by specifying them in a select:
+
+```http
+GET /Product/?brand.name=Microsoft&select(name,brand)
+```
+
+We can also do a "nested" select and specify which sub-attributes to include. For example, if we only wanted to include the name property from the brand, we could do so:
+
+```http
+GET /Product/?brand.name=Microsoft&select(name,brand{name})
+```
+
+Or to specify multiple sub-attributes, we can comma delimit them. Note that selects can "join" to another table without any constraint/filter on the related/joined table:
+
+```http
+GET /Product/?name=Keyboard&select(name,brand{name,id})
+```
+
+When selecting properties from a related table without any constraints on the related table, this effectively acts like a "LEFT JOIN" and will omit the `brand` property if the brandId is `null` or references a non-existent brand.
+
+#### Many-to-many Relationships (Array of Foreign Keys)
+
+Many-to-many relationships are also supported, and can easily be created using an array of foreign key values, without requiring the traditional use of a junction table. This can be done by simply creating a relationship on an array-typed property that references a local array of foreign keys. For example, we could create a relationship to the resellers of a product (each product can have multiple resellers, and each reseller can carry multiple products):
+
+```graphql
+type Product @table @export {
+	id: ID @primaryKey
+	name: String
+	resellerIds: [ID] @indexed
+	resellers: [Reseller] @relationship(from: "resellerIds")
+}
+type Reseller @table {
+	id: ID @primaryKey
+	name: String
+	...
+}
+```
+
+The product record can then hold an array of the reseller ids. When the `resellers` property is accessed (either through code or through select or conditions), the array of ids is resolved to an array of reseller records. We can also query through the `resellers` relationship just like the other relationships. For example, to query the products that are available through the "Cool Shop":
+
+```http
+GET /Product/?resellers.name=Cool Shop&select(id,name,resellers{name,id})
+```
+
+One of the benefits of using an array of foreign key values is that the array can be manipulated using standard array methods (in JavaScript), and the array can dictate an order to the keys and therefore to the resulting records. For example, you may wish to define a specific order to the resellers and how they are listed (which comes first, which comes last):
+
+```http
+PUT /Product/123
+Content-Type: application/json
+
+{ "id": "123", "resellerIds": ["first-reseller-id", "second-reseller-id", "last-reseller-id"],
+...}
+```
+
+#### Type Conversion
+
+Query parameters are simply text, so there are several features for converting parameter values to properly typed values for performing correct searches. For the FIQL comparators, which include `==`, `!=`, `=gt=`, `=lt=`, `=ge=`, and `=le=`, the parser will perform type conversion according to the following rules:
+
+- `name==null`: Will convert the value to `null` for searching.
+- `name==123`: Will convert the value to a number _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`).
+- `name==true`: Will convert the value to a boolean _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`).
+- `name==number:123`: Will explicitly convert the value after "number:" to a number.
+- `name==boolean:true`: Will explicitly convert the value after "boolean:" to a boolean.
+- `name==string:some%20text`: Will explicitly keep the value after "string:" as a string (and perform URL component decoding).
+- `name==date:2024-01-05T20%3A07%3A27.955Z`: Will explicitly convert the value after "date:" to a Date object.
+
+If the attribute specifies a type (like `Float`) in the schema definition, the value will always be converted to the specified type before searching.
+
+For "strict" operators, which include `=`, `===`, and `!==`, no automatic type conversion is applied: the value is decoded as a string (with URL component decoding) unless the attribute specifies a type in the schema, in which case the value is converted to that type.
+
+#### Content Types and Negotiation
+
+HTTP defines a couple of headers for indicating the (preferred) content type of the request and response. The `Content-Type` request header can be used to specify the content type of the request body (for PUT, PATCH, and POST). The `Accept` request header indicates the preferred content type of the response. For general records with object structures, Harper supports the following content types:
+
+- `application/json` - Common format, easy to read, with great tooling support.
+- `application/cbor` - Recommended binary format for optimal encoding efficiency and performance.
+- `application/x-msgpack` - Also an efficient format, but CBOR is preferable, as it has better streaming capabilities and faster time-to-first-byte.
+- `text/csv` - CSV lacks explicit typing and is not well suited for heterogeneous data structures, but it is good for moving data to and from a spreadsheet.
+
+CBOR is generally the most efficient and powerful encoding format, with the best performance, most compact encoding, and most expansive ability to encode different data types like Dates, Maps, and Sets. MessagePack is very similar and tends to have broader adoption. However, JSON can be easier to work with and may have better tooling. Also, if you are using compression for data transfer (gzip or brotli), JSON will often result in more compact compressed data due to character frequencies that better align with Huffman coding, making JSON a good choice for web applications that do not require specific data types beyond the standard JSON types.
+
+Requesting a specific content type can also be done in a URL by suffixing the path with the extension for the content type. If you want to retrieve a record in CSV format, you could request:
+
+```http
+GET /product/some-id.csv
+```
+
+Or you could request a query response in MessagePack:
+
+```http
+GET /product/.msgpack?category=software
+```
+
+However, extensions in paths are generally not recommended; it is best practice to use the `Accept` header to specify acceptable content types.
+
+#### Specific Content Objects
+
+You can specify other content types, and the data will be stored as a record or object that holds the type and contents of the data. For example, if you do:
+
+```
+PUT /my-resource/33
+Content-Type: text/calendar
+
+BEGIN:VCALENDAR
+VERSION:2.0
+...
+```
+
+This would store a record equivalent to this JSON:
+
+```
+{ "contentType": "text/calendar", "data": "BEGIN:VCALENDAR\nVERSION:2.0\n..." }
+```
+
+Retrieving a record with `contentType` and `data` properties will likewise return a response with the specified `Content-Type` and body. If the `Content-Type` is not of the `text` family, the data will be treated as binary data (a Node.js `Buffer`).
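+
+For example, a GET of the record stored above should reproduce the original body and content type (an illustrative request/response sketch, not verbatim server output):
+
+```http
+GET /my-resource/33
+
+HTTP/1.1 200 OK
+Content-Type: text/calendar
+
+BEGIN:VCALENDAR
+VERSION:2.0
+...
+```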
+
+You can also use `application/octet-stream` to indicate that the request body should be preserved in binary form. This is also useful for uploading to a specific property:
+
+```
+PUT /my-resource/33/image
+Content-Type: image/gif
+
+...image data...
+```
diff --git a/versioned_docs/version-4.7/developers/security/basic-auth.md b/versioned_docs/version-4.7/developers/security/basic-auth.md
new file mode 100644
index 00000000..22361432
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/security/basic-auth.md
@@ -0,0 +1,57 @@
+---
+title: Basic Authentication
+---
+
+# Basic Authentication
+
+Harper uses Basic Auth and JSON Web Tokens (JWTs) to secure our HTTP requests. In the context of an HTTP transaction, **basic access authentication** is a method for an HTTP user agent to provide a username and password when making a request.
+
+**You do not need to log in separately. Basic Auth is added to each HTTP request like create_database, create_table, insert, etc., via headers.**
+
+A header is added to each HTTP request. The header key is `Authorization` and the header value is `Basic <base64-encoded username:password>`.
+
+## Authentication in Harper Studio
+
+In the code sample below, you can see where we add the authorization header to the request. This needs to be added for each and every HTTP request for Harper.
+
+_Note: This function uses btoa. Learn about_ [_btoa here_](https://developer.mozilla.org/en-US/docs/Web/API/btoa)_._
+
+```javascript
+const http = require('http');
+
+// Helper assumed by the sample below: returns true if the buffer parses as JSON
+function isJson(body) {
+	try {
+		JSON.parse(body);
+		return true;
+	} catch (e) {
+		return false;
+	}
+}
+
+function callHarperDB(call_object, operation, callback) {
+	const options = {
+		method: 'POST',
+		hostname: call_object.endpoint_url,
+		port: call_object.endpoint_port,
+		path: '/',
+		headers: {
+			'content-type': 'application/json',
+			'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password),
+			'cache-control': 'no-cache',
+		},
+	};
+
+	const http_req = http.request(options, function (hdb_res) {
+		let chunks = [];
+
+		hdb_res.on('data', function (chunk) {
+			chunks.push(chunk);
+		});
+
+		hdb_res.on('end', function () {
+			const body = Buffer.concat(chunks);
+			if (isJson(body)) {
+				return callback(null, JSON.parse(body));
+			} else {
+				return callback(body, null);
+			}
+		});
+	});
+
+	http_req.on('error', function (chunk) {
+		return callback('Failed to connect', null);
+	});
+
+	http_req.write(JSON.stringify(operation));
+	http_req.end();
+}
+```
diff --git a/versioned_docs/version-4.7/developers/security/certificate-management.md b/versioned_docs/version-4.7/developers/security/certificate-management.md
new file mode 100644
index 00000000..a78ed633
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/security/certificate-management.md
@@ -0,0 +1,153 @@
+---
+title: Certificate Management
+---
+
+# Certificate Management
+
+This document covers managing certificates for Harper's external-facing APIs. For information on certificate management for clustering, see [clustering certificate management](../clustering/certificate-management).
+
+## Development
+
+An out-of-the-box install of Harper does not have HTTPS enabled (see [configuration](../../deployments/configuration#http) for the relevant configuration file settings). This is great for local development. If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS.
+
+To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper.
+
+By default, Harper will generate certificates and place them at `<ROOTPATH>/keys/`. These certificates will not have a valid Common Name (CN) for your Harper node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate.
+
+## Production
+
+For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your Harper node.
+
+We have a few recommended options for enabling HTTPS in a production setting.
+
+### Option: Enable Harper HTTPS and Replace Certificates
+
+To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper.
+
+To replace the certificates, either replace the contents of the existing certificate files at `<ROOTPATH>/keys/`, or update the Harper configuration with the path of your new certificate files, and then restart Harper.
+
+```yaml
+tls:
+  certificate: ~/hdb/keys/certificate.pem
+  privateKey: ~/hdb/keys/privateKey.pem
+```
+
+`operationsApi.tls` configuration is optional. If it is not set, Harper will default to the values in the `tls` section.
+
+```yaml
+operationsApi:
+  tls:
+    certificate: ~/hdb/keys/certificate.pem
+    privateKey: ~/hdb/keys/privateKey.pem
+```
+
+### mTLS
+
+Mutual TLS (mTLS) is a security protocol that requires both the client and the server to present certificates to each other. Requiring a client certificate can be useful for authenticating clients and ensuring that only authorized clients can access your Harper instance. This can be enabled by setting the `http.mtls` configuration in `harperdb-config.yaml` to `true` and providing a certificate authority in the TLS section:
+
+```yaml
+http:
+  mtls: true
+  ...
+tls:
+  certificateAuthority: ~/hdb/keys/ca.pem
+  ...
+```
+
+### Certificate Revocation Checking
+
+When using mTLS, you may also want to enable certificate revocation checking to ensure that revoked certificates cannot be used for authentication, even if they're still within their validity period. Harper supports two industry-standard methods for checking certificate revocation status:
+
+**CRL (Certificate Revocation List)**
+
+- A digitally signed list of revoked certificates published by the Certificate Authority
+- Downloaded and cached locally for fast verification
+- Updated periodically (typically daily)
+- Best for: High-volume verification, offline scenarios, predictable bandwidth usage
+
+**OCSP (Online Certificate Status Protocol)**
+
+- Real-time query to check individual certificate status
+- Provides immediate revocation status
+- Requires network connection for each check (with caching)
+- Best for: Real-time revocation status, certificates without CRL distribution points
+
+**Harper's Approach: CRL-First with OCSP Fallback**
+
+Harper uses a CRL-first strategy for optimal performance:
+
+1. Checks CRL if available (fast, cached locally for 24 hours by default)
+2. Falls back to OCSP if CRL is not available or fails (cached for 1 hour by default)
+3. Applies the configured failure mode if both methods fail
+
+This strategy provides the best balance of performance, reliability, and security.
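+
+Both methods rely on URLs embedded in the client certificate itself (the CRL distribution point and the OCSP responder address). As a quick sketch, you can inspect these with OpenSSL (`client.pem` is a placeholder for your client certificate):
+
+```bash
+# Print the OCSP responder URL embedded in the certificate
+openssl x509 -in client.pem -noout -ocsp_uri
+
+# Print the full certificate text; look for "X509v3 CRL Distribution Points"
+openssl x509 -in client.pem -noout -text | grep -A 2 "CRL Distribution"
+```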
+ +**Enabling Certificate Verification** + +Certificate revocation checking is disabled by default and must be explicitly enabled: + +```yaml +http: + mtls: + required: true + certificateVerification: true # Enable with defaults +``` + +For production environments with high-security requirements, you can customize the verification settings: + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-closed # Reject connections on verification failure + crl: + timeout: 15000 # 15 seconds to download CRL + cacheTtl: 43200000 # Cache for 12 hours + ocsp: + timeout: 8000 # 8 seconds for OCSP response + cacheTtl: 7200000 # Cache for 2 hours +``` + +**Performance Considerations** + +- **CRL caching**: CRLs are cached locally, so subsequent verifications are very fast (no network requests) +- **OCSP caching**: Successful OCSP responses are cached (1 hour by default), errors cached for 5 minutes +- **Background refresh**: CRLs are refreshed in the background before expiration to avoid blocking requests +- **Graceful degradation**: Network failures don't block connections in fail-open mode + +**When to Use Certificate Verification** + +Enable certificate revocation checking when: + +- You need to immediately revoke access for compromised certificates +- Compliance or security policies require revocation checking +- You're in a zero-trust security environment +- Client certificates have long validity periods + +You may skip it if: + +- All certificates have very short validity periods (e.g., < 24 hours) +- You have alternative revocation mechanisms in place +- Performance is critical and risk is acceptable + +For detailed configuration options, see the [configuration reference](../../deployments/configuration#http). + +### Option: Nginx Reverse Proxy + +Instead of enabling HTTPS for Harper, Nginx can be used as a reverse proxy for Harper. + +Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to Harper as HTTP requests. + +[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. + +### Option: External Reverse Proxy + +Instead of enabling HTTPS for Harper, a number of different external services can be used as a reverse proxy for Harper. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to Harper as HTTP requests. + +Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. + +### Additional Considerations + +It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for Harper administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. 
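+
+As a sketch of that split, the `operationsApi.tls` override shown earlier can point at a different certificate than the top-level `tls` section (the file names here are placeholders):
+
+```yaml
+# Public CA certificate for externally exposed endpoints
+tls:
+  certificate: ~/hdb/keys/public-certificate.pem
+  privateKey: ~/hdb/keys/public-privateKey.pem
+
+# Private CA certificate just for the Operations API
+operationsApi:
+  tls:
+    certificate: ~/hdb/keys/internal-certificate.pem
+    privateKey: ~/hdb/keys/internal-privateKey.pem
+```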
diff --git a/versioned_docs/version-4.7/developers/security/certificate-verification.md b/versioned_docs/version-4.7/developers/security/certificate-verification.md new file mode 100644 index 00000000..dd7360ec --- /dev/null +++ b/versioned_docs/version-4.7/developers/security/certificate-verification.md @@ -0,0 +1,502 @@ +--- +title: Certificate Verification +--- + +# Certificate Verification + +Certificate verification (also called certificate revocation checking) is a security feature that ensures revoked certificates cannot be used for authentication, even if they are otherwise valid and trusted. This is a critical security control for environments where certificates may need to be revoked before their expiration date due to compromise, employee departure, or other security concerns. + +## Overview + +When a client presents a certificate for mTLS authentication, Harper performs the following checks: + +1. **Certificate Validation** (always performed by Node.js TLS): + - Certificate signature is valid + - Certificate is issued by a trusted CA + - Certificate is within its validity period + - Certificate chain is properly formed + +2. **Certificate Revocation Checking** (optional, must be explicitly enabled): + - Certificate has not been revoked by the issuing CA + - Uses CRL (Certificate Revocation List) and/or OCSP (Online Certificate Status Protocol) + +## Revocation Checking Methods + +Harper supports two industry-standard methods for checking certificate revocation status: + +### CRL (Certificate Revocation List) + +A CRL is a digitally signed list of revoked certificates published by a Certificate Authority. + +**Advantages:** + +- Fast verification (cached locally) +- Works offline once downloaded +- Predictable bandwidth usage +- Good for high-volume verification +- No privacy concerns (no per-certificate queries) + +**How it works:** + +1. Harper downloads the CRL from the distribution point specified in the certificate +2. CRL is cached locally (24 hours by default) +3. Subsequent verifications check the cached CRL (very fast, no network requests) +4. CRL is refreshed in the background before expiration + +**Configuration:** + +```yaml +http: + mtls: + certificateVerification: + crl: + timeout: 10000 # 10 seconds to download CRL + cacheTtl: 86400000 # Cache for 24 hours + gracePeriod: 86400000 # 24 hour grace period after nextUpdate + failureMode: fail-closed # Reject on CRL check failure +``` + +### OCSP (Online Certificate Status Protocol) + +OCSP provides real-time certificate status checking by querying the CA's OCSP responder. + +**Advantages:** + +- Real-time revocation status +- Smaller response size than CRL +- Good for certificates without CRL distribution points +- Works when CRL is unavailable + +**How it works:** + +1. Harper sends a request to the OCSP responder specified in the certificate +2. OCSP responder returns the current status (good, revoked, or unknown) +3. Response is cached (1 hour by default for success, 5 minutes for errors) + +**Configuration:** + +```yaml +http: + mtls: + certificateVerification: + ocsp: + timeout: 5000 # 5 seconds for OCSP response + cacheTtl: 3600000 # Cache successful responses for 1 hour + errorCacheTtl: 300000 # Cache errors for 5 minutes + failureMode: fail-closed # Reject on OCSP check failure +``` + +## Verification Strategy + +Harper uses a **CRL-first strategy with OCSP fallback** for optimal performance and reliability: + +1. 
**Check CRL** if available + - Fast (uses cached CRL) + - No network request needed if CRL is cached + - If CRL check succeeds or fails definitively, return result + +2. **Fall back to OCSP** if: + - Certificate has no CRL distribution point + - CRL download fails + - CRL is expired and cannot be refreshed + +3. **Apply failure mode** if both methods fail + +This strategy provides the best balance of: + +- **Performance**: CRL checks are very fast when cached +- **Reliability**: OCSP provides fallback when CRL is unavailable +- **Security**: Always attempts verification before falling back + +## Configuration + +### Enable with Defaults + +The simplest configuration enables certificate verification with sensible defaults: + +```yaml +http: + mtls: + required: true + certificateVerification: true +``` + +This enables: + +- CRL checking (enabled, 10s timeout, 24h cache) +- OCSP checking (enabled, 5s timeout, 1h cache) +- Fail-closed mode (rejects connections on verification failure) + +### Custom Configuration + +For production environments, you may want to customize settings: + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-closed # Global setting + crl: + timeout: 15000 # 15 seconds for CRL download + cacheTtl: 43200000 # Cache CRLs for 12 hours + gracePeriod: 86400000 # 24 hour grace period + failureMode: fail-closed # CRL-specific setting + ocsp: + timeout: 8000 # 8 seconds for OCSP response + cacheTtl: 7200000 # Cache results for 2 hours + errorCacheTtl: 600000 # Cache errors for 10 minutes + failureMode: fail-closed # OCSP-specific setting +``` + +### CRL Only (No OCSP) + +For environments where OCSP is not available or desired: + +```yaml +http: + mtls: + certificateVerification: + ocsp: false # Disable OCSP, CRL remains enabled +``` + +### OCSP Only (No CRL) + +For environments preferring real-time checking: + +```yaml +http: + mtls: + certificateVerification: + crl: false # Disable CRL, OCSP remains enabled +``` + +### Environment Variables + +All settings can be configured via environment variables: + +```bash +# Enable certificate verification +HTTP_MTLS_CERTIFICATEVERIFICATION=true + +# Global failure mode +HTTP_MTLS_CERTIFICATEVERIFICATION_FAILUREMODE=fail-closed + +# CRL settings +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL=true +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_TIMEOUT=15000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_CACHETTL=43200000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_GRACEPERIOD=86400000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_FAILUREMODE=fail-closed + +# OCSP settings +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP=true +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_TIMEOUT=8000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_CACHETTL=7200000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_ERRORCACHETTL=600000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_FAILUREMODE=fail-closed +``` + +For replication servers, use the `REPLICATION_` prefix instead of `HTTP_`. + +## Failure Modes + +Certificate verification supports two failure modes that control behavior when verification cannot be completed: + +### fail-closed (Recommended) + +**Default behavior.** Rejects connections when verification fails due to network errors, timeouts, or other operational issues. 
+ +**Use when:** + +- Security is paramount +- You can tolerate false positives (rejecting valid certificates) +- Your CA infrastructure is highly available +- You're in a zero-trust environment + +**Example:** + +```yaml +certificateVerification: + failureMode: fail-closed +``` + +### fail-open + +Allows connections when verification fails, but logs a warning. The connection is still rejected if the certificate is explicitly found to be revoked. + +**Use when:** + +- Availability is more important than perfect security +- Your CA infrastructure may be intermittently unavailable +- You have other compensating controls +- You're gradually rolling out certificate verification + +**Example:** + +```yaml +certificateVerification: + failureMode: fail-open +``` + +**Important:** Invalid signatures on CRLs always result in rejection regardless of failure mode, as this indicates potential tampering. + +## Performance Considerations + +### CRL Performance + +- **First verification**: Downloads CRL (10s timeout by default) +- **Subsequent verifications**: Instant (reads from cache) +- **Background refresh**: CRL is refreshed before expiration without blocking requests +- **Memory usage**: ~10-100KB per CRL depending on size +- **Network usage**: One download per CRL per cacheTtl period + +### OCSP Performance + +- **First verification**: OCSP query (5s timeout by default) +- **Subsequent verifications**: Reads from cache (1 hour default) +- **Memory usage**: Minimal (~1KB per cached response) +- **Network usage**: One query per unique certificate per cacheTtl period + +### Optimization Tips + +1. **Increase CRL cache TTL** for stable environments: + + ```yaml + crl: + cacheTtl: 172800000 # 48 hours + ``` + +2. **Increase OCSP cache TTL** for long-lived connections: + + ```yaml + ocsp: + cacheTtl: 7200000 # 2 hours + ``` + +3. **Use CRL only** if you control the CA and **all certificates have CRL distribution points**: + + ```yaml + ocsp: false # Only disable if all certs have CRL URLs + ``` + +4. **Reduce grace period** if you need tighter revocation enforcement: + ```yaml + crl: + gracePeriod: 0 # No grace period + ``` + +## Production Best Practices + +### High-Security Environments + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-closed # Always reject on failure + crl: + timeout: 15000 # Longer timeout for reliability + cacheTtl: 43200000 # 12 hours (balance security and performance) + gracePeriod: 0 # No grace period for strict enforcement + ocsp: + timeout: 8000 + cacheTtl: 3600000 # 1 hour +``` + +### High-Availability Environments + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-open # Prioritize availability + crl: + timeout: 5000 # Shorter timeout to fail faster + cacheTtl: 86400000 # 24 hours + gracePeriod: 86400000 # 24 hour grace period + ocsp: + timeout: 3000 + cacheTtl: 7200000 # 2 hours for fewer queries +``` + +### Performance-Critical Environments + +For maximum performance, increase cache durations to minimize network requests: + +```yaml +http: + mtls: + required: true + certificateVerification: + crl: + cacheTtl: 172800000 # 48 hours (minimize CRL downloads) + gracePeriod: 86400000 # 24 hour grace period + ocsp: + cacheTtl: 7200000 # 2 hours (minimize OCSP queries) + errorCacheTtl: 600000 # Cache errors for 10 minutes +``` + +**Note**: Only disable OCSP (`ocsp: false`) if you're certain all client certificates have CRL distribution points. 
Otherwise, certificates without CRLs won't be checked for revocation.
+
+## Troubleshooting
+
+### Connection Rejected: Certificate Verification Failed
+
+**Cause:** Certificate was found to be revoked or verification failed in fail-closed mode.
+
+**Solutions:**
+
+1. Check if certificate is actually revoked in the CRL or OCSP responder
+2. Verify CA infrastructure is accessible
+3. Check timeout settings (may need to increase)
+4. Temporarily use fail-open mode while investigating:
+   ```yaml
+   certificateVerification:
+     failureMode: fail-open
+   ```
+
+### High Latency on First Connection
+
+**Cause:** CRL is being downloaded for the first time.
+
+**Solutions:**
+
+1. This is normal and only happens once per CRL per cacheTtl period
+2. Subsequent connections will be fast (cached CRL)
+3. Increase CRL timeout if downloads are slow:
+   ```yaml
+   crl:
+     timeout: 20000 # 20 seconds
+   ```
+
+### Frequent CRL Downloads
+
+**Cause:** CRL cacheTtl is too short or CRL nextUpdate period is very short.
+
+**Solutions:**
+
+1. Increase cacheTtl:
+   ```yaml
+   crl:
+     cacheTtl: 172800000 # 48 hours
+   ```
+2. Increase gracePeriod to allow using slightly expired CRLs:
+   ```yaml
+   crl:
+     gracePeriod: 172800000 # 48 hours
+   ```
+
+### OCSP Responder Unavailable
+
+**Cause:** OCSP responder is down or unreachable.
+
+**Solutions:**
+
+1. CRL will be used as fallback automatically
+2. Use fail-open mode to allow connections:
+   ```yaml
+   ocsp:
+     failureMode: fail-open
+   ```
+3. Disable OCSP and rely on CRL only (ensure all certs have CRL URLs):
+   ```yaml
+   ocsp: false
+   ```
+
+### Network/Firewall Blocking Outbound Requests
+
+**Cause:** Secure hosting environments often restrict outbound HTTP/HTTPS traffic to reduce exfiltration risks. This prevents Harper from reaching CRL distribution points and OCSP responders.
+
+**Symptoms:**
+
+- Certificate verification timeouts in fail-closed mode
+- Logs show connection failures to CRL/OCSP URLs
+- First connection succeeds (no cached CRL); subsequent connections fail after the cache expires
+
+**Solutions:**
+
+1. **Allow outbound traffic to CA infrastructure** (recommended):
+   - Whitelist CRL distribution point URLs (from your certificates)
+   - Whitelist OCSP responder URLs (from your certificates)
+   - Example: If using Let's Encrypt, allow `http://x1.c.lencr.org/` and `http://ocsp.int-x3.letsencrypt.org/`
+
+2. **Use fail-open mode** (allows connections when verification fails):
+
+   ```yaml
+   certificateVerification:
+     failureMode: fail-open # Don't block on network issues
+   ```
+
+3. **Use CRL only with local caching/proxy**:
+   - Set up an internal CRL mirror/proxy
+   - Configure firewall to allow Harper → internal CRL proxy
+   - Increase cache TTL to reduce fetch frequency:
+     ```yaml
+     certificateVerification:
+       crl:
+         cacheTtl: 172800000 # 48 hours
+       ocsp: false # Disable OCSP
+     ```
+
+4. **Disable verification** (if you have alternative security controls):
+   ```yaml
+   certificateVerification: false
+   ```
+
+## Security Considerations
+
+### When Certificate Verification is Critical
+
+Enable certificate verification when:
+
+- Certificates have long validity periods (> 1 day)
+- You need immediate revocation capability
+- Compliance requires revocation checking (PCI DSS, HIPAA, etc.)
+- You're in a zero-trust security model
+- Client certificates are used for API authentication
+
+### When You Might Skip It
+
+Consider not using certificate verification when:
+
+- Certificates have very short validity periods (< 24 hours)
+- You rotate certificates automatically (e.g., with cert-manager)
+- You have alternative revocation mechanisms
+- Performance is critical and risk is acceptable
+- Your CA doesn't publish CRLs or support OCSP
+
+### Defense in Depth
+
+Certificate verification is one layer of security. Also consider:
+
+- Short certificate validity periods (reduces window of compromise)
+- Certificate pinning (prevents CA compromise)
+- Network segmentation (limits blast radius)
+- Access logging and monitoring
+- Regular certificate rotation
+
+## Replication Server
+
+Certificate verification works identically for replication servers. Use the `replication.mtls` configuration:
+
+```yaml
+replication:
+  hostname: server-one
+  routes:
+    - server-two
+  mtls:
+    certificateVerification: true
+```
+
+**Important:** mTLS is always required for replication and cannot be disabled. This configuration only controls whether certificate revocation checking is performed.
+
+For complete replication configuration, see [Configuration - Replication](../../deployments/configuration#replication).
+
+## Further Reading
+
+- [Certificate Management](./certificate-management) - Managing certificates and CAs
+- [mTLS Authentication](./mtls-auth) - Setting up mTLS
+- [Configuration Reference](../../deployments/configuration) - Complete configuration options
diff --git a/versioned_docs/version-4.7/developers/security/configuration.md b/versioned_docs/version-4.7/developers/security/configuration.md
new file mode 100644
index 00000000..2dee9d86
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/security/configuration.md
@@ -0,0 +1,40 @@
+---
+title: Configuration
+---
+
+# Configuration
+
+Harper was set up to require very minimal configuration to work out of the box. There are, however, some best practices we encourage for anyone building an app with Harper.
+
+## CORS
+
+Harper allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, Harper enables CORS for all domains. If you need to disable CORS completely or set up an access list of domains, you can do the following:
+
+1. Open the harperdb-config.yaml file, which can be found in `<ROOTPATH>`, the location you specified during install.
+1. In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList.
+   - `cors`
+     1. To turn off, change to: `cors: false`
+     1. To turn on, change to: `cors: true`
+   - `corsAccessList`
+     1. The `corsAccessList` will only be recognized by the system when `cors` is `true`
+     1. To create an access list, set `corsAccessList` to a comma-separated list of domains.
+
+        e.g. `corsAccessList` is `https://harpersystems.dev,https://products.harpersystems.dev`
+
+     1. To clear out the access list and allow all domains: `corsAccessList` is `[null]`
+
+## SSL
+
+Harper provides the option to use an HTTP interface or an HTTPS (with HTTP/2) interface. The default port for the server is 9925.
+
+This default port can be changed by updating the `operationsApi.network.port` value in `<ROOTPATH>/harperdb-config.yaml`.
+
+By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose Harper's HTTP interface through a publicly available port. HTTP is intended for local or private network use.
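+
+As a sketch, the relevant `operationsApi.network` block might look like the following (example values, not defaults; the list form of `corsAccessList` is an assumption based on standard YAML conventions):
+
+```yaml
+operationsApi:
+  network:
+    cors: true
+    corsAccessList:
+      - https://harpersystems.dev
+      - https://products.harpersystems.dev
+    port: 9925
+    https: false # see below for enabling HTTPS
+```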
+
+You can toggle HTTPS and HTTP in the settings file by setting `operationsApi.network.https` to true or false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTP/1.1 and HTTP/2 over TLS.
+
+Harper automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem), which live at `<ROOTPATH>/keys/`.
+
+You can replace these with your own certificates and key.
+
+**Changes to these settings require a restart. Use the `restart` operation from the Harper Operations API (or run `harperdb restart` from the command line).**
diff --git a/versioned_docs/version-4.7/developers/security/index.md b/versioned_docs/version-4.7/developers/security/index.md
new file mode 100644
index 00000000..a090aa88
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/security/index.md
@@ -0,0 +1,23 @@
+---
+title: Security
+---
+
+# Security
+
+Harper uses role-based, attribute-level security to ensure that users can only gain access to the data they're supposed to be able to access. Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases.
+
+## Authentication
+
+- [JWT Authentication](security/jwt-auth) - Token-based authentication using JSON Web Tokens
+- [Basic Authentication](security/basic-auth) - Username and password authentication
+- [mTLS Authentication](security/mtls-auth) - Mutual TLS certificate-based authentication
+
+## Certificate Management
+
+- [Certificate Management](security/certificate-management) - Managing certificates and Certificate Authorities
+- [Certificate Verification](security/certificate-verification) - Certificate revocation checking (CRL/OCSP)
+
+## Access Control
+
+- [Configuration](security/configuration) - Security configuration and settings
+- [Users and Roles](security/users-and-roles) - Role-based access control and permissions
diff --git a/versioned_docs/version-4.7/developers/security/jwt-auth.md b/versioned_docs/version-4.7/developers/security/jwt-auth.md
new file mode 100644
index 00000000..832373e4
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/security/jwt-auth.md
@@ -0,0 +1,96 @@
+---
+title: JWT Authentication
+---
+
+# JWT Authentication
+
+Harper uses token-based authentication with JSON Web Tokens (JWTs).
+
+This consists of two primary operations, `create_authentication_tokens` and `refresh_operation_token`, which generate two types of tokens, as follows:
+
+- The `operation_token`, which is used to authenticate all Harper operations in the Bearer Token Authorization Header. The default expiry is one day.
+- The `refresh_token`, which is used to generate a new `operation_token` upon expiry. This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. The default expiry is thirty days.
+
+The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost.
+
+## Create Authentication Tokens
+
+Users must initially create tokens using their Harper credentials. The following POST body is sent to Harper. No headers are required for this POST operation.
+ +```json +{ + "operation": "create_authentication_tokens", + "username": "username", + "password": "password" +} +``` + +A full cURL example can be seen here: + +```bash +curl --location --request POST 'http://localhost:9925' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "operation": "create_authentication_tokens", + "username": "username", + "password": "password" +}' +``` + +An example expected return object is: + +```json +{ + "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", + "refresh_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" +} +``` + +## Using JWT Authentication Tokens + +The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. 
In order to pass the token you will need to create a Bearer Token Authorization Header like the following request:
+
+```bash
+curl --location --request POST 'http://localhost:9925' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \
+--data-raw '{
+    "operation":"search_by_hash",
+    "schema":"dev",
+    "table":"dog",
+    "hash_values":[1],
+    "get_attributes": ["*"]
+}'
+```
+
+## Token Expiration
+
+`operation_token` expires at a set interval. Once it expires, it will no longer be accepted by Harper. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. A full cURL example can be seen here:
+
+```bash
+curl --location --request POST 'http://localhost:9925' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \
+--data-raw '{
+    "operation":"refresh_operation_token"
+}'
+```
+
+This will return a new `operation_token`.
An example expected return object is:
+
+```json
+{
+	"operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4KdKzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY"
+}
+```
+
+The `refresh_token` also expires at a set interval, but a longer one. Once it expires, it will no longer be accepted by Harper. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token` and a new `refresh_token`, the `create_authentication_tokens` operation is called.
+
+## Configuration
+
+Token timeouts are configurable in [harperdb-config.yaml](../../deployments/configuration) with the following parameters:
+
+- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d).
+- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d).
+
+A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms).
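+
+Putting the pieces together, here is a minimal client-side sketch of the token lifecycle (this is illustrative, not a Harper-provided client; it assumes a Node.js 18+ global `fetch` and a local instance on port 9925, and the function names are our own):
+
+```javascript
+const HARPER_URL = 'http://localhost:9925';
+
+// Create both tokens from credentials (done initially, or when both tokens expire)
+async function createTokens(username, password) {
+	const res = await fetch(HARPER_URL, {
+		method: 'POST',
+		headers: { 'Content-Type': 'application/json' },
+		body: JSON.stringify({ operation: 'create_authentication_tokens', username, password }),
+	});
+	return res.json(); // { operation_token, refresh_token }
+}
+
+// Exchange a refresh_token for a fresh operation_token
+async function refreshOperationToken(refresh_token) {
+	const res = await fetch(HARPER_URL, {
+		method: 'POST',
+		headers: {
+			'Content-Type': 'application/json',
+			'Authorization': `Bearer ${refresh_token}`,
+		},
+		body: JSON.stringify({ operation: 'refresh_operation_token' }),
+	});
+	return res.json(); // { operation_token }
+}
+```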
diff --git a/versioned_docs/version-4.7/developers/security/mtls-auth.md b/versioned_docs/version-4.7/developers/security/mtls-auth.md
new file mode 100644
index 00000000..f757f60d
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/security/mtls-auth.md
@@ -0,0 +1,24 @@
+---
+title: mTLS Authentication
+---
+
+# mTLS Authentication
+
+Harper supports mTLS authentication for incoming connections. When enabled in the [HTTP config settings](../../deployments/configuration#http), the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. If the certificate can be properly verified, the connection will authenticate as the user whose id/username is specified by the `CN` (common name) in the client certificate's `subject`, by default. The [HTTP config settings](../../deployments/configuration#http) allow you to determine whether mTLS is required for all connections or optional.
+
+## Certificate Revocation Checking
+
+When using mTLS authentication, you can optionally enable certificate revocation checking to ensure that revoked certificates cannot be used, even if they are otherwise valid and trusted. This adds an important security layer by checking whether certificates have been explicitly revoked by the issuing Certificate Authority.
+
+Harper supports both CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) for checking certificate revocation status, using a CRL-first strategy with OCSP fallback for optimal performance and reliability.
+
+**To enable certificate verification:**
+
+```yaml
+http:
+  mtls:
+    required: true
+    certificateVerification: true # Enable revocation checking
+```
+
+Certificate revocation checking is **disabled by default** and must be explicitly enabled. For detailed information about certificate revocation checking, including configuration options, performance considerations, and best practices, see [Certificate Management - Certificate Revocation Checking](./certificate-management#certificate-revocation-checking).
diff --git a/versioned_docs/version-4.7/developers/security/users-and-roles.md b/versioned_docs/version-4.7/developers/security/users-and-roles.md
new file mode 100644
index 00000000..cff17e5a
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/security/users-and-roles.md
@@ -0,0 +1,273 @@
+---
+title: Users & Roles
+---
+
+# Users & Roles
+
+Harper utilizes a Role-Based Access Control (RBAC) framework to manage access to Harper instances. A user is assigned a role that determines the user’s permissions to access database resources and run core operations.
+
+## Roles in Harper
+
+Role permissions in Harper are broken into two categories – permissions around database manipulation and permissions around database definition.
+
+**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a Harper instance.
+
+1. At the table level, permissions must be explicitly defined when adding or altering a role – _i.e. Harper will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._
+1. At the attribute level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set.
**Database Definition**: Permissions related to managing databases, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role.
+
+**Built-In Roles**
+
+There are three built-in roles within Harper. See a full breakdown of the operations restricted to super_user roles [here](users-and-roles#role-based-operation-restrictions).
+
+- `super_user` - This role provides full access to all operations and methods within a Harper instance; it can be considered the admin role.
+  - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions.
+- `cluster_user` - This role is an internally managed system role that allows clustered instances to communicate with one another.
+- `structure_user` - This role provides specific access for creation and deletion of data.
+  - When defining this role type, you can either assign a value of `true`, which allows the role to create and drop databases and tables, or assign a string array of database names, which restricts the role to creating and dropping tables in only the designated databases.
+
+**User-Defined Roles**
+
+In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes.
+
+- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON.
+- Describe operations will return metadata for all databases, tables, and attributes that a user-defined role has CRUD permissions for.
+
+**Role Permissions**
+
+When creating a new, user-defined role in a Harper instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._
+
+- `role` - the name used to easily identify the role assigned to individual users.
+
+  _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role`, `alter_role`, or `list_roles` operation._
+
+- `permissions` - used to explicitly define CRUD access to existing table data.
+
+Example JSON for an `add_role` request:
+
+```json
+{
+	"operation": "add_role",
+	"role": "software_developer",
+	"permission": {
+		"super_user": false,
+		"database_name": {
+			"tables": {
+				"table_name1": {
+					"read": true,
+					"insert": true,
+					"update": true,
+					"delete": false,
+					"attribute_permissions": [
+						{
+							"attribute_name": "attribute1",
+							"read": true,
+							"insert": true,
+							"update": true
+						}
+					]
+				},
+				"table_name2": {
+					"read": true,
+					"insert": true,
+					"update": true,
+					"delete": false,
+					"attribute_permissions": []
+				}
+			}
+		}
+	}
+}
+```
+
+**Setting Role Permissions**
+
+There are two parts to a permissions set:
+
+- `super_user` – boolean value indicating whether the role should be provided super_user access.
+
+  _If `super_user` is set to true, there should be no additional database-specific permissions values included since the role will have access to the entire database schema. If permissions are included in the body of the operation, they will be stored within Harper, but ignored, as super_users have full access to the database._
+
+- `permissions`: Database tables that a role should have specific CRUD access to should be included in the final, database-specific `permissions` JSON.
+
+  _For user-defined roles (i.e. non-super_user roles), blank permissions will result in the user being restricted from accessing any of the database schema._
+
+**Table Permissions JSON**
+
+Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its database in the role’s permissions JSON passed to the API (_see example above_).
+
+```jsonc
+{
+	"table_name": { // the name of the table to define CRUD perms for
+		"read": boolean, // access to read from this table
+		"insert": boolean, // access to insert data to table
+		"update": boolean, // access to update data in table
+		"delete": boolean, // access to delete row data in table
+		"attribute_permissions": [ // permissions for specific table attributes
+			{
+				"attribute_name": "attribute_name", // attribute to assign permissions to
+				"read": boolean, // access to read this attribute from table
+				"insert": boolean, // access to insert this attribute into the table
+				"update": boolean // access to update this attribute in the table
+			}
+		]
+	}
+}
+```
+
+**Important Notes About Table Permissions**
+
+1. If a database and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the database and/or tables.
+1. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error.
+
+**Important Notes About Attribute Permissions**
+
+1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array.
+1. If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute` (also referred to as the `primary_key`), even if it is not explicitly defined in the permissions JSON.
+
+   _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._
+
+1. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have no CRUD access (with the exception of the `hash_attribute` described in #2).
+
+   _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to create, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._
+
+1. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions.
+
+   _See table_name2’s permission set for an example of this._
+
+1. The `__createdtime__` and `__updatedtime__` attributes that Harper manages internally can have read permissions set, but, if set, all other attribute-level permissions will be ignored.
+1. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row; rows must be deleted in full.
+ - If a role needs the ability to delete rows from a table, that permission should be set on the table-level. + - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement. + +## Role-Based Operation Restrictions + +The table below includes all API operations available in Harper and indicates whether or not the operation is restricted to super_user roles. + +_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the database-level CRUD permissions set for the roles._ + +| Databases and Tables | Restricted to Super_Users | +| -------------------- | :-----------------------: | +| describe_all | | +| describe_database | | +| describe_table | | +| create_database | X | +| drop_database | X | +| create_table | X | +| drop_table | X | +| create_attribute | | +| drop_attribute | X | + +| NoSQL Operations | Restricted to Super_Users | +| -------------------- | :-----------------------: | +| insert | | +| update | | +| upsert | | +| delete | | +| search_by_hash | | +| search_by_value | | +| search_by_conditions | | + +| SQL Operations | Restricted to Super_Users | +| -------------- | :-----------------------: | +| select | | +| insert | | +| update | | +| delete | | + +| Bulk Operations | Restricted to Super_Users | +| --------------- | :-----------------------: | +| csv_data_load | | +| csv_file_load | | +| csv_url_load | | +| import_from_s3 | | + +| Users and Roles | Restricted to Super_Users | +| --------------- | :-----------------------: | +| list_roles | X | +| add_role | X | +| alter_role | X | +| drop_role | X | +| list_users | X | +| user_info | | +| add_user | X | +| alter_user | X | +| drop_user | X | + +| Clustering | Restricted to Super_Users | +| --------------------- | :-----------------------: | +| cluster_set_routes | X | +| cluster_get_routes | X | +| cluster_delete_routes | X | +| add_node | X | +| update_node | X | +| cluster_status | X | +| remove_node | X | +| configure_cluster | X | + +| Components | Restricted to Super_Users | +| ------------------ | :-----------------------: | +| get_components | X | +| get_component_file | X | +| set_component_file | X | +| drop_component | X | +| add_component | X | +| package_component | X | +| deploy_component | X | + +| Custom Functions | Restricted to Super_Users | +| ------------------------------- | :-----------------------: | +| custom_functions_status | X | +| get_custom_functions | X | +| get_custom_function | X | +| set_custom_function | X | +| drop_custom_function | X | +| add_custom_function_project | X | +| drop_custom_function_project | X | +| package_custom_function_project | X | +| deploy_custom_function_project | X | + +| Registration | Restricted to Super_Users | +| ----------------- | :-----------------------: | +| registration_info | | +| get_fingerprint | X | +| set_license | X | + +| Jobs | Restricted to Super_Users | +| ------------------------- | :-----------------------: | +| get_job | | +| search_jobs_by_start_date | X | + +| Logs | Restricted to Super_Users | +| ------------------------------ | :-----------------------: | +| read_log | X | +| read_transaction_log | X | +| delete_transaction_logs_before | X | +| read_audit_log | X | +| delete_audit_logs_before | X | + +| Utilities | Restricted to Super_Users | +| --------------------- | :-----------------------: | +| delete_records_before | X | +| export_local | X | +| export_to_s3 | X | +| system_information | X | +| restart | X | +| 
restart_service | X |
+| get_configuration | X |
+| configure_cluster | X |
+
+| Token Authentication | Restricted to Super_Users |
+| ---------------------------- | :-----------------------: |
+| create_authentication_tokens | |
+| refresh_operation_token | |
+
+## Error: Must execute as User
+
+**You may have gotten an error like** `Error: Must execute as <user>`.
+
+This means that you installed Harper as `<user>`. Because Harper stores files natively on the operating system, we only allow the Harper executable to be run by a single user. This prevents permissions issues on files.
+
+For example, if you installed as user_a but later wanted to run as user_b, user_b may not have access to the hdb files Harper needs. This also keeps Harper more secure, as it allows you to lock files down to a specific user and prevents other users from accessing your files.
diff --git a/versioned_docs/version-4.7/developers/sql-guide/date-functions.md b/versioned_docs/version-4.7/developers/sql-guide/date-functions.md
new file mode 100644
index 00000000..c9747dcd
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/sql-guide/date-functions.md
@@ -0,0 +1,227 @@
+---
+title: SQL Date Functions
+---
+
+:::warning
+Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future.
+:::
+
+# SQL Date Functions
+
+Harper utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC.
+
+When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for the [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format, and then fall back to `new Date(date_string)` if a known format is not found.
+
+### CURRENT_DATE()
+
+Returns the current date in UTC in `YYYY-MM-DD` String format.
+
+```
+"SELECT CURRENT_DATE() AS current_date_result" returns
+	{
+		"current_date_result": "2020-04-22"
+	}
+```
+
+### CURRENT_TIME()
+
+Returns the current time in UTC in `HH:mm:ss.SSS` String format.
+
+```
+"SELECT CURRENT_TIME() AS current_time_result" returns
+	{
+		"current_time_result": "15:18:14.639"
+	}
+```
+
+### CURRENT_TIMESTAMP
+
+Referencing this variable will evaluate as the current Unix Timestamp in milliseconds.
+
+```
+"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns
+	{
+		"current_timestamp_result": 1587568845765
+	}
+```
+
+### DATE([date_string])
+
+Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format.
+
+If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above.
+
+```
+"SELECT DATE(1587568845765) AS date_result" returns
+	{
+		"date_result": "2020-04-22T15:20:45.765+0000"
+	}
+```
+
+```
+"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns
+	{
+		"date_result2": "2020-04-22T15:20:45.765+0000"
+	}
+```
+
+### DATE_ADD(date, value, interval)
+
+Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values are listed below; either string value (key or shorthand) can be passed as the interval argument.
+ +| Key | Shorthand | +| ------------ | --------- | +| years | y | +| quarters | Q | +| months | M | +| weeks | w | +| days | d | +| hours | h | +| minutes | m | +| seconds | s | +| milliseconds | ms | + +``` +"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND +"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return + { + "date_add_result": 1587655245765 + } +``` + +``` +"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') +AS date_add_result2" returns + { + "date_add_result2": 1650643129017 + } +``` + +### DATE_DIFF(date_1, date_2[, interval]) + +Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. + +Accepted interval values: + +- years +- months +- weeks +- days +- hours +- minutes +- seconds + +``` +"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') +AS date_diff_result" returns + { + "date_diff_result": -17519.753333333334 + } +``` + +### DATE_FORMAT(date, format) + +Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). + +``` +"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') +AS date_format_result" returns + { + "date_format_result": "2018-04-22 15:57:07" + } +``` + +### DATE_SUB(date, value, interval) + +Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. + +| Key | Shorthand | +| ------------ | --------- | +| years | y | +| quarters | Q | +| months | M | +| weeks | w | +| days | d | +| hours | h | +| minutes | m | +| seconds | s | +| milliseconds | ms | + +``` +"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns + { + "date_sub_result": 1524410445765 + } +``` + +### EXTRACT(date, date_part) + +Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" + +| date_part | Example return value\* | +| ----------- | ---------------------- | +| year | "2020" | +| month | "3" | +| day | "26" | +| hour | "15" | +| minute | "13" | +| second | "2" | +| millisecond | "41" | + +``` +"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns + { + "extract_result": "2020" + } +``` + +### GETDATE() + +Returns the current Unix Timestamp in milliseconds. + +``` +"SELECT GETDATE() AS getdate_result" returns + { + "getdate_result": 1587568845765 + } +``` + +### GET_SERVER_TIME() + +Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. + +``` +"SELECT GET_SERVER_TIME() AS get_server_time_result" returns + { + "get_server_time_result": "2020-04-22T15:20:45.765+0000" + } +``` + +### OFFSET_UTC(date, offset) + +Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. 
+ +``` +"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns + { + "offset_utc_result": "2020-04-22T19:20:45.765+0400" + } +``` + +``` +"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns + { + "offset_utc_result2": "2020-04-23T01:20:45.765+1000" + } +``` + +### NOW() + +Returns the current Unix Timestamp in milliseconds. + +``` +"SELECT NOW() AS now_result" returns + { + "now_result": 1587568845765 + } +``` diff --git a/versioned_docs/version-4.7/developers/sql-guide/features-matrix.md b/versioned_docs/version-4.7/developers/sql-guide/features-matrix.md new file mode 100644 index 00000000..f436ad62 --- /dev/null +++ b/versioned_docs/version-4.7/developers/sql-guide/features-matrix.md @@ -0,0 +1,88 @@ +--- +title: SQL Features Matrix +--- + +# SQL Features Matrix + +:::warning +Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. +::: + +## SQL Features Matrix + +Harper provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. + +| INSERT | | +| ---------------------------------- | --- | +| Values - multiple values supported | ✔ | +| Sub-SELECT | ✗ | + +| UPDATE | | +| ---------------- | --- | +| SET | ✔ | +| Sub-SELECT | ✗ | +| Conditions | ✔ | +| Date Functions\* | ✔ | +| Math Functions | ✔ | + +| DELETE | | +| ---------- | --- | +| FROM | ✔ | +| Sub-SELECT | ✗ | +| Conditions | ✔ | + +| SELECT | | +| -------------------- | --- | +| Column SELECT | ✔ | +| Aliases | ✔ | +| Aggregator Functions | ✔ | +| Date Functions\* | ✔ | +| Math Functions | ✔ | +| Constant Values | ✔ | +| Distinct | ✔ | +| Sub-SELECT | ✗ | + +| FROM | | +| ---------------- | --- | +| Multi-table JOIN | ✔ | +| INNER JOIN | ✔ | +| LEFT OUTER JOIN | ✔ | +| LEFT INNER JOIN | ✔ | +| RIGHT OUTER JOIN | ✔ | +| RIGHT INNER JOIN | ✔ | +| FULL JOIN | ✔ | +| UNION | ✗ | +| Sub-SELECT | ✗ | +| TOP | ✔ | + +| WHERE | | +| -------------------------- | --- | +| Multi-Conditions | ✔ | +| Wildcards | ✔ | +| IN | ✔ | +| LIKE | ✔ | +| Bit-wise Operators AND, OR | ✔ | +| Bit-wise Operators NOT | ✔ | +| NULL | ✔ | +| BETWEEN | ✔ | +| EXISTS,ANY,ALL | ✔ | +| Compare columns | ✔ | +| Compare constants | ✔ | +| Date Functions\* | ✔ | +| Math Functions | ✔ | +| Sub-SELECT | ✗ | + +| GROUP BY | | +| --------------------- | --- | +| Multi-Column GROUP BY | ✔ | + +| HAVING | | +| ----------------------------- | --- | +| Aggregate function conditions | ✔ | + +| ORDER BY | | +| --------------------- | --- | +| Multi-Column ORDER BY | ✔ | +| Aliases | ✔ | +| Date Functions\* | ✔ | +| Math Functions | ✔ | diff --git a/versioned_docs/version-4.7/developers/sql-guide/functions.md b/versioned_docs/version-4.7/developers/sql-guide/functions.md new file mode 100644 index 00000000..789090a4 --- /dev/null +++ b/versioned_docs/version-4.7/developers/sql-guide/functions.md @@ -0,0 +1,145 @@ +--- +title: Harper SQL Functions +--- + +:::warning +Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. +::: + +# Harper SQL Functions + +This SQL keywords reference contains the SQL functions available in Harper. 
+
+## Functions
+
+### Aggregate
+
+| Keyword            | Syntax                                                           | Description |
+| ------------------ | ---------------------------------------------------------------- | ----------- |
+| `AVG`              | `AVG(expression)`                                                | Returns the average of a given numeric expression. |
+| `COUNT`            | `SELECT COUNT(column_name) FROM database.table WHERE condition`  | Returns the number of records that match the given criteria. Nulls are not counted. |
+| `GROUP_CONCAT`     | `GROUP_CONCAT(expression)`                                       | Returns a string with the concatenated, comma-separated, non-null values from a group. Returns null when there are no non-null values. |
+| `MAX`              | `SELECT MAX(column_name) FROM database.table WHERE condition`    | Returns the largest value in a specified column. |
+| `MIN`              | `SELECT MIN(column_name) FROM database.table WHERE condition`    | Returns the smallest value in a specified column. |
+| `SUM`              | `SUM(column_name)`                                               | Returns the sum of the numeric values provided. |
+| `ARRAY`\*          | `ARRAY(expression)`                                              | Returns a list of data as a field. |
+| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)`                                     | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. |
+
+\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects).
+
+### Conversion
+
+| Keyword   | Syntax                                          | Description |
+| --------- | ----------------------------------------------- | ----------- |
+| `CAST`    | `CAST(expression AS datatype(length))`          | Converts a value to a specified datatype. |
+| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. |
+
+### Date & Time
+
+| Keyword             | Syntax                                   | Description |
+| ------------------- | ---------------------------------------- | ----------- |
+| `CURRENT_DATE`      | `CURRENT_DATE()`                         | Returns the current date in UTC in "YYYY-MM-DD" String format. |
+| `CURRENT_TIME`      | `CURRENT_TIME()`                         | Returns the current time in UTC in "HH:mm:ss.SSS" String format. |
+| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP`                      | Referencing this variable evaluates to the current Unix Timestamp in milliseconds. For more information, see [SQL Date Functions](./date-functions). |
+| `DATE`              | `DATE([date_string])`                    | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' String format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, see [SQL Date Functions](./date-functions). |
+| `DATE_ADD`          | `DATE_ADD(date, value, interval)`        | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Either string value (key or shorthand) can be passed as the interval argument. For more information, see [SQL Date Functions](./date-functions). |
+| `DATE_DIFF`         | `DATE_DIFF(date_1, date_2[, interval])`  | Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. For more information, see [SQL Date Functions](./date-functions). |
+| `DATE_FORMAT`       | `DATE_FORMAT(date, format)`              | Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). For more information, see [SQL Date Functions](./date-functions). |
+| `DATE_SUB`          | `DATE_SUB(date, value, interval)`        | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Either string value (key or shorthand) can be passed as the interval argument. For more information, see [SQL Date Functions](./date-functions). |
+| `DAY`               | `DAY(date)`                              | Returns the day of the month for the given date. |
+| `DAYOFWEEK`         | `DAYOFWEEK(date)`                        | Returns the numeric value of the weekday of the date given ("YYYY-MM-DD"). NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. |
+| `EXTRACT`           | `EXTRACT(date, date_part)`               | Extracts and returns the requested date_part as a String value. For more information, see [SQL Date Functions](./date-functions). |
+| `GETDATE`           | `GETDATE()`                              | Returns the current Unix Timestamp in milliseconds. |
+| `GET_SERVER_TIME`   | `GET_SERVER_TIME()`                      | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. |
+| `OFFSET_UTC`        | `OFFSET_UTC(date, offset)`               | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. |
+| `NOW`               | `NOW()`                                  | Returns the current Unix Timestamp in milliseconds. |
+| `HOUR`              | `HOUR(datetime)`                         | Returns the hour part of a given datetime in the range 0 to 23. |
+| `MINUTE`            | `MINUTE(datetime)`                       | Returns the minute part of a time/datetime in the range 0 to 59. |
+| `MONTH`             | `MONTH(date)`                            | Returns the month part for a specified date in the range 1 to 12. |
+| `SECOND`            | `SECOND(datetime)`                       | Returns the seconds part of a time/datetime in the range 0 to 59. |
+| `YEAR`              | `YEAR(date)`                             | Returns the year part for a specified date. |
+
+### Logical
+
+| Keyword  | Syntax                                          | Description |
+| -------- | ----------------------------------------------- | ----------- |
+| `IF`     | `IF(condition, value_if_true, value_if_false)`  | Returns a value if the condition is true, or another value if the condition is false. |
+| `IIF`    | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. |
+| `IFNULL` | `IFNULL(expression, alt_value)`                 | Returns a specified value if the expression is null. |
+| `NULLIF` | `NULLIF(expression_1, expression_2)`            | Returns null if expression_1 is equal to expression_2; if not equal, returns expression_1. |
+
+### Mathematical
+
+| Keyword  | Syntax                          | Description |
+| -------- | ------------------------------- | ----------- |
+| `ABS`    | `ABS(expression)`               | Returns the absolute value of a given numeric expression. |
+| `CEIL`   | `CEIL(number)`                  | Returns the integer ceiling: the smallest integer value that is greater than or equal to a given number. |
+| `EXP`    | `EXP(number)`                   | Returns e to the power of a specified number. |
+| `FLOOR`  | `FLOOR(number)`                 | Returns the largest integer value that is smaller than, or equal to, a given number. |
+| `RANDOM` | `RANDOM(seed)`                  | Returns a pseudo-random number. |
+| `ROUND`  | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. |
+| `SQRT`   | `SQRT(expression)`              | Returns the square root of an expression. |
+
+### String
+
+| Keyword       | Syntax                                                                            | Description |
+| ------------- | --------------------------------------------------------------------------------- | ----------- |
+| `CONCAT`      | `CONCAT(string_1, string_2, ...., string_n)`                                      | Concatenates, or joins, two or more strings together, resulting in a single string. |
+| `CONCAT_WS`   | `CONCAT_WS(separator, string_1, string_2, ...., string_n)`                        | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. |
+| `INSTR`       | `INSTR(string_1, string_2)`                                                       | Returns the first position, as an integer, of string_2 within string_1. |
+| `LEN`         | `LEN(string)`                                                                     | Returns the length of a string. |
+| `LOWER`       | `LOWER(string)`                                                                   | Converts a string to lower-case. |
+| `REGEXP`      | `SELECT column_name FROM database.table WHERE column_name REGEXP pattern`         | Searches a column for strings matching the given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. |
+| `REGEXP_LIKE` | `SELECT column_name FROM database.table WHERE REGEXP_LIKE(column_name, pattern)`  | Searches a column for strings matching the given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. |
+| `REPLACE`     | `REPLACE(string, old_string, new_string)`                                         | Replaces all instances of old_string within string, with new_string. |
+| `SUBSTRING`   | `SUBSTRING(string, string_position, length_of_substring)`                         | Extracts a specified number of characters from a string. |
+| `TRIM`        | `TRIM([character(s) FROM] string)`                                                | Removes leading and trailing spaces, or specified character(s), from a string. |
+| `UPPER`       | `UPPER(string)`                                                                   | Converts a string to upper-case. |
+
+## Operators
+
+### Logical Operators
+
+| Keyword   | Syntax                                                                                      | Description |
+| --------- | ------------------------------------------------------------------------------------------- | ----------- |
+| `BETWEEN` | `SELECT column_name(s) FROM database.table WHERE column_name BETWEEN value_1 AND value_2`   | Returns values (numbers, text, or dates) within a given range, inclusive. |
+| `IN`      | `SELECT column_name(s) FROM database.table WHERE column_name IN(value(s))`                  | Used to specify multiple values in a WHERE clause. |
+| `LIKE`    | `SELECT column_name(s) FROM database.table WHERE column_n LIKE pattern`                     | Searches for a specified pattern within a WHERE clause. |
+
+## Queries
+
+### General
+
+| Keyword    | Syntax                                                                                                                         | Description |
+| ---------- | ------------------------------------------------------------------------------------------------------------------------------ | ----------- |
+| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM database.table`                                                                           | Returns only unique values, eliminating duplicate records. |
+| `FROM`     | `FROM database.table`                                                                                                          | Used to list the database(s), table(s), and any joins required for a SQL statement. |
+| `GROUP BY` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)`                    | Groups rows that have the same values into summary rows. |
+| `HAVING`   | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)`   | Filters data based on a group or aggregate function. |
+| `SELECT`   | `SELECT column_name(s) FROM database.table`                                                                                    | Selects data from a table. |
+| `WHERE`    | `SELECT column_name(s) FROM database.table WHERE condition`                                                                    | Extracts records based on a defined condition. |
+
+### Joins
+
+| Keyword              | Syntax                                                                                                                                        | Description |
+| -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | ----------- |
+| `CROSS JOIN`         | `SELECT column_name(s) FROM database.table_1 CROSS JOIN database.table_2`                                                                     | Returns a paired combination of each row from `table_1` with each row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. |
+| `FULL OUTER`         | `SELECT column_name(s) FROM database.table_1 FULL OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name WHERE condition`   | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). |
+| `[INNER] JOIN`       | `SELECT column_name(s) FROM database.table_1 INNER JOIN database.table_2 ON table_1.column_name = table_2.column_name`                        | Returns only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. |
+| `LEFT [OUTER] JOIN`  | `SELECT column_name(s) FROM database.table_1 LEFT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name`                   | Returns all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. |
+| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 RIGHT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name`                  | Returns all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. |
+
+### Predicates
+
+| Keyword       | Syntax                                                                      | Description                |
+| ------------- | --------------------------------------------------------------------------- | -------------------------- |
+| `IS NOT NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NOT NULL`   | Tests for non-null values. |
+| `IS NULL`     | `SELECT column_name(s) FROM database.table WHERE column_name IS NULL`       | Tests for null values. |
+
+### Statements
+
+| Keyword  | Syntax                                                                                     | Description                         |
+| -------- | ------------------------------------------------------------------------------------------ | ----------------------------------- |
+| `DELETE` | `DELETE FROM database.table WHERE condition`                                               | Deletes existing data from a table. |
+| `INSERT` | `INSERT INTO database.table(column_name(s)) VALUES(value(s))`                              | Inserts new records into a table. |
+| `UPDATE` | `UPDATE database.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition`   | Alters existing records in a table. |
diff --git a/versioned_docs/version-4.7/developers/sql-guide/index.md b/versioned_docs/version-4.7/developers/sql-guide/index.md
new file mode 100644
index 00000000..52f245ab
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/sql-guide/index.md
@@ -0,0 +1,88 @@
+---
+title: SQL Guide
+---
+
+# SQL Guide
+
+:::warning
+Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future.
+:::
+
+## Harper SQL Guide
+
+The purpose of this guide is to describe the SQL functionality that Harper supports. The SQL parser is still actively being developed; many SQL features may not be optimized or may not utilize indexes. This document will be updated as more features and functionality become available. Generally, the REST interface provides a more stable, secure, and performant interface for data interaction, but the SQL functionality can be useful for administrative ad-hoc querying and for reusing existing SQL statements. **A high-level view of supported features can be found** [**here**](sql-guide/features-matrix)**.**
+
+Harper adheres to the concept of databases and tables. This allows developers to isolate table structures from each other, all within one database.
+
+## Select
+
+Harper has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping, and ordering.
+
+All results are returned as JSON object arrays.
+
+Query for all records and attributes in the dev.dog table:
+
+```
+SELECT * FROM dev.dog
+```
+
+Query specific columns from all rows in the dev.dog table:
+
+```
+SELECT id, dog_name, age FROM dev.dog
+```
+
+Query for all records and attributes in the dev.dog table, ordered by age in ascending order:
+
+```
+SELECT * FROM dev.dog ORDER BY age
+```
+
+_The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword._
+
+## Insert
+
+Harper supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. Harper does not support selecting from one table to insert into another at this time.
+
+```
+INSERT INTO dev.dog (id, dog_name, age, breed_id)
+	VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347)
+```
+
+## Update
+
+Harper supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. At this time, selecting from one table to update another is not supported.
+
+```
+UPDATE dev.dog
+	SET owner_name = 'Kyle'
+	WHERE id IN (1, 2)
+```
+
+## Delete
+
+Harper supports deleting records from a table with condition support.
+
+```
+DELETE FROM dev.dog
+	WHERE age < 4
+```
+
+## Joins
+
+Harper allows developers to join any number of tables and currently supports the following join types:
+
+- INNER JOIN
+- LEFT INNER JOIN
+- LEFT OUTER JOIN
+
+Here’s a basic example joining two tables from our Get Started example, joining a dogs table with a breeds table:
+
+```
+SELECT d.id, d.dog_name, d.owner_name, b.name, b.section
+	FROM dev.dog AS d
+	INNER JOIN dev.breed AS b ON d.breed_id = b.id
+	WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen')
+	AND b.section = 'Mutt'
+	ORDER BY d.dog_name
+```
diff --git a/versioned_docs/version-4.7/developers/sql-guide/json-search.md b/versioned_docs/version-4.7/developers/sql-guide/json-search.md
new file mode 100644
index 00000000..c4bcd1c8
--- /dev/null
+++ b/versioned_docs/version-4.7/developers/sql-guide/json-search.md
@@ -0,0 +1,177 @@
+---
+title: SQL JSON Search
+---
+
+:::warning
+Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future.
+:::
+
+# SQL JSON Search
+
+Harper automatically indexes all top-level attributes in a row/object written to a table. However, any attributes which hold JSON data do not have their nested attributes indexed. In order to make searching and/or transforming these JSON documents easy, Harper offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses, allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine.
+
+## Syntax
+
+`SEARCH_JSON(expression, attribute)`
+
+Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document.
+
+### Example 1
+
+#### Search a string array
+
+Here are two records in the database:
+
+```json
+[
+	{
+		"id": 1,
+		"name": ["Harper", "Penny"]
+	},
+	{
+		"id": 2,
+		"name": ["Penny"]
+	}
+]
+```
+
+Here is a simple query that gets any record with "Harper" found in the name.
+
+```
+SELECT *
+FROM dev.dog
+WHERE search_json('"Harper" in *', name)
+```
+
+### Example 2
+
+The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, the release date, and an object array of each actor’s name and character name in the movie.
+
+Both function calls evaluate the credits.cast attribute, which is an object array of every cast member in a movie.
+
+```
+SELECT m.title,
+	m.overview,
+	m.release_date,
+	SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters
+FROM movies.credits c
+	INNER JOIN movies.movie m
+		ON c.movie_id = m.id
+WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2
+```
+
+A sample of this data from the movie The Avengers looks like:
+
+```json
+[
+	{
+		"cast_id": 46,
+		"character": "Tony Stark / Iron Man",
+		"credit_id": "52fe4495c3a368484e02b251",
+		"gender": "male",
+		"id": 3223,
+		"name": "Robert Downey Jr.",
+		"order": 0
+	},
+	{
+		"cast_id": 2,
+		"character": "Steve Rogers / Captain America",
+		"credit_id": "52fe4495c3a368484e02b19b",
+		"gender": "male",
+		"id": 16828,
+		"name": "Chris Evans",
+		"order": 1
+	},
+	{
+		"cast_id": 307,
+		"character": "Bruce Banner / The Hulk",
+		"credit_id": "5e85e8083344c60015411cfa",
+		"gender": "male",
+		"id": 103,
+		"name": "Mark Ruffalo",
+		"order": 2
+	}
+]
+```
+
+Let’s break down the SEARCH_JSON function call in the SELECT:
+
+```
+SEARCH_JSON(
+	$[name in [
+		"Robert Downey Jr.",
+		"Chris Evans",
+		"Scarlett Johansson",
+		"Mark Ruffalo",
+		"Chris Hemsworth",
+		"Jeremy Renner",
+		"Clark Gregg",
+		"Samuel L. Jackson",
+		"Gwyneth Paltrow",
+		"Don Cheadle"
+	]].{
+		"actor": name,
+		"character": character
+	},
+	c.`cast`
+)
+```
+
+The first argument passed to SEARCH_JSON is the expression to execute against the second argument, which is the cast attribute on the credits table. This expression will execute for every row. The expression starts with `$[…]`, which tells it to iterate all elements of the cast array.
+
+Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array:
+
+```
+name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]
+```
+
+So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. This tells the function to create a specific object for each matching entry.
+
+**Sample Result**
+
+```json
+[
+	{
+		"actor": "Robert Downey Jr.",
+		"character": "Tony Stark / Iron Man"
+	},
+	{
+		"actor": "Chris Evans",
+		"character": "Steve Rogers / Captain America"
+	},
+	{
+		"actor": "Mark Ruffalo",
+		"character": "Bruce Banner / The Hulk"
+	}
+]
+```
+
+Having the SEARCH_JSON function in our SELECT is powerful, but it would still return every other movie that lacks our matching actors. In order to filter out the movies we do not want, we also use SEARCH_JSON in the WHERE clause.
+
+This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT:
+
+```
+SEARCH_JSON(
+	$count(
+		$[name in [
+			"Robert Downey Jr.",
+			"Chris Evans",
+			"Scarlett Johansson",
+			"Mark Ruffalo",
+			"Chris Hemsworth",
+			"Jeremy Renner",
+			"Clark Gregg",
+			"Samuel L. Jackson",
+			"Gwyneth Paltrow",
+			"Don Cheadle"
+		]]
+	),
+	c.`cast`
+) >= 2
+```
+
+As seen above, we execute the same name filter against the cast array; the primary difference is that we wrap the filtered results in `$count(…)`. This returns a count of the matching results, which we then compare using our SQL comparator of `>= 2`.
+
+To see further SEARCH_JSON examples in action, view our Postman Collection, which provides a [sample database & data with query examples](../operations-api/advanced-json-sql-examples).
+ +To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/versioned_docs/version-4.7/developers/sql-guide/reserved-word.md b/versioned_docs/version-4.7/developers/sql-guide/reserved-word.md new file mode 100644 index 00000000..2cd812ba --- /dev/null +++ b/versioned_docs/version-4.7/developers/sql-guide/reserved-word.md @@ -0,0 +1,207 @@ +--- +title: Harper SQL Reserved Words +--- + +:::warning +Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. +::: + +# Harper SQL Reserved Words + +This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a database, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. + +For Example, for a table called `ASSERT` in the `data` database, a SQL select on that table would look like: + +``` +SELECT * from data.`ASSERT` +``` + +Alternatively: + +``` +SELECT * from data.[ASSERT] +``` + +### RESERVED WORD LIST + +- ABSOLUTE +- ACTION +- ADD +- AGGR +- ALL +- ALTER +- AND +- ANTI +- ANY +- APPLY +- ARRAY +- AS +- ASSERT +- ASC +- ATTACH +- AUTOINCREMENT +- AUTO_INCREMENT +- AVG +- BEGIN +- BETWEEN +- BREAK +- BY +- CALL +- CASE +- CAST +- CHECK +- CLASS +- CLOSE +- COLLATE +- COLUMN +- COLUMNS +- COMMIT +- CONSTRAINT +- CONTENT +- CONTINUE +- CONVERT +- CORRESPONDING +- COUNT +- CREATE +- CROSS +- CUBE +- CURRENT_TIMESTAMP +- CURSOR +- DATABASE +- DECLARE +- DEFAULT +- DELETE +- DELETED +- DESC +- DETACH +- DISTINCT +- DOUBLEPRECISION +- DROP +- ECHO +- EDGE +- END +- ENUM +- ELSE +- EXCEPT +- EXISTS +- EXPLAIN +- FALSE +- FETCH +- FIRST +- FOREIGN +- FROM +- GO +- GRAPH +- GROUP +- GROUPING +- HAVING +- HDB_HASH +- HELP +- IF +- IDENTITY +- IS +- IN +- INDEX +- INNER +- INSERT +- INSERTED +- INTERSECT +- INTO +- JOIN +- KEY +- LAST +- LET +- LEFT +- LIKE +- LIMIT +- LOOP +- MATCHED +- MATRIX +- MAX +- MERGE +- MIN +- MINUS +- MODIFY +- NATURAL +- NEXT +- NEW +- NOCASE +- NO +- NOT +- NULL +- OFF +- ON +- ONLY +- OFFSET +- OPEN +- OPTION +- OR +- ORDER +- OUTER +- OVER +- PATH +- PARTITION +- PERCENT +- PLAN +- PRIMARY +- PRINT +- PRIOR +- QUERY +- READ +- RECORDSET +- REDUCE +- REFERENCES +- RELATIVE +- REPLACE +- REMOVE +- RENAME +- REQUIRE +- RESTORE +- RETURN +- RETURNS +- RIGHT +- ROLLBACK +- ROLLUP +- ROW +- SCHEMA +- SCHEMAS +- SEARCH +- SELECT +- SEMI +- SET +- SETS +- SHOW +- SOME +- SOURCE +- STRATEGY +- STORE +- SYSTEM +- SUM +- TABLE +- TABLES +- TARGET +- TEMP +- TEMPORARY +- TEXTSTRING +- THEN +- TIMEOUT +- TO +- TOP +- TRAN +- TRANSACTION +- TRIGGER +- TRUE +- TRUNCATE +- UNION +- UNIQUE +- UPDATE +- USE +- USING +- VALUE +- VERTEX +- VIEW +- WHEN +- WHERE +- WHILE +- WITH +- WORK diff --git a/versioned_docs/version-4.7/developers/sql-guide/sql-geospatial-functions.md b/versioned_docs/version-4.7/developers/sql-guide/sql-geospatial-functions.md new file mode 100644 index 00000000..f0c571da --- /dev/null +++ b/versioned_docs/version-4.7/developers/sql-guide/sql-geospatial-functions.md @@ -0,0 +1,419 @@ +--- +title: SQL Geospatial Functions +--- + +:::warning +Harper encourages developers to utilize other querying tools over SQL for performance purposes. 
Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future.
+:::
+
+# SQL Geospatial Functions
+
+Harper geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements.
+
+If you are new to GeoJSON, you should check out the full specification here: https://geojson.org/. There are a few important things to point out before getting started.
+
+1. All GeoJSON coordinates are stored in `[longitude, latitude]` format.
+1. Coordinates or GeoJSON geometries must be passed as strings when written directly in a SQL statement.
+1. Note: if you are using Postman for your testing, due to limitations in the Postman client you will need to escape quotes in your strings, and your SQL will need to be passed on a single line.
+
+In the examples below, database and table names may change, but all GeoJSON data will be stored in a column named geo_data.
+
+# geoArea
+
+The geoArea() function returns the area of one or more features in square meters.
+
+## Syntax
+
+geoArea(_geoJSON_)
+
+## Parameters
+
+| Parameter | Description                     |
+| --------- | ------------------------------- |
+| geoJSON   | Required. One or more features. |
+
+### Example 1
+
+Calculate the area, in square meters, of a manually passed GeoJSON polygon.
+
+```
+SELECT geoArea('{
+	"type":"Feature",
+	"geometry":{
+		"type":"Polygon",
+		"coordinates":[[
+			[0,0],
+			[0.123456,0],
+			[0.123456,0.123456],
+			[0,0.123456]
+		]]
+	}
+}')
+```
+
+### Example 2
+
+Find all records that have an area less than 1 square mile (or 2589988 square meters).
+
+```
+SELECT * FROM dev.locations
+WHERE geoArea(geo_data) < 2589988
+```
+
+# geoLength
+
+Takes a GeoJSON and measures its length in the specified units (default is kilometers).
+
+## Syntax
+
+geoLength(_geoJSON_[_, units_])
+
+## Parameters
+
+| Parameter | Description                                                                                                             |
+| --------- | --------------------------------------------------------------------------------------------------------------------- |
+| geoJSON   | Required. GeoJSON to measure.                                                                                           |
+| units     | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’.  |
+
+### Example 1
+
+Calculate the length, in kilometers, of a manually passed GeoJSON linestring.
+
+```
+SELECT geoLength('{
+	"type": "Feature",
+	"geometry": {
+		"type": "LineString",
+		"coordinates": [
+			[-104.97963309288025,39.76163265441438],
+			[-104.9823260307312,39.76365323407955],
+			[-104.99193906784058,39.75616442110704]
+		]
+	}
+}')
+```
+
+### Example 2
+
+Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest.
+
+```
+SELECT *, geoLength(geo_data, 'miles') as length
+FROM dev.locations
+WHERE geoLength(geo_data, 'miles') < 5
+ORDER BY length ASC
+```
+
+# geoDifference
+
+Returns a new polygon with the difference of the second polygon clipped from the first polygon.
+
+## Syntax
+
+geoDifference(_polygon1, polygon2_)
+
+## Parameters
+
+| Parameter | Description                                        |
+| --------- | -------------------------------------------------- |
+| polygon1  | Required. Polygon or MultiPolygon GeoJSON feature. |
+| polygon2  | Required.
Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | + +### Example + +Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). + +``` +SELECT geoDifference('{ + "type": "Feature", + "properties": { + "name":"Colorado" + }, + "geometry": { + "type": "Polygon", + "coordinates": [[ + [-109.072265625,37.00255267215955], + [-102.01904296874999,37.00255267215955], + [-102.01904296874999,41.0130657870063], + [-109.072265625,41.0130657870063], + [-109.072265625,37.00255267215955] + ]] + } + }', + '{ + "type": "Feature", + "properties": { + "name":"City Park" + }, + "geometry": { + "type": "Polygon", + "coordinates": [[ + [-104.95973110198975,39.7543828214657], + [-104.95955944061278,39.744781185675386], + [-104.95904445648193,39.74422022399989], + [-104.95835781097412,39.74402223643582], + [-104.94097709655762,39.74392324244047], + [-104.9408483505249,39.75434982844515], + [-104.95973110198975,39.7543828214657] + ]] + } + }' +) +``` + +# geoDistance + +Calculates the distance between two points in units (default is kilometers). + +## Syntax + +geoDistance(_point1, point2_[_, units_]) + +## Parameters + +| Parameter | Description | +| --------- | --------------------------------------------------------------------------------------------------------------------- | +| point1 | Required. GeoJSON Point specifying the origin. | +| point2 | Required. GeoJSON Point specifying the destination. | +| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | + +### Example 1 + +Calculate the distance, in miles, between Harper’s headquarters and the Washington Monument. + +``` +SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') +``` + +### Example 2 + +Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. + +``` +SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance +FROM dev.locations +WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 +ORDER BY distance ASC +``` + +# geoNear + +Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. + +## Syntax + +geoNear(_point1, point2, distance_[_, units_]) + +## Parameters + +| Parameter | Description | +| --------- | --------------------------------------------------------------------------------------------------------------------- | +| point1 | Required. GeoJSON Point specifying the origin. | +| point2 | Required. GeoJSON Point specifying the destination. | +| distance | Required. The maximum distance in units as an integer or decimal. | +| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | + +### Example 1 + +Return all locations within 50 miles of a given point. + +``` +SELECT * +FROM dev.locations +WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') +``` + +### Example 2 + +Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. 
+ +``` +SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance +FROM dev.locations +WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') +ORDER BY distance ASC +``` + +# geoContains + +Determines if geo2 is completely contained by geo1. Returns a Boolean. + +## Syntax + +geoContains(_geo1, geo2_) + +## Parameters + +| Parameter | Description | +| --------- | --------------------------------------------------------------------------------- | +| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | +| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. | + +### Example 1 + +Return all locations within the state of Colorado (passed as a GeoJSON string). + +``` +SELECT * +FROM dev.locations +WHERE geoContains('{ + "type": "Feature", + "properties": { + "name":"Colorado" + }, + "geometry": { + "type": "Polygon", + "coordinates": [[ + [-109.072265625,37.00255267], + [-102.01904296874999,37.00255267], + [-102.01904296874999,41.01306579], + [-109.072265625,41.01306579], + [-109.072265625,37.00255267] + ]] + } +}', geo_data) +``` + +### Example 2 + +Return all locations which contain Harper Headquarters. + +``` +SELECT * +FROM dev.locations +WHERE geoContains(geo_data, '{ + "type": "Feature", + "properties": { + "name": "Harper Headquarters" + }, + "geometry": { + "type": "Polygon", + "coordinates": [[ + [-104.98060941696167,39.760704817357905], + [-104.98053967952728,39.76065120861263], + [-104.98055577278137,39.760642961109674], + [-104.98037070035934,39.76049450588716], + [-104.9802714586258,39.76056254790385], + [-104.9805235862732,39.76076461167841], + [-104.98060941696167,39.760704817357905] + ]] + } +}') +``` + +# geoEqual + +Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see https://developers.arcgis.com/documentation/spatial-references/. Returns a Boolean. + +## Syntax + +geoEqual(_geo1_, _geo2_) + +## Parameters + +| Parameter | Description | +| --------- | -------------------------------------- | +| geo1 | Required. GeoJSON geometry or feature. | +| geo2 | Required. GeoJSON geometry or feature. | + +### Example + +Find Harper Headquarters within all locations within the database. + +``` +SELECT * +FROM dev.locations +WHERE geoEqual(geo_data, '{ + "type": "Feature", + "properties": { + "name": "Harper Headquarters" + }, + "geometry": { + "type": "Polygon", + "coordinates": [[ + [-104.98060941696167,39.760704817357905], + [-104.98053967952728,39.76065120861263], + [-104.98055577278137,39.760642961109674], + [-104.98037070035934,39.76049450588716], + [-104.9802714586258,39.76056254790385], + [-104.9805235862732,39.76076461167841], + [-104.98060941696167,39.760704817357905] + ]] + } +}') +``` + +# geoCrosses + +Determines if the geometries cross over each other. Returns boolean. + +## Syntax + +geoCrosses(_geo1, geo2_) + +## Parameters + +| Parameter | Description | +| --------- | -------------------------------------- | +| geo1 | Required. GeoJSON geometry or feature. | +| geo2 | Required. GeoJSON geometry or feature. | + +### Example + +Find all locations that cross over a highway. 
+ +``` +SELECT * +FROM dev.locations +WHERE geoCrosses( + geo_data, + '{ + "type": "Feature", + "properties": { + "name": "Highway I-25" + }, + "geometry": { + "type": "LineString", + "coordinates": [ + [-104.9139404296875,41.00477542222947], + [-105.0238037109375,39.715638134796336], + [-104.853515625,39.53370327008705], + [-104.853515625,38.81403111409755], + [-104.61181640625,38.39764411353178], + [-104.8974609375,37.68382032669382], + [-104.501953125,37.00255267215955] + ] + } + }' +) +``` + +# geoConvert + +Converts a series of coordinates into a GeoJSON of the specified type. + +## Syntax + +geoConvert(_coordinates, geo_type_[, _properties_]) + +## Parameters + +| Parameter | Description | +| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| coordinates | Required. One or more coordinates | +| geo_type | Required. GeoJSON geometry type. Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | +| properties | Optional. Escaped JSON array with properties to be added to the GeoJSON output. | + +### Example + +Convert a given coordinate into a GeoJSON point with specified properties. + +``` +SELECT geoConvert( + '[-104.979127,39.761563]', + 'point', + '{ + "name": "Harper Headquarters" + }' +) +``` diff --git a/versioned_docs/version-4.7/foundations/core-concepts.md b/versioned_docs/version-4.7/foundations/core-concepts.md new file mode 100644 index 00000000..8ef3113d --- /dev/null +++ b/versioned_docs/version-4.7/foundations/core-concepts.md @@ -0,0 +1,80 @@ +--- +title: Core Concepts +--- + +# Core Concepts + +Before you build your first app with Harper, it helps to understand a few key ideas. These concepts show you how Harper is structured and why it’s flexible enough to power everything from a quick proof-of-concept to a production-ready platform. + +## Components + +**Components** are the building blocks of Harper. +They’re JavaScript-based modules that extend Harper’s core, and they can talk directly to Harper’s [Global APIs](../reference/globals) (databases, tables, resources). + +Because components can build on top of each other, they give you composability. For example, both [Applications](../developers/applications/) and [Plugins](../reference/components/plugins) are just kinds of components: + +- **Plugins** add individual capabilities, like defining tables or serving static assets. +- **Applications** pull multiple plugins and resources together into a complete product. + +:::info +💡 **Why it matters:** Instead of wiring up a backend from scratch, you can piece together pre-built functionality and get to working endpoints fast. +::: + +## Applications (a type of Component) + +An **application** is a special kind of component that pulls everything together. +Applications rely on plugins to do the work: + +- Use `graphqlSchema` to define your data tables. +- Add `rest` to query that data instantly. +- Plug in `static` to serve files or front-end assets. + +You can even run full frameworks like [Next.js](https://github.com/HarperDB/nextjs) or [Apollo](https://github.com/HarperDB/apollo) as Harper applications. + +:::info +💡 **Why it matters:** Applications are how you ship real products on Harper. They let you stitch together resources, APIs, and UI in one place. +::: + +## Plugins + +**Plugins** are a special kind of component that are not meant to run standalone, but instead add features to applications or other components. 
These were originally called **extensions** (and the [extension API](../reference/components/extensions) is still supported), but the new [plugin API](../reference/components/plugins) is both simpler and more extensible.
+
+Examples you’ll see in the ecosystem include:
+
+- **Built-in plugins**: These are embedded in Harper and work out of the box. Examples include [graphqlSchema](../reference/components/built-in-extensions#graphqlschema) for database and table definitions, [rest](../reference/components/built-in-extensions#rest) for RESTful access to your data, and [static](../reference/components/built-in-extensions#static) for serving files or frontend assets.
+
+- **Custom plugins**: These live outside of Harper and are installed from GitHub or npm. Harper supports a few official ones, and the ecosystem may include community plugins as well. Examples include [@harperdb/nextjs](https://github.com/HarperDB/nextjs) for Next.js integration and [@harperdb/apollo](https://github.com/HarperDB/apollo) for Apollo GraphQL.
+
+:::info
+💡 **Why it matters:** Plugins give Harper its flexibility. You can compose them into applications to get powerful functionality without writing boilerplate yourself.
+:::
+
+## Resources
+
+**Resources** are Harper’s data layer and are implemented using the [`Resource`](../reference/resources/) class.
+They represent databases, tables, and other data entities, and they provide a unified API for accessing, querying, modifying, and monitoring records.
+
+At the simplest level, resources let you:
+
+- Define schemas and tables for your application data.
+- Query and update that data through Harper’s APIs.
+- Extend the base `Resource` class with JavaScript to define custom data sources or behaviors.
+
+Each `Resource` instance can represent a single record or a collection of records at a given point in time.
+Static methods on the `Resource` class handle common operations like parsing paths, running transactions, and enforcing access controls, while instance methods give you a transactional view of individual records.
+
+:::info
+💡 **Why it matters:** Whether you’re working with standard tables or custom-defined resources, everything in Harper’s data layer builds on the same model. This gives you consistency when modeling data and flexibility to extend it with your own logic. For full details, see the [Resource reference documentation](../reference/resources/).
+:::
+
+## Server
+
+At the edge of Harper is the **server layer**, which connects your data to the outside world. Harper supports REST/HTTP, WebSockets, MQTT, and more. A single resource can be available through multiple protocols at once, so the same table can power a real-time dashboard, a mobile app, and a backend API.
+
+:::info
+💡 **Why it matters:** You don’t have to choose between protocols. One data model, many ways to access it.
+:::
+
+---
+
+✅ With these concepts in mind, you’re ready to [build your first application](../getting-started/quickstart). That’s where you’ll see how Components, Resources, and Plugins come together in practice.
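+
+As a parting sketch of how these pieces fit together in code, here is a custom resource that extends a table, assuming a `Dog` table has already been defined via `graphqlSchema` (the class name and derived field are purely illustrative):
+
+```js
+// resources.js (sketch): extend a table with custom read behavior.
+// Assumes a `Dog` table defined via graphqlSchema; `isPuppy` is illustrative.
+import { tables } from 'harperdb';
+const { Dog } = tables;
+
+export class DogWithAge extends Dog {
+	async get(query) {
+		const record = await super.get(query);
+		// decorate the stored record with a derived property on the way out
+		return { ...record, isPuppy: record?.age < 2 };
+	}
+}
+```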
diff --git a/versioned_docs/version-4.7/foundations/harper-architecture.md b/versioned_docs/version-4.7/foundations/harper-architecture.md new file mode 100644 index 00000000..0c6dfb28 --- /dev/null +++ b/versioned_docs/version-4.7/foundations/harper-architecture.md @@ -0,0 +1,101 @@ +--- +title: Harper Architecture +--- + +# Harper Architecture + +Before diving deep into APIs and configuration, it helps to understand the big picture of how Harper works. +Harper uses a **three-layer architecture** designed for distributed, edge-first computing. Each layer builds on the next, letting you start simple and scale as your app grows. + +![](/img/v4.6/harper-architecture.png) + +At a high level: + +- **Core services** handle data, networking, and files. +- **Plugins** layer in reusable features (REST, GraphQL, Next.js, etc.). +- **Applications** bring everything together to deliver user-facing functionality. + +:::info +💡 **Why it matters:** You focus on building your app, while Harper takes care of scaling, networking, and consistency behind the scenes. +::: + +--- + +## Core Services + +Harper ships with three essential services: + +- **Database** → Fast storage, queries, and transactions. +- **Networking** → REST/HTTP, WebSockets, MQTT, and cluster communication. +- **Component Management** → The system that loads, configures, and connects components (applications, plugins, resources) so they work together consistently. + +Think of these as Harper’s foundation—every extension and app builds on them. + +--- + +## Applications & Extensions + +Most of your work will happen here. + +### Applications + +Applications sit at the top layer. They’re where you implement user-facing features. Examples: + +- A **Next.js app** served directly from Harper. +- A **basic app** from the [Getting Started guide](../getting-started/quickstart) that defines a schema, adds a table, and automatically exposes REST endpoints with the `rest` extension. + +Applications don’t re-invent core logic—they declare the plugins they need. + +### Component Configuration + +Every Harper project starts with a **root configuration**. +This configuration declares which components (applications, plugins/extensions, resources) should be loaded and how they should be initialized. + +Some components are self-contained, while others include configuration that ties into additional components. For example: + +- An application in the root config might load the `rest` plugin. +- The `rest` plugin exposes data from the database, so its configuration links to `graphqlSchema`. +- `graphqlSchema` defines the tables that the database service makes available. + +This layering of configuration is what makes Harper composable: by declaring one component in your root config, you can enable entire sets of functionality. + +:::info +💡 **Why it matters:** Instead of wiring everything manually, you declare the root config, and Harper initializes the components in the right relationships. +::: + +--- + +## Resource API + +At the heart of Harper is the **Resource API**. It gives you a unified, consistent way to interact with data. + +- `get()` → fetch data +- `post()` → create data or trigger actions +- `put()` → replace existing data +- `patch()` → update part of a record + +Every call is wrapped in a transaction, so multi-table operations stay consistent without extra boilerplate. + +For the complete API, see the [Resource reference](../reference/resources). 
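+
+As a rough sketch, here is how those verbs look when called directly on a table from component code (assuming a `Dog` table exists; values are illustrative, and `post()`/`patch()` follow the same calling pattern):
+
+```js
+// Minimal sketch: the unified Resource verbs on a table.
+// Assumes a `Dog` table; outside of a request, each call runs in its own transaction.
+import { tables } from 'harperdb';
+const { Dog } = tables;
+
+await Dog.put({ id: '1', name: 'Penny', breed: 'Lab', age: 5 }); // create or replace
+const penny = await Dog.get('1'); // fetch by primary key
+await Dog.delete('1'); // remove the record
+```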
+ +:::info +💡 **Why it matters:** You can build reliable features—like signups, payments, or analytics—without hand-rolling transaction logic. +::: + +--- + +## Transaction Model + +All requests run inside automatic transactions: + +- Read/write across multiple tables in a single request. +- Automatic change tracking. +- Guaranteed consistency at commit. + +:::info +💡 **Why it matters:** You don’t have to think about database race conditions or half-finished writes—Harper guarantees integrity by default. +::: + +--- + +✅ With this architecture in mind, you can see how Harper scales from “hello world” to complex, distributed applications. Next, try putting it into practice by [building your first app](../developers/applications/). diff --git a/versioned_docs/version-4.7/foundations/use-cases.md b/versioned_docs/version-4.7/foundations/use-cases.md new file mode 100644 index 00000000..642a74f7 --- /dev/null +++ b/versioned_docs/version-4.7/foundations/use-cases.md @@ -0,0 +1,80 @@ +--- +title: Harper Use Cases +--- + +# Harper Use Cases + +Harper is designed to cut out infrastructure complexity so you can move faster. +Here are some common ways developers use Harper in production today — each one showing how Harper’s architecture translates into real-world outcomes. + +--- + +## RESTful APIs for Distributed & Cached Data + +**Great for:** web apps, mobile apps, data-heavy platforms. + +Harper’s most common use case is exposing distributed, cached data over a RESTful interface. +This lets you serve complex or large-scale datasets efficiently, with built-in caching and global distribution. + +- Define your schema with the `graphqlSchema` plugin. +- Expose it instantly over REST using the `rest` plugin. +- Take advantage of Harper’s caching layer to serve hot data without extra infrastructure. +- Power both web and mobile applications from the same API. + +:::info +💡 **Why it matters:** Instead of bolting a cache or API layer onto a database, Harper gives you a unified system that scales for real-world apps. +::: + +--- + +## Online Catalogs & Content Delivery + +**Great for:** e-commerce sites, real estate listings, media & content platforms. + +Harper’s distributed architecture makes your pages load fast worldwide, improving **SEO** and **conversion rates**. + +- Host your frontend directly with the [Next.js Extension](https://github.com/HarperDB/nextjs). +- Support any framework using Harper’s extension system. +- Use Harper’s built-in caching + JavaScript layer to [server-side render pages](https://www.harpersystems.dev/development/tutorials/server-side-rendering-with-multi-tier-cache). +- Keep pages instantly fresh with built-in [WebSocket connections](../developers/real-time#websockets). + +:::info +💡 **Why it matters:** Instead of stitching together CDN + DB + API layers, you deliver catalog and content experiences from a single platform. +::: + +--- + +## Data Delivery Networks + +**Great for:** live sports updates, flight tracking, software updates. + +Harper combines **messaging**, **data storage**, and **application logic** in one system. That means: + +- Push real-time updates directly to clients. +- Process and store data without leaving Harper. +- Eliminate extra message brokers or caching systems. + +Explore the [real-time docs](../developers/real-time) to see how it works. + +:::info +💡 **Why it matters:** You can build real-time data services in hours, not weeks, with fewer moving parts to manage. 
+:::
+
+---
+
+## Edge Inference Systems
+
+**Great for:** IoT pipelines, sensor networks, edge AI.
+
+Normally, capturing and analyzing streams at the edge requires a patchwork of tools. Harper simplifies this with:
+
+- **Self-healing connections** that keep data flowing even in flaky environments.
+- The same Harper runtime running at both the edge and in the cloud.
+
+:::info
+💡 **Why it matters:** One consistent stack across edge and cloud makes AI/ML inference faster, cheaper, and easier to scale.
+:::
+
+---
+
+✅ Want to explore more? [Contact us](https://www.harpersystems.dev/contact) and we’ll walk you through building your own use case.
diff --git a/versioned_docs/version-4.7/getting-started/installation.md b/versioned_docs/version-4.7/getting-started/installation.md
new file mode 100644
index 00000000..f7cb1cf3
--- /dev/null
+++ b/versioned_docs/version-4.7/getting-started/installation.md
@@ -0,0 +1,50 @@
+---
+title: Install and Connect Harper
+---
+
+# Install and Connect Harper
+
+The recommended approach for developing applications with Harper is to install Harper locally, develop your application there, and deploy it to [Harper Fabric](https://fabric.harper.fast), our distributed data application platform service. However, you can also develop directly in Fabric if you want to quickly try it out. You can also run a self-hosted Harper server and manage it with our Fabric studio management UI.
+
+## Install with npm
+
+The fastest way to get Harper running locally is to install with npm. Make sure you have [Node.js](https://nodejs.org/) (LTS or newer). Then run:
+
+```bash
+npm install -g harperdb
+harperdb
+```
+
+The first time, you’ll set up your destination, username, password, and [configuration](../deployments/configuration). That’s it! Harper is now running locally.
+
+✅ Quick check: open http://localhost:9925, which will launch the studio UI for managing your local server, or run this for a quick health check:
+
+```bash
+curl http://localhost:9925/health
+```
+
+Harper can also be [installed with our Docker image, or you can download Harper for manual or offline installation](../deployments/install-harper).
+
+## Manage and Deploy with Fabric
+
+Fabric is our service for managing and deploying Harper on a distributed network. Fabric makes it easy to create new Harper "clusters" (the Harper application platform running on distributed nodes) and deploy your application to them. Fabric provides a management UI for your deployments and even for the local instance you just installed. You can sign up for Fabric for free and create a free Harper cluster to deploy your application:
+
+- Go to [Fabric](https://fabric.harper.fast) and sign up for a new account.
+  - You will need to agree to the terms of service and verify your email address.
+- Once you have created an account, you can create an organization. This allows you to collaboratively manage your Harper services with others. This will also define the host domain that will be used.
+- You can now create a new Harper cluster or instance:
+  - Create a free Harper cluster for trying out Harper.
+  - Purchase a Harper cluster with higher performance, scalability, and limits.
+  - Add your own local instance to manage everything in one place.
+- Once you have a Harper cluster, you will be ready to create a new application directly on Fabric, or to deploy an application you developed locally.
+
+Once Harper is running or you are connected to Fabric, we recommend that you walk through the steps of [building your first application](../getting-started/quickstart) and learn more about Harper’s concepts and architecture:
+
+- [Build your first application](../getting-started/quickstart)
+- Explore the [Core Concepts](../foundations/core-concepts)
+- Learn about [Harper's architecture](../foundations/harper-architecture)
+- Review [Configuration options](../deployments/configuration)
+
+:::info
+Need help? Please don’t hesitate to [reach out](https://www.harpersystems.dev/contact).
+:::
diff --git a/versioned_docs/version-4.7/getting-started/quickstart.md b/versioned_docs/version-4.7/getting-started/quickstart.md
new file mode 100644
index 00000000..e8956d0e
--- /dev/null
+++ b/versioned_docs/version-4.7/getting-started/quickstart.md
@@ -0,0 +1,218 @@
+---
+title: Create Your First Application
+---
+
+# Create Your First Application
+
+Now that you've set up Harper, let's build a simple API. Harper lets you build powerful APIs with minimal effort. In just a few minutes, you'll have a functional REST API with automatic validation, indexing, and querying—all without writing a single line of code.
+
+## Set Up Your Project
+
+If you have installed Harper locally, start by cloning the Harper application template:
+
+```bash
+git clone https://github.com/HarperDB/application-template my-app
+cd my-app
+```
+
+If you are working in the Fabric studio UI, you can navigate to your cluster and then to the "Applications" tab. Then choose "Create New Application" (using the standard application template). This will create a new application based on the `application-template`.
+
+## Creating Our First Table
+
+The core of a Harper application is the database, so let's create a database table.
+
+A quick and expressive way to define a table is through a [GraphQL Schema](https://graphql.org/learn/schema). Using your editor of choice, edit the file named `schema.graphql` in the root of the application directory, `my-app`, that we created above. In the Fabric UI, simply click on `schema.graphql` to start editing it. To create a table, we will need to add a `type` named `Dog` with the `@table` directive (you can remove the example table in the template):
+
+```graphql
+type Dog @table {
+	# properties will go here soon
+}
+```
+
+And then we'll add a primary key named `id` of type `ID`:
+
+_(Note: a GraphQL schema is a fast way to define tables in Harper, but you are by no means required to use GraphQL to query your application.)_
+
+```graphql
+type Dog @table {
+	id: ID @primaryKey
+}
+```
+
+Now we tell Harper to run this as an application:
+
+```bash
+harperdb dev . # tell the Harper CLI to run the current directory as an application in dev mode
+```
+
+If you are using the Fabric UI, you can click "Restart Cluster" to apply these schema changes.
+
+Harper will now create the `Dog` table and the `id` attribute we just defined. Not only is this an easy way to create a table, but this schema is included in our application, which will ensure that this table exists wherever we deploy this application (to any Harper instance).
+
+## Adding Attributes to Our Table
+
+Next, let's expand our `Dog` table by adding additional typed attributes for dog `name`, `breed`, and `age`.
+
+```graphql
+type Dog @table {
+	id: ID @primaryKey
+	name: String
+	breed: String
+	age: Int
+}
+```
+
+This will ensure that new records must have these properties with these types.
+
+Because we ran `harperdb dev .` earlier (dev mode), Harper is now monitoring the contents of our application directory for changes and reloading when they occur. This means that once we save our schema file with these new attributes, Harper will automatically reload our application, read `my-app/schema.graphql`, and update the `Dog` table and attributes we just defined. Dev mode also ensures that any logging or errors are immediately displayed in the console (rather than only in the log file).
+
+If you are running in Fabric, again, you can click "Restart Cluster" to apply any changes. You can navigate to the "Databases" page to see your new table and add records to it.
+
+As a document database, Harper supports heterogeneous records, so you can freely specify additional properties on any record. If you do want to restrict records to only defined properties, you can always do that by adding the `@sealed` directive:
+
+```graphql
+type Dog @table @sealed {
+	id: ID @primaryKey
+	name: String
+	breed: String
+	age: Int
+	tricks: [String]
+}
+```
+
+## Adding an Endpoint
+
+Now that we have a running application with a database (with data, if you imported any), let's make this data accessible from a RESTful URL by adding an endpoint. To do this, we simply add the `@export` directive to our `Dog` table:
+
+```graphql
+type Dog @table @export {
+	id: ID @primaryKey
+	name: String
+	breed: String
+	age: Int
+	tricks: [String]
+}
+```
+
+For a local instance, the application HTTP server port defaults to `9926` (this can be [configured here](../deployments/configuration#http)), so the local URL would be `http://localhost:9926/Dog/` with a full REST API. In Fabric, a public hostname/URL will be created, and you can go to the "Config" page to see your "Application URL", which should look like `your-cluster.your-org.harperfabric.com`. You can query this directly with an HTTPS URL by including authentication information.
+
+We can PUT or POST data into this table using this new path, and then GET or DELETE from it as well (you can even view data directly from the browser). If you have not added any records yet, you can use a PUT or POST to add a record. PUT is appropriate if you know the id, and POST can be used to assign an id:
+
+```bash
+curl -X POST http://localhost:9926/Dog/ \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "Harper",
+    "breed": "Labrador",
+    "age": 3,
+    "tricks": ["sits"]
+  }'
+```
+
+Or in Fabric:
+
+```bash
+curl -X POST https://your-cluster.your-org.harperfabric.com/Dog/ \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Basic " \
+  -d '{
+    "name": "Harper",
+    "breed": "Labrador",
+    "age": 3,
+    "tricks": ["sits"]
+  }'
+```
+
+With this, a record will be created, and the auto-assigned id will be available through the `Location` header. If you added a record, you can visit the path `/Dog/` to view that record. Alternatively, the curl command `curl http://localhost:9926/Dog/` will achieve the same thing.
+
+## Authenticating Endpoints
+
+Now that you've created your first API endpoints, it's important to ensure they're protected. Without authentication, anyone could potentially access, misuse, or overload your APIs, whether by accident or malicious intent. Authentication verifies who is making the request and enables you to control access based on identity, roles, or permissions. It’s a foundational step in building secure, reliable applications.
+
+Endpoints created with Harper automatically support `Basic` authentication, JWT authentication, and maintaining authentication with cookie-based sessions. See the documentation on [security](../developers/security/) for more information on different levels of access.
+
+By default, Harper also automatically authorizes all requests from loopback IP addresses (from the same computer) as the superuser, to make local development simple. If you want to test authentication/authorization, or enforce stricter security, you may want to disable the [`authentication.authorizeLocal` setting](../deployments/configuration#authentication).
+
+### Content Negotiation
+
+These endpoints support various content types, including `JSON`, `CBOR`, `MessagePack`, and `CSV`. Simply include an `Accept` header in your requests with the preferred content type. We recommend `CBOR` as a compact, efficient encoding with rich data types, but `JSON` is familiar and great for web application development, and `CSV` can be useful for exporting data to spreadsheets or other processing.
+
+Harper works with other important standard HTTP headers as well, and these endpoints are even capable of caching interactions:
+
+```
+Authorization: Basic 
+Accept: application/cbor
+If-None-Match: "etag-id" # browsers can automatically provide this
+```
+
+## Querying
+
+Querying your application database is straightforward, as tables exported with the `@export` directive are automatically exposed via [REST endpoints](../developers/rest). Simple queries can be crafted through [URL query parameters](https://en.wikipedia.org/wiki/Query_string).
+
+In order to maintain reasonable query speed on a database as it grows in size, it is critical to select and establish the proper indexes. So, before we begin querying our `Dog` table, let's take a moment to target some table properties for indexing. We'll use `name` and `breed` as indexed table properties on our `Dog` table. All we need to do to accomplish this is tag these properties with the `@indexed` directive:
+
+```graphql
+type Dog @table {
+	id: ID @primaryKey
+	name: String @indexed
+	breed: String @indexed
+	owner: String
+	age: Int
+	tricks: [String]
+}
+```
+
+And finally, we'll add the `@export` directive to expose the table as a RESTful endpoint:
+
+```graphql
+type Dog @table @export {
+	id: ID @primaryKey
+	name: String @indexed
+	breed: String @indexed
+	owner: String
+	age: Int
+	tricks: [String]
+}
+```
+
+Now we can start querying. We simply access the endpoint with query parameters (basic GET requests), like:
+
+```
+http://localhost:9926/Dog/?name=Harper
+http://localhost:9926/Dog/?breed=Labrador
+http://localhost:9926/Dog/?breed=Husky&name=Balto&select(id,name,breed)
+```
+
+In Fabric, you can open such URLs directly in the browser, where the browser will prompt you for your username and password:
+
+```
+https://your-cluster.your-org.harperfabric.com/Dog/?name=Harper
+...
+```
+
+Congratulations, you have now created a secure database application backend with a table, a well-defined structure, access controls, and a functional REST endpoint with query capabilities! See the [REST documentation for more information on HTTP access](../developers/rest) and the [Schema reference](../developers/applications/defining-schemas) for more options for defining schemas. If you were developing locally, you are ready to deploy to Fabric.
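+
+As a quick illustration, here is one way to run such a query from Node.js (version 18+, which provides a global `fetch`); the credentials shown are placeholders:
+
+```javascript
+// Query the Dog endpoint with Basic auth and JSON content negotiation
+const credentials = Buffer.from('username:password').toString('base64');
+const response = await fetch('http://localhost:9926/Dog/?breed=Labrador', {
+	headers: {
+		Authorization: `Basic ${credentials}`,
+		Accept: 'application/json',
+	},
+});
+const dogs = await response.json();
+console.log(dogs); // an array of matching Dog records
+```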
+
+> Additionally, you may now use GraphQL (over HTTP) to create queries. See the documentation for that new feature [here](../reference/graphql).
+
+## Deploy to Fabric
+
+In the recommended flow, you have been developing your application locally and are now ready to deploy it to Fabric. The recommended way of doing this is to commit your code to a git repository, from which Harper can directly pull your application and run it. To get started, it is easiest to use a public repository for ease of access and deployment. Once you have committed your code, you can go to the "Applications" page and select "Import Application". You can then enter the URL of your repository and Fabric will deploy it on your cluster. We also recommend using git tags and deploying by tag name for control over application versioning. You can import and deploy a tag in a repository using a URL like "git+https://git@github.com/my-org/my-app.git#semver:v1.0.27".
+
+You can also deploy to Fabric using the CLI. With this approach, you "push" your application code into your Fabric cluster. From the command line, go into your application directory and run:
+
+```bash
+# package is optional; the current directory is used if not specified
+# replicated=true deploys to your whole cluster
+harperdb deploy_component \
+  project= \
+  package= \
+  target= \
+  username= \
+  password= \
+  restart=true \
+  replicated=true
+```
+
+Once you have deployed and restarted, your application is live and ready to be used by the world!
+
+## Key Takeaway
+
+Harper's schema-driven approach means you can build production-ready APIs in minutes, not hours. Start with pure schema definitions to get 90% of your functionality, then add custom code only where needed. This gives you the best of both worlds: rapid development with the flexibility to customize when required.
diff --git a/versioned_docs/version-4.7/index.mdx b/versioned_docs/version-4.7/index.mdx
new file mode 100644
index 00000000..e7fa8d53
--- /dev/null
+++ b/versioned_docs/version-4.7/index.mdx
@@ -0,0 +1,83 @@
+---
+title: Harper Docs
+---
+
+import CustomDocCardList from '@site/src/components/CustomDocCardList';
+
+# Harper Docs
+
+:::info
+[Connect with our team!](https://www.harpersystems.dev/contact)
+:::
+
+Harper is an all-in-one backend technology that fuses database technologies, caching, application hosting, and messaging functions into a single system. Unlike traditional architectures, where each piece runs independently and incurs extra costs and latency from serialization and network operations between processes, Harper systems can handle workloads seamlessly and efficiently.
+
+Here, you'll find all things Harper, and everything you need to get started, troubleshoot issues, and make the most of our platform.
+
+## Getting Started
+
+The recommended approach for developing applications with Harper is to develop locally and deploy to Harper Fabric, our distributed data application platform service. Our getting started guide will walk you through how to install Harper locally, sign up for the Fabric service, build a simple application, and deploy it.
+
+<CustomDocCardList />
+
+## Building with Harper
+
+<CustomDocCardList />
diff --git a/versioned_docs/version-4.7/reference/_category_.json b/versioned_docs/version-4.7/reference/_category_.json
new file mode 100644
index 00000000..1a36ae90
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/_category_.json
@@ -0,0 +1,6 @@
+{
+  "link": {
+    "type": "doc",
+    "id": "reference/index"
+  }
+}
diff --git a/versioned_docs/version-4.7/reference/analytics.md b/versioned_docs/version-4.7/reference/analytics.md
new file mode 100644
index 00000000..742a299d
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/analytics.md
@@ -0,0 +1,173 @@
+---
+title: Analytics
+---
+
+# Analytics
+
+Harper provides extensive telemetry and analytics data to help you monitor server status and workloads, understand traffic and usage patterns, identify issues and scaling needs, and find the queries and actions that consume the most resources.
+
+Harper collects statistics for all operations, URL endpoints, and messaging topics, aggregating information by thread, operation, resource, and method, in real time. These statistics are logged in the `hdb_raw_analytics` and `hdb_analytics` tables in the `system` database.
+
+There are two "levels" of analytics: the first is raw, direct logging of real-time statistics. These analytics entries are recorded once a second (when there is activity) by each thread, and include all recorded activity in the last second, along with system resource information. The records have a primary key that is the timestamp in milliseconds since epoch. They can be queried (with `superuser` permission) using the `search_by_conditions` operation on the `hdb_raw_analytics` table (this example searches 10 seconds' worth of analytics):
+
+```
+POST http://localhost:9925
+Content-Type: application/json
+
+{
+  "operation": "search_by_conditions",
+  "schema": "system",
+  "table": "hdb_raw_analytics",
+  "conditions": [{
+    "search_attribute": "id",
+    "search_type": "between",
+    "search_value": [1688594000000, 1688594010000]
+  }]
+}
+```
+
+And a typical response looks like:
+
+```
+{
+  "time": 1688594390708,
+  "period": 1000.8336279988289,
+  "metrics": [
+    {
+      "metric": "bytes-sent",
+      "path": "search_by_conditions",
+      "type": "operation",
+      "median": 202,
+      "mean": 202,
+      "p95": 202,
+      "p90": 202,
+      "count": 1
+    },
+    ...
+    {
+      "metric": "memory",
+      "threadId": 2,
+      "rss": 1492664320,
+      "heapTotal": 124596224,
+      "heapUsed": 119563120,
+      "external": 3469790,
+      "arrayBuffers": 798721
+    },
+    {
+      "metric": "utilization",
+      "idle": 138227.52767700003,
+      "active": 70.5066209952347,
+      "utilization": 0.0005098165086230495
+    }
+  ],
+  "threadId": 2,
+  "totalBytesProcessed": 12182820,
+  "id": 1688594390708.6853
+}
+```
+
+The second level of analytics recording is aggregate data. The aggregate records are recorded once a minute and summarize the per-second entries from all the threads. Their ids are also timestamps in milliseconds since epoch, and they can be queried from the `hdb_analytics` table with an operation like:
+
+```
+POST http://localhost:9925
+Content-Type: application/json
+
+{
+  "operation": "search_by_conditions",
+  "schema": "system",
+  "table": "hdb_analytics",
+  "conditions": [{
+    "search_attribute": "id",
+    "search_type": "between",
+    "search_value": [1688194100000, 1688594990000]
+  }]
+}
+```
+
+And a summary record looks like:
+
+```
+{
+  "period": 60000,
+  "metric": "bytes-sent",
+  "method": "connack",
+  "type": "mqtt",
+  "median": 4,
+  "mean": 4,
+  "p95": 4,
+  "p90": 4,
+  "count": 1,
+  "id": 1688589569646,
+  "time": 1688589569646
+}
+```
+
+## Standard Analytics Metrics
+
+While applications can define their own metrics, Harper provides a set of standard metrics that are tracked for all services:
+
+### HTTP
+
+The following metrics are tracked for all HTTP requests:
+
+| `metric` | `path` | `method` | `type` | Unit | Description |
+| ------------------ | ------------- | -------------- | ---------------------------------------------- | ------------ | ------------------------------------------------------ |
+| `duration` | resource path | request method | `cache-hit` or `cache-miss` if a caching table | milliseconds | Duration of request handler |
+| `duration` | route path | request method | fastify-route | milliseconds | |
+| `duration` | operation | | operation | milliseconds | |
+| `success` | resource path | request method | | % | |
+| `success` | route path | request method | fastify-route | % | |
+| `success` | operation | | operation | % | |
+| `bytes-sent` | resource path | request method | | bytes | |
+| `bytes-sent` | route path | request method | fastify-route | bytes | |
+| `bytes-sent` | operation | | operation | bytes | |
+| `transfer` | resource path | request method | operation | milliseconds | Duration of transfer |
+| `transfer` | route path | request method | fastify-route | milliseconds | Duration of transfer |
+| `transfer` | operation | | operation | milliseconds | Duration of transfer |
+| `socket-routed` | | | | % | Percentage of sockets that could be immediately routed |
+| `tls-handshake` | | | | milliseconds | |
+| `tls-reused` | | | | % | Percentage of TLS connections that reuse sessions |
+| `cache-hit` | table name | | | % | The percentage of cache hits |
+| `cache-resolution` | table name | | | milliseconds | The duration of resolving requests for uncached entries |
+
+The following are metrics for real-time MQTT connections:
+
+| `metric` | `path` | `method` | `type` | Unit | Description |
+| ------------------ | ------ | ------------ | ------ | ----- | ------------------------------------------------------ |
+| `mqtt-connections` | | | | count | The number of open direct MQTT connections |
+| `ws-connections` | | | | count | The number of open WS connections |
+| `connection` | `mqtt` | `connect` | | % | Percentage of successful direct MQTT connections |
+| `connection` | `mqtt` | `disconnect` | | % | Percentage of explicit direct MQTT disconnects |
+| `connection` | `ws` | `connect` | | % | Percentage of successful WS connections |
+| `connection` | `ws` | `disconnect` | | % | Percentage of explicit WS disconnects |
+| `bytes-sent` | topic | mqtt command | `mqtt` | bytes | The number of bytes sent for a given command and topic |
+
+The following are metrics for replication:
+
+| `metric` | `path` | `method` | `type` | Unit | Description |
+| ---------------- | ------------- | ------------- | --------- | ----- | ----------------------------------------------------- |
+| `bytes-sent` | node.database | `replication` | `egress` | bytes | The number of bytes sent for replication |
+| `bytes-sent` | node.database | `replication` | `blob` | bytes | The number of bytes sent for replication of blobs |
+| `bytes-received` | node.database | `replication` | `ingress` | bytes | The number of bytes received for replication |
+| `bytes-received` | node.database | `replication` | `blob` | bytes | The number of bytes received for replication of blobs |
+
+The following are general resource usage statistics that are tracked:
+
+| `metric` | primary attribute(s) | other attribute(s) | Unit | Description |
+| ------------------------- | ------------------------------------------------------------------------------------------------ | ------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------ |
+| `database-size` | `size`, `used`, `free`, `audit` | `database` | bytes | The size of the database in bytes |
+| `main-thread-utilization` | `idle`, `active`, `taskQueueLatency`, `rss`, `heapTotal`, `heapUsed`, `external`, `arrayBuffers` | `time` | various | Main thread resource usage, including idle time, active time, task queue latency, RSS, heap, buffer, and external memory usage |
+| `resource-usage` | | | various | [See breakout below](#resource-usage) |
+| `storage-volume` | `available`, `free`, `size` | `database` | bytes | The size of the storage volume in bytes |
+| `table-size` | `size` | `database`, `table` | bytes | The size of the table in bytes |
+| `utilization` | | | % | How much of the time the worker was processing requests |
+
+`resource-usage` metrics are everything returned by [node:process.resourceUsage()](https://nodejs.org/api/process.html#processresourceusage)[^1] plus the following additional metrics:
+
+| `metric` | Unit | Description |
+| ---------------- | ---- | ----------------------------------------------------- |
+| `time` | ms | Current time when metric was recorded (Unix time) |
+| `period` | ms | Duration of the metric period |
+| `cpuUtilization` | % | CPU utilization percentage (user and system combined) |
+
+[^1]: The `userCPUTime` and `systemCPUTime` metrics are converted to milliseconds to match the other time-related metrics.
diff --git a/versioned_docs/version-4.7/reference/architecture.md b/versioned_docs/version-4.7/reference/architecture.md
new file mode 100644
index 00000000..4155d5ff
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/architecture.md
@@ -0,0 +1,42 @@
+---
+title: Architecture
+---
+
+# Architecture
+
+Harper's architecture consists of resources, which include tables and user-defined data sources and extensions, and server interfaces, which include the RESTful HTTP interface, the operations API, and MQTT. The servers are supported by routing and authentication services.
+
+```
+      ┌──────────┐              ┌──────────┐
+      │ Clients  │              │ Clients  │
+      └────┬─────┘              └────┬─────┘
+           │                         │
+           ▼                         ▼
+ ┌────────────────────────────────────────┐
+ │                                        │
+ │       Socket routing/management        │
+ ├───────────────────────┬────────────────┤
+ │                       │                │
+ │  Server Interfaces  ─►│ Authentication │
+ │  RESTful HTTP, MQTT   │ Authorization  │
+ │                     ◄─┤                │
+ │   ▲                   └────────────────┤
+ │   │                                    │
+ ├───┼────────────────────────────────────┤
+ │   ▼  Resources                         │
+ │                         ┌───────────┐  │
+ ├──────────────────────┐  │    App    │  │
+ │                      ├─►│ resources │  │
+ │   Database tables    │  └───────────┘  │
+ │                      │        ▲        │
+ ├──────────────────────┘        │        │
+ │   ▲                           │        │
+ │   │   ┌────────────────┐      │        │
+ │   └──►│    External    │      │        │
+ │       │  data sources  ├──────┘        │
+ │       └────────────────┘               │
+ │                                        │
+ └────────────────────────────────────────┘
+```
diff --git a/versioned_docs/version-4.7/reference/blob.md b/versioned_docs/version-4.7/reference/blob.md
new file mode 100644
index 00000000..ae7a8355
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/blob.md
@@ -0,0 +1,109 @@
+---
+title: Blob
+---
+
+# Blob
+
+Blobs are binary large objects that can be used to store any type of unstructured/binary data and are designed for large content. Blobs support streaming and offer better performance for content larger than about 20KB. Blobs are built on the native JavaScript `Blob` type, which Harper extends for integrated storage with the database. To use blobs, you would generally want to declare a field as a `Blob` type in your schema:
+
+```graphql
+type MyTable @table {
+	id: Any! @primaryKey
+	data: Blob
+}
+```
+
+You can then create a blob, which writes the binary data to disk and can be included (as a reference) in a record. For example, you can create a record with a blob like:
+
+```javascript
+let blob = createBlob(largeBuffer);
+await MyTable.put({ id: 'my-record', data: blob });
+```
+
+The `data` attribute in this example is a blob reference, and can be used like any other attribute in the record, but it is stored separately, and the data must be accessed asynchronously. You can retrieve the blob data with the standard `Blob` methods:
+
+```javascript
+let buffer = await blob.bytes();
+```
+
+If you are creating a resource method, you can return a `Response` object with a blob as the body:
+
+```javascript
+export class MyEndpoint extends MyTable {
+	async get(target) {
+		let record = super.get(target);
+		return {
+			status: 200,
+			headers: {},
+			body: record.data, // record.data is a blob
+		};
+	}
+}
+```
+
+One of the important characteristics of blobs is that they natively support asynchronous streaming of data. This is important for both creation and retrieval of large data. When we create a blob with `createBlob`, the returned blob will create the storage entry, but the data will be streamed to storage. This means that you can create a blob from a buffer or from a stream. You can also create a record that references a blob before the blob is fully written to storage. For example, you can create a blob from a stream:
+
+```javascript
+let blob = createBlob(stream);
+// at this point the blob exists, but the data is still being written to storage
+await MyTable.put({ id: 'my-record', data: blob });
+// we have now written a record that references the blob
+let record = await MyTable.get('my-record');
+// we now have a record that gives us access to the blob. We can asynchronously access
+// the blob's data or stream it, and it will become available as the stream is written to the blob.
+let stream = record.data.stream();
+```
+
+This can be powerful functionality for large media content, where content can be streamed into storage as it is received and streamed out to users in real time, or even for web content where low-latency transmission of data from origin is critical. However, this also means that blobs are _not_ atomic or [ACID](https://en.wikipedia.org/wiki/ACID) compliant; streaming functionality achieves the opposite behavior of ACID/atomic writes, which would prevent access to data as it is being written and wait until data is fully available before a commit. Alternatively, we can use the `saveBeforeCommit` flag to indicate that the blob should be fully written to storage before committing a transaction, ensuring that the whole blob is available before the transaction commits and writes the record:
+
+```javascript
+let blob = createBlob(stream, { saveBeforeCommit: true });
+// this put will not commit and resolve until the blob is written and then the record is written
+await MyTable.put({ id: 'my-record', data: blob });
+```
+
+Note that using `saveBeforeCommit` does not necessarily guarantee full ACID compliance. This can be combined with the `flush` flag to provide a stronger guarantee that a blob is flushed to disk before committing a transaction. However, the error handling below provides a stronger guarantee of proper blob handling when the process of streaming/writing a blob is interrupted, and using proper error handling, rather than relying on `saveBeforeCommit`, is recommended for the best combination of reliability and performance.
+
+### Error Handling
+
+Because blobs can be streamed and referenced prior to their completion, there is a chance that an error or interruption could occur while streaming data to the blob (after the record is committed). We can create an error handler for the blob to handle the case of an interrupted blob:
+
+```javascript
+export class MyEndpoint extends MyTable {
+	async get(target) {
+		const record = super.get(target);
+		let blob = record.data;
+		blob.on('error', () => {
+			// if this was a caching table, we may want to invalidate or delete this record:
+			MyTable.invalidate(target);
+			// we may want to re-retrieve the blob
+		});
+		return {
+			status: 200,
+			headers: {},
+			body: blob,
+		};
+	}
+}
+```
+
+### Blob `size`
+
+Blobs that are created from streams may not have the standard `size` property available, because the size may not be known while data is being streamed. Consequently, the `size` property may be undefined until the size is determined. You can listen for the `size` event to be notified when the size is available:
+
+```javascript
+let record = await MyTable.get('my-record');
+let blob = record.data;
+blob.size; // will be available if it was saved with a known size
+let stream = blob.stream(); // start streaming the data
+if (blob.size === undefined) {
+	blob.on('size', (size) => {
+		// will be called once the size is available
+	});
+}
+```
+
+### Blob Coercion
+
+When a field is defined to use the `Blob` type, any strings or buffers that are assigned to that field in a `put`, `patch`, or `publish` will automatically be coerced to a `Blob`. This makes it easy to use a `Blob` type even with JSON data that may come from HTTP request bodies or MQTT messages, which do not natively support a `Blob` type.
+
+See the [configuration](../deployments/configuration) documentation for more information on configuring where blobs are stored.
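+
+To round out the coercion behavior described above, here is a minimal sketch reusing the `MyTable` schema from this page; a plain string assigned to the `Blob`-typed field is stored as a blob:
+
+```javascript
+// The string assigned to the Blob-typed field is coerced to a Blob on write
+await MyTable.put({ id: 'my-record', data: 'plain text is stored as a blob' });
+let record = await MyTable.get('my-record');
+let text = await record.data.text(); // read it back with a standard Blob method
+```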
diff --git a/versioned_docs/version-4.7/reference/components/applications.md b/versioned_docs/version-4.7/reference/components/applications.md
new file mode 100644
index 00000000..41210f38
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/components/applications.md
@@ -0,0 +1,221 @@
+---
+title: Applications
+---
+
+# Applications
+
+> The contents of this page predominantly relate to **application** components. Extensions are not necessarily _deployable_. The ambiguity of the term "components" is being worked on and will be improved in future releases. As we work to clarify the terminology, please keep in mind that the component operations are synonymous with application management. In general, "components" refers to both applications and extensions, but in the context of the operations API it refers to applications only.
+
+Harper offers several approaches to managing applications that differ between local development and Harper-managed instances. This page covers the recommended methods of developing, installing, deploying, and running Harper applications.
+
+## Local Development
+
+Harper is designed to be simple to run locally. Generally, Harper should be installed locally on a machine using a global package manager install (e.g., `npm i -g harperdb`).
+
+> Before continuing, ensure Harper is installed and the `harperdb` CLI is available. For more information, review the [installation guide](../../deployments/install-harper/).
+
+When developing an application locally, there are a number of ways to run it on Harper.
+
+### `dev` and `run` commands
+
+The quickest way to run an application is by using the `dev` command within the application directory.
+
+The `harperdb dev .` command will automatically watch for file changes within the application directory and restart the Harper threads when changes are detected.
+
+The `dev` command will **not** restart the main thread; if this is a requirement, switch to using `run` instead and manually start/stop the process to execute the main thread.
+
+Stop execution for either of these processes by sending a SIGINT (generally CTRL+C) signal to the process.
+
+### Deploying to a local Harper instance
+
+Alternatively, to mimic interfacing with a hosted Harper instance, use operation commands instead.
+
+1. Start up Harper with `harperdb`
+1. _Deploy_ the application to the local instance by executing:
+
+   ```sh
+   harperdb deploy \
+     project= \
+     package= \
+     restart=true
+   ```
+
+   - Make sure to omit the `target` option so that it _deploys_ to the Harper instance running locally
+   - The `package=` option creates a symlink to the application, simplifying restarts
+   - By default, the `deploy` operation command will _deploy_ the current directory by packaging it up and streaming the bytes. By specifying `package`, it skips this and references the file path directly
+   - The `restart=true` option automatically restarts Harper threads after the application is deployed
+     - If set to `'rolling'`, a rolling restart will be triggered after the application is deployed
+
+1. In another terminal, use the `harperdb restart` command to restart the instance's threads at any time
+   - With `package=`, the application source is symlinked so changes will automatically be picked up between restarts
+   - If `package` was omitted, run the `deploy` command again with any new changes
+1. To remove the application, use `harperdb drop_component project=`
+
+Similar to the previous section, if the main thread needs to be restarted, start and stop the Harper instance manually (with the application deployed). Upon Harper startup, the application will automatically be loaded and executed across all threads.
+
+> Not all [component operations](../../developers/operations-api/components) are available via CLI. When in doubt, switch to using the Operations API via network requests to the local Harper instance.
+
+For example, to properly _deploy_ a `test-application` locally, the command would look like:
+
+```sh
+harperdb deploy \
+  project=test-application \
+  package=/Users/dev/test-application \
+  restart=true
+```
+
+> If the current directory is the application directory, use a shortcut such as `package=$(pwd)` to avoid typing out the complete path.
+
+Keep in mind that using a local file path for `package` will only work locally; deploying to a remote instance requires a different approach.
+
+## Remote Management
+
+Managing applications on a remote Harper instance is best accomplished through [component operations](../../developers/operations-api/components), similar to using the `deploy` command locally. Before continuing, always back up critical Harper instances. Managing, deploying, and executing applications can directly impact a live system.
+
+Remote Harper instances work very similarly to local Harper instances. The primary application management operations still include `deploy_component`, `drop_component`, and `restart`.
+
+The key to remote management is specifying a remote `target` along with appropriate username/password values. These can all be specified using CLI arguments: `target`, `username`, and `password`. Alternatively, the `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` environment variables can replace the `username` and `password` arguments.
+
+All together:
+
+```sh
+harperdb deploy \
+  project= \
+  package= \
+  username= \
+  password= \
+  target= \
+  restart=true \
+  replicated=true
+```
+
+Or, using environment variables:
+
+```sh
+export CLI_TARGET_USERNAME=
+export CLI_TARGET_PASSWORD=
+harperdb deploy \
+  project= \
+  package= \
+  target= \
+  restart=true \
+  replicated=true
+```
+
+Unlike local development, where `package` should be set to a local file path for symlinking and an improved development experience, `package` now has some additional options.
+
+A local application can be deployed to a remote instance by **omitting** the `package` field. Harper will automatically package the local directory and include that along with the rest of the deployment operation.
+
+Furthermore, the `package` field can be set to any valid [npm dependency value](https://docs.npmjs.com/cli/v11/configuring-npm/package-json#dependencies).
+
+- For applications deployed to npm, specify the package name: `package="@harperdb/status-check"`
+- For applications on GitHub, specify the URL: `package="https://github.com/HarperDB/status-check"`, or the shorthand `package=HarperDB/status-check`
+- Private repositories also work if the correct SSH keys are on the server: `package="git+ssh://git@github.com:HarperDB/secret-applications.git"`
+  - Reference the [SSH Key](../../developers/operations-api/components#add-ssh-key) operations for more information on managing SSH keys on a remote instance
+- Even tarball URLs are supported: `package="https://example.com/application.tar.gz"`
+
+> When using git tags, we highly recommend that you use the semver directive to ensure consistent and reliable installation by npm. In addition to tags, you can also reference branches or commit numbers.
+
+These `package` values are all supported because, behind the scenes, Harper generates a `package.json` file for the components. Then, it uses a form of `npm install` to resolve them as dependencies. This is why symlinks are generated when specifying a file path locally. The following [Advanced](#advanced) section explores this pattern in more detail.
+
+Finally, don't forget to include `restart=true`, or run `harperdb restart target=`.
+
+## Dependency Management
+
+Naturally, applications may have dependencies. Since we operate on top of Node.js, we default to leveraging `npm` and `package.json` for dependency management.
+
+As already covered, there are a number of ways to run an application on Harper, from symlinking a local directory to deploying it via the `deploy_component` operation. Harper does its best to seamlessly run your application.
+
+During application loading, if an application directory contains a `node_modules` directory or lacks a `package.json`, Harper will skip dependency installation. Otherwise, Harper will check the application's config (values specified in the `harperdb-config.yaml` file) for `install: { command, timeout }` fields (see the example below for more information). If set, Harper will use the specified command to install dependencies. If not, then Harper will attempt to derive the package manager from the [`package.json#devEngines#packageManager`](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#devengines) field (which can specify an npm alternative such as yarn or pnpm). Finally, if no package manager or install command could be derived, Harper will default to using `npm install`.
+
+The Application operations [`add_component`](../../developers/operations-api/components.md#add-component) and [`deploy_component`](../../developers/operations-api/components.md#deploy-component) support customizing the install command (and timeout) through the `install_command` and `install_timeout` fields.
+
+If you plan to use a package manager other than `npm`, ensure it is installed and configured on the host machine. Harper does not currently support the `"onFail": "download"` option in `package.json#devEngines#packageManager` and will fall back to `"onFail": "error"` behavior.
+
+### Example `harperdb-config.yaml`
+
+```yaml
+myApp:
+  package: ./my-app
+  install:
+    command: yarn install
+    timeout: 600000 # 10 minutes
+```
+
+### Example `package.json`
+
+```json
+{
+  "name": "my-app",
+  "version": "1.0.0",
+  "devEngines": {
+    "packageManager": {
+      "name": "pnpm",
+      "onFail": "error"
+    }
+  }
+}
+```
+
+## Advanced
+
+The following methods are advanced and should be executed with caution, as they can have unintended side effects. Always back up any critical Harper instances before continuing.
+
+First, locate the Harper installation `rootPath` directory. Generally, this is `~/hdb`. It can be retrieved by running `harperdb get_configuration` and looking for the `rootPath` field.
+
+> For a useful shortcut on POSIX-compliant machines, run: `harperdb get_configuration json=true | jq ".rootPath" | sed 's/"//g'`
+
+This path is the root of the Harper instance. Within this directory, locate the root config titled `harperdb-config.yaml`, and the components root path. The components root path defaults to a `components` directory within the root path (thus, `~/hdb/components`), but it can also be configured. If necessary, use `harperdb get_configuration` again and look for the `componentsRoot` field for the exact path.
+
+### Adding components to root
+
+Similar to how components can specify other components within their `config.yaml`, applications can be added to Harper by adding them to the `harperdb-config.yaml`.
+
+The configuration is very similar to that of `config.yaml`. Entries consist of a top-level component name (ending with `:`) and an indented `package:` field. Any additional component options can also be included as indented fields.
+
+```yaml
+status-check:
+  package: '@harperdb/status-check'
+```
+
+The key difference between this and a component's `config.yaml` is that the name does **not** need to be associated with a `package.json` dependency. When Harper starts up, it transforms these configurations into a `package.json` file, and then executes a form of `npm install`. Thus, the `package:` value can use any valid dependency syntax: npm packages, GitHub repos, tarballs, and local directories are all supported.
+
+Given a root config like:
+
+```yaml
+myGithubComponent:
+  package: HarperDB-Add-Ons/package#v2.2.0 # install from GitHub
+myNPMComponent:
+  package: harperdb # install from npm
+myTarBall:
+  package: /Users/harper/cool-component.tar # install from tarball
+myLocal:
+  package: /Users/harper/local # install from local path
+myWebsite:
+  package: https://harperdb-component # install from URL
+```
+
+Harper will generate a `package.json` like:
+
+```json
+{
+  "dependencies": {
+    "myGithubComponent": "github:HarperDB-Add-Ons/package#v2.2.0",
+    "myNPMComponent": "npm:harperdb",
+    "myTarBall": "file://Users/harper/cool-component.tar",
+    "myLocal": "file://Users/harper/local",
+    "myWebsite": "https://harperdb-component"
+  }
+}
+```
+
+npm will install all the components and store them in the components root. A symlink back to the `node_modules` directory within the components root is also created for dependency resolution purposes.
+
+The package prefix is automatically added; however, you can set it manually in your package reference.
+
+```yaml
+myCoolComponent:
+  package: file://Users/harper/cool-component.tar
+```
+
+By specifying a file path, npm will generate a symlink, and then changes will be automatically picked up between restarts.
diff --git a/versioned_docs/version-4.7/reference/components/built-in-extensions.md b/versioned_docs/version-4.7/reference/components/built-in-extensions.md
new file mode 100644
index 00000000..81cb456e
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/components/built-in-extensions.md
@@ -0,0 +1,319 @@
+---
+title: Built-In Extensions
+---
+
+# Built-In Extensions
+
+Harper provides extended features using built-in extensions. They do **not** need to be installed with a package manager; they simply need to be specified in a config to run. These are used throughout many Harper docs, guides, and examples. Unlike custom extensions, which have their own semantic versions, built-in extensions follow Harper's semantic version.
+
+For more information, read the [Components, Applications, and Extensions](../../developers/applications/) documentation section.
+
+- [Built-In Extensions](#built-in-extensions)
+  - [dataLoader](#dataloader)
+  - [fastifyRoutes](#fastifyroutes)
+  - [graphql](#graphql)
+  - [graphqlSchema](#graphqlschema)
+  - [jsResource](#jsresource)
+  - [loadEnv](#loadenv)
+  - [rest](#rest)
+  - [roles](#roles)
+  - [static](#static)
+    - [Options](#options)
+    - [Examples](#examples)
+      - [Basic Static File Serving](#basic-static-file-serving)
+      - [Enable automatic `index.html` serving](#enable-automatic-indexhtml-serving)
+      - [Enable automatic `.html` extension matching](#enable-automatic-html-extension-matching)
+      - [Provide a custom `404 Not Found` page](#provide-a-custom-404-not-found-page)
+      - [Fully customize not found response](#fully-customize-not-found-response)
+
+## dataLoader
+
+Load data from JSON or YAML files into Harper tables as part of component deployment.
+
+This component is an [Extension](..#extensions) and can be configured with the `files` configuration option.
+
+Complete documentation for this feature is available here: [Data Loader](../../developers/applications/data-loader)
+
+```yaml
+dataLoader:
+  files: 'data/*.json'
+```
+
+## fastifyRoutes
+
+Specify custom endpoints using [Fastify](https://fastify.dev/).
+
+This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options.
+
+Complete documentation for this feature is available here: [Define Fastify Routes](../../developers/applications/define-routes)
+
+```yaml
+fastifyRoutes:
+  files: 'routes/*.js'
+```
+
+## graphql
+
+> GraphQL querying is **experimental**, and only partially implements the GraphQL Over HTTP / GraphQL specifications.
+
+Enables GraphQL querying via a `/graphql` endpoint loosely implementing the GraphQL Over HTTP specification.
+
+Complete documentation for this feature is available here: [GraphQL](../graphql)
+
+```yaml
+graphql: true
+```
+
+## graphqlSchema
+
+Specify schemas for Harper tables and resources via GraphQL schema syntax.
+
+This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options.
+
+Complete documentation for this feature is available here: [Defining Schemas](../../developers/applications/defining-schemas)
+
+```yaml
+graphqlSchema:
+  files: 'schemas.graphql'
+```
+
+## jsResource
+
+Specify custom, JavaScript-based Harper resources.
+
+Refer to the Application [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) guide, or the [Resource Class](../resources/) reference documentation, for more information on custom resources.
+
+This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options.
+
+```yaml
+jsResource:
+  files: 'resource.js'
+```
+
+## loadEnv
+
+Load environment variables via files like `.env`.
+
+This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options.
+
+Ensure this component is specified first in `config.yaml` so that environment variables are loaded prior to loading any other components.
+
+```yaml
+loadEnv:
+  files: '.env'
+```
+
+This component matches the default behavior of dotenv, where existing variables take precedence. Specify the `override` option to override existing environment variables assigned to `process.env`:
+
+```yaml
+loadEnv:
+  files: '.env'
+  override: true
+```
+
+> Important: Harper is a single-process application. Environment variables are loaded onto `process.env` and will be shared throughout all Harper components. This means environment variables loaded by one component will be available on other components (as long as the components are loaded in the correct order).
+
+## rest
+
+Enable automatic REST endpoint generation for exported resources with this component.
+
+Complete documentation for this feature is available here: [REST](../../developers/rest)
+
+```yaml
+rest: true
+```
+
+This component contains additional options:
+
+To enable `Last-Modified` header support:
+
+```yaml
+rest:
+  lastModified: true
+```
+
+To disable automatic WebSocket support:
+
+```yaml
+rest:
+  webSocket: false
+```
+
+## roles
+
+Specify roles for Harper tables and resources.
+
+This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options.
+
+Complete documentation for this feature is available here: [Defining Roles](../../developers/applications/defining-roles)
+
+```yaml
+roles:
+  files: 'roles.yaml'
+```
+
+## static
+
+Serve static files via HTTP.
+
+Use the [Resource Extension](./extensions#resource-extension) configuration options [`files` and `urlPath`](./extensions#resource-extension-configuration) to specify the files to be served.
+
+```
+my-app/
+├─ site/
+│  ├─ index.html
+│  ├─ about.html
+│  ├─ blog/
+│  │  ├─ post-1.html
+│  │  ├─ post-2.html
+├─ config.yaml
+```
+
+The `static` plugin can be configured to serve the `site/` directory by specifying:
+
+```yaml
+static:
+  files: 'site/**'
+```
+
+Then you could access the files relative to the `site` directory, thus `GET localhost:9926/index.html` would return the contents of `site/index.html`, and `GET localhost:9926/blog/post-1.html` would return the contents of `site/blog/post-1.html`.
+
+You can use the `urlPath` option to serve the files from a different URL path, for example:
+
+```yaml
+static:
+  files: 'site/**'
+  urlPath: 'app'
+```
+
+Now, `GET localhost:9926/app/index.html` would return the contents of `site/index.html`, and `GET localhost:9926/app/blog/post-1.html` would return the contents of `site/blog/post-1.html`.
+
+Moreover, if the `site/` directory was nested another level, such as:
+
+```
+my-app/
+├─ site/
+│  ├─ pages/
+│  │  ├─ index.html
+│  │  ├─ about.html
+│  │  ├─ blog/
+│  │  │  ├─ post-1.html
+│  │  │  ├─ post-2.html
+│  ├─ cache-info/
+│  │  ├─ index.json
+│  │  ├─ about.json
+│  ├─ ...
+├─ config.yaml
+```
+
+Now a pattern such as `site/pages/**` will match all files within the `pages` directory (including subdirectories), so a request to `GET localhost:9926/index.html` will return the contents of `site/pages/index.html`, and `GET localhost:9926/blog/post-1.html` will return the contents of `site/pages/blog/post-1.html`.
+
+Because this plugin is implemented using the new [Plugin API](./plugins.md), it automatically responds to application changes. From updating the `config.yaml` to adding, removing, or modifying files, everything is handled automatically and Harper should **not** require a restart.
+
+### Options
+
+In addition to the general Plugin configuration options (`files`, `urlPath`, and `timeout`), this plugin supports the following configuration options:
+
+- `extensions` - `string[]` - _optional_ - An array of file extensions to try and serve when an exact path is not found. For example, `['html']` and the path `/site/page-1` will match `/site/page-1.html`.
+- `fallthrough` - `boolean` - _optional_ - If `true`, the plugin will fall through to the next handler if the requested file is not found. Make sure to disable this option if you want to customize the 404 Not Found response with the `notFound` option. Defaults to `true`.
+- `index` - `boolean` - _optional_ - If `true`, the plugin will serve an `index.html` file if it exists in the directory specified by the `files` pattern. Defaults to `false`.
+- `notFound` - `string | { file: string; statusCode: number }` - _optional_ - Specify a custom file to be returned for 404 Not Found responses. If you want to specify a different statusCode when a given path cannot be found, use the object form and specify the `file` and `statusCode` properties (this is particularly useful for SPAs).
+
+### Examples
+
+The `static` plugin can be configured in various ways to provide different behaviors. Here are some common examples:
+
+#### Basic Static File Serving
+
+Serve all files contained within the `static/` directory as is.
+
+```yaml
+static:
+  files: 'static/**'
+```
+
+Requests must match the file names exactly (relative to the `static/` directory).
+
+#### Enable automatic `index.html` serving
+
+Serve all files contained within the `static/` directory, and automatically serve an `index.html` file if it exists in the directory.
+
+```yaml
+static:
+  files: 'static/**'
+  index: true
+```
+
+Now given a directory structure like:
+
+```
+my-app/
+├─ static/
+│  ├─ index.html
+│  ├─ blog/
+│  │  ├─ index.html
+│  │  ├─ post-1.html
+```
+
+Requests would map like:
+
+```
+GET / -> static/index.html
+GET /blog -> static/blog/index.html
+GET /blog/post-1.html -> static/blog/post-1.html
+```
+
+#### Enable automatic `.html` extension matching
+
+Expanding on the previous example, if you specify the `extensions` option, the plugin will automatically try to match the requested path with the specified extensions.
+
+```yaml
+static:
+  files: 'static/**'
+  index: true
+  extensions: ['html']
+```
+
+Now with the same directory structure, requests would map like:
+
+```
+GET / -> static/index.html
+GET /blog -> static/blog/index.html
+GET /blog/post-1 -> static/blog/post-1.html
+```
+
+#### Provide a custom `404 Not Found` page
+
+When a plain `404 Not Found` response is not sufficient and you want to provide a custom page or resource, use the `notFound` option to specify a custom file to be returned when a requested path is not found.
+
+```yaml
+static:
+  files: 'static/**'
+  notFound: 'static/404.html'
+```
+
+Now if a request is made to a path that does not exist, such as `/non-existent`, the plugin will return the contents of `static/404.html` with a `404` status code.
+
+#### Fully customize not found response
+
+Most commonly in SPAs relying on client-side routing, you may want to override the default `404` status code when a path is not found.
+
+You can do this by specifying the `notFound` option as an object with a `file` and `statusCode` property.
+
+```yaml
+static:
+  files: 'static/**'
+  notFound:
+    file: 'static/index.html'
+    statusCode: 200
+```
+
+Now if a request is made to a path that does not exist, such as `/non-existent`, the plugin will return the contents of `static/index.html` with a `200` status code. This is particularly useful for SPAs where you want to serve the main application file regardless of the requested path.
diff --git a/versioned_docs/version-4.7/reference/components/configuration.md b/versioned_docs/version-4.7/reference/components/configuration.md
new file mode 100644
index 00000000..2175a03d
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/components/configuration.md
@@ -0,0 +1,89 @@
+---
+title: Component Configuration
+---
+
+# Component Configuration
+
+> For information on the distinction between the types of components (applications and extensions), refer to the beginning of the [Applications](../../developers/applications) documentation section.
+
+Harper components are configured with a `config.yaml` file located in the root of the component module directory. This file is how a component configures the other components it depends on. Each entry in the file starts with a component name, and then configuration values are indented below it.
+
+```yaml
+name:
+  option-1: value
+  option-2: value
+```
+
+It is the entry's `name` that is used for component resolution. It can be one of the [built-in extensions](./built-in-extensions), or it must match a package dependency of the component as specified by `package.json`. The [Custom Component Configuration](#custom-component-configuration) section provides more details and examples.
+
+Some built-in extensions can be configured with as little as a top-level boolean; for example, the [rest](./built-in-extensions#rest) extension can be enabled with just:
+
+```yaml
+rest: true
+```
+
+Most components have more configuration options. Some options are ubiquitous to the Harper platform, such as the `files` and `urlPath` options for an [extension](./extensions) or [plugin](./plugins), or `package` for any [custom component](#custom-component-configuration).
+
+[Extensions](./extensions) and [plugins](./plugins) require specifying the `extensionModule` or `pluginModule` option, respectively. Refer to their respective API reference documentation for more information.
## Custom Component Configuration

Any custom component **must** be configured with the `package` option in order for Harper to load that component. When enabled, the name of the package must match a dependency of the component. For example, to use the `@harperdb/nextjs` extension, it must first be included in `package.json`:

```json
{
	"dependencies": {
		"@harperdb/nextjs": "1.0.0"
	}
}
```

Then, within `config.yaml` it can be enabled and configured using:

```yaml
'@harperdb/nextjs':
  package: '@harperdb/nextjs'
  # ...
```

Since npm allows for a [variety of dependency configurations](https://docs.npmjs.com/cli/configuring-npm/package-json#dependencies), this can be used to create custom references. For example, to depend on a specific GitHub branch, first update the `package.json`:

```json
{
	"dependencies": {
		"harper-nextjs-test-feature": "HarperDB/nextjs#test-feature"
	}
}
```

And now in `config.yaml`:

```yaml
harper-nextjs-test-feature:
  package: '@harperdb/nextjs'
  files: './'
  # ...
```

## Default Component Configuration

Harper components do not need to specify a `config.yaml`. Harper uses the following default configuration to load components.

```yaml
rest: true
graphqlSchema:
  files: '*.graphql'
roles:
  files: 'roles.yaml'
jsResource:
  files: 'resources.js'
fastifyRoutes:
  files: 'routes/*.js'
  urlPath: '.'
static:
  files: 'web/**'
```

Refer to the [built-in components](./built-in-extensions) documentation for more information on these fields.

If a `config.yaml` is defined, it will **not** be merged with the default config.

diff --git a/versioned_docs/version-4.7/reference/components/extensions.md b/versioned_docs/version-4.7/reference/components/extensions.md
new file mode 100644
index 00000000..78012b7b
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/components/extensions.md
@@ -0,0 +1,187 @@
---
title: Extensions API
---

# Extensions API

> As of Harper v4.6, a new iteration of the extension API was released called **Plugins**. They are simultaneously a simplification and an extensibility upgrade. Plugins are **experimental**, but we encourage developers to consider developing with the [plugin API](./plugins) instead of the extension API. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported.

There are two key types of extensions: **Resource Extensions** and **Protocol Extensions**. The key difference is that a **Protocol Extension** can return a **Resource Extension**.

Furthermore, what distinguishes an extension from a plain component is that it implements at least one of the [Resource Extension](#resource-extension-api) or [Protocol Extension](#protocol-extension-api) APIs.

All extensions must define a `config.yaml` file and declare an `extensionModule` option. This must be a path to the extension module source code. The path must resolve from the root of the module directory.

For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) `config.yaml` specifies `extensionModule: ./extension.js`.

If the extension is written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version (i.e. `extensionModule: ./dist/index.js`).

## Resource Extension

A Resource Extension is for processing a certain type of file or directory. For example, the built-in [jsResource](./built-in-extensions#jsresource) extension handles executing JavaScript files.
Resource Extensions are composed of four distinct function exports, [`handleFile()`](#handlefilecontents-urlpath-absolutepath-resources-void--promisevoid), [`handleDirectory()`](#handledirectoryurlpath-absolutepath-resources-boolean--void--promiseboolean--void), [`setupFile()`](#setupfilecontents-urlpath-absolutepath-resources-void--promisevoid), and [`setupDirectory()`](#setupdirectoryurlpath-absolutepath-resources-boolean--void--promiseboolean--void). The `handleFile()` and `handleDirectory()` methods are executed on **all worker threads**, and are _executed again during restarts_. The `setupFile()` and `setupDirectory()` methods are only executed **once** on the **main thread** during the initial system start sequence.

> Keep in mind that the CLI command `harperdb restart` or CLI argument `restart=true` only restarts the worker threads. If a component is deployed using `harperdb deploy`, the code within the `setupFile()` and `setupDirectory()` methods will not be executed until the system is completely shut down and turned back on.

Other than their execution behavior, the `handleFile()` and `setupFile()` methods, and the `handleDirectory()` and `setupDirectory()` methods, have identical function definitions (arguments and return value behavior).

### Resource Extension Configuration

Any [Resource Extension](#resource-extension) can be configured with the `files` and `urlPath` options. These options control how _files_ and _directories_ are resolved in order to be passed to the extension's `handleFile()`, `setupFile()`, `handleDirectory()`, and `setupDirectory()` methods.

> Harper relies on the [fast-glob](https://github.com/mrmlnc/fast-glob) library for glob pattern matching.

- `files` - `string | string[] | Object` - _required_ - A [glob pattern](https://github.com/mrmlnc/fast-glob?tab=readme-ov-file#pattern-syntax) string, array of glob pattern strings, or a more expressive glob options object determining the set of files and directories to be resolved for the extension. If specified as an object, the `source` property is required. By default, Harper **matches files and directories**; this is configurable using the `only` option.
  - `source` - `string | string[]` - _required_ - The glob pattern string or array of strings.
  - `only` - `'all' | 'files' | 'directories'` - _optional_ - The glob pattern will match only the specified entry type. Defaults to `'all'`.
  - `ignore` - `string[]` - _optional_ - An array of glob patterns to exclude from matches. This is an alternative way to use negative patterns. Defaults to `[]`.
- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries.
  - If the value starts with `./`, such as `'./static/'`, the component name will be included in the base URL path
  - If the value is `.`, then the component name will be the base URL path
  - Note: `..` is an invalid pattern and will result in an error
  - Otherwise, the value will be the base URL path. Leading and trailing `/` characters will be handled automatically (`/static/`, `/static`, and `static/` are all equivalent to `static`)

For example, to configure the [static](./built-in-extensions#static) component to serve all HTML files from the `web` source directory on the `static` URL endpoint:

```yaml
static:
  files: 'web/*.html'
  urlPath: 'static'
```

If there are files such as `web/index.html` and `web/blog.html`, they would be available at `localhost/static/index.html` and `localhost/static/blog.html` respectively.
Furthermore, if the component is located in the `test-component` directory, and the `urlPath` was set to `'./static/'` instead, then the files would be served from `localhost/test-component/static/*` instead.

The `urlPath` is optional; for example, to configure the [graphqlSchema](./built-in-extensions#graphqlschema) component to load all schemas within the `src/schema` directory, only a `files` glob pattern is required:

```yaml
graphqlSchema:
  files: 'src/schema/*.schema'
```

The `files` option also supports a more complex options object. These additional fields enable finer control of the glob pattern matching.

For example, to match files within `web`, and omit any within the `web/images` directory, the configuration could be:

```yaml
static:
  files:
    source: 'web/**/*'
    ignore: ['web/images']
```

In order to match only files:

```yaml
test-component:
  files:
    source: 'dir/**/*'
    only: 'files'
```

### Resource Extension API

In order for an extension to be classified as a Resource Extension, it must implement at least one of the `handleFile()`, `handleDirectory()`, `setupFile()`, or `setupDirectory()` methods. As a standalone extension, these methods should be named and exported directly. For example:

```js
// ESM
export function handleFile() {}
export function setupDirectory() {}

// or CJS
function handleDirectory() {}
function setupFile() {}

module.exports = { handleDirectory, setupFile };
```

When returned by a [Protocol Extension](#protocol-extension), these methods should be defined on the object instead:

```js
export function start() {
	return {
		handleFile() {},
	};
}
```

#### `handleFile(contents, urlPath, absolutePath, resources): void | Promise<void>`

#### `setupFile(contents, urlPath, absolutePath, resources): void | Promise<void>`

These methods are for processing individual files. They can be async.

> Remember!
>
> `setupFile()` is executed **once** on the **main thread** during the main start sequence.
>
> `handleFile()` is executed on **worker threads** and is executed again during restarts.

Parameters:

- `contents` - `Buffer` - The contents of the file
- `urlPath` - `string` - The recommended URL path of the file
- `absolutePath` - `string` - The absolute path of the file
- `resources` - `Object` - A collection of the currently loaded resources

Returns: `void | Promise<void>`

#### `handleDirectory(urlPath, absolutePath, resources): boolean | void | Promise<boolean | void>`

#### `setupDirectory(urlPath, absolutePath, resources): boolean | void | Promise<boolean | void>`

These methods are for processing directories. They can be async.

If the function returns or resolves a truthy value, then the component loading sequence will end and no other entries within the directory will be processed.

> Remember!
>
> `setupDirectory()` is executed **once** on the **main thread** during the main start sequence.
>
> `handleDirectory()` is executed on **worker threads** and is executed again during restarts.

Parameters:

- `urlPath` - `string` - The recommended URL path of the directory
- `absolutePath` - `string` - The absolute path of the directory
- `resources` - `Object` - A collection of the currently loaded resources

Returns: `boolean | void | Promise<boolean | void>`
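To make the shape of these exports concrete, here is a minimal sketch of a hypothetical Resource Extension that simply logs the entries it is given (the behavior is illustrative, not a real built-in; `logger` is the Harper global):

```js
// A minimal sketch: log every matched file and directory.
export function handleFile(contents, urlPath, absolutePath, resources) {
	// `contents` is a Buffer with the file's contents
	logger.info(`Loaded ${urlPath} (${contents.length} bytes) from ${absolutePath}`);
}

export function handleDirectory(urlPath, absolutePath, resources) {
	logger.info(`Entering directory ${urlPath}`);
	// Returning a falsy value lets the loader continue processing entries
	return false;
}
```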
## Protocol Extension

A Protocol Extension is a more advanced form of a Resource Extension and is mainly used for implementing higher-level protocols. For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) handles building and running a Next.js project. A Protocol Extension is particularly useful for adding custom networking handlers (see the [`server`](../globals#server) global API documentation for more information).

### Protocol Extension Configuration

In addition to the `files` and `urlPath` [Resource Extension configuration](#resource-extension-configuration) options, and the `package` [Custom Component configuration](#custom-component-configuration) option, Protocol Extensions can also specify additional configuration options. Any options added to the extension configuration (in `config.yaml`) will be passed through to the `options` object of the `start()` and `startOnMainThread()` methods.

For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs#options) specifies multiple options that can be included in its configuration. A Next.js app using `@harperdb/nextjs` may specify the following `config.yaml`:

```yaml
'@harperdb/nextjs':
  package: '@harperdb/nextjs'
  files: './'
  prebuilt: true
  dev: false
```

Many protocol extensions will use the `port` and `securePort` options for configuring networking handlers. Many of the [`server`](../globals#server) global APIs accept `port` and `securePort` options, so extensions often replicate these options for simple pass-through.

### Protocol Extension API

A Protocol Extension is made up of two distinct methods, [`start()`](#startoptions-resourceextension--promiseresourceextension) and [`startOnMainThread()`](#startonmainthreadoptions-resourceextension--promiseresourceextension). Similar to a Resource Extension, the `start()` method is executed on _all worker threads_ and is _executed again on restarts_. The `startOnMainThread()` method is **only** executed **once** during the initial system start sequence. These methods receive an identical `options` object parameter, and both can return a Resource Extension (i.e. an object containing one or more of the methods listed above).

#### `start(options): ResourceExtension | Promise<ResourceExtension>`

#### `startOnMainThread(options): ResourceExtension | Promise<ResourceExtension>`

Parameters:

- `options` - `Object` - An object representation of the extension's configuration options.

Returns: `Object` - An object that implements any of the [Resource Extension APIs](#resource-extension-api)

diff --git a/versioned_docs/version-4.7/reference/components/index.md b/versioned_docs/version-4.7/reference/components/index.md
new file mode 100644
index 00000000..30ce276d
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/components/index.md
@@ -0,0 +1,39 @@
---
title: Components
---

# Components

**Components** are the high-level concept for modules that extend the Harper core platform with additional functionality. Components encapsulate both applications and extensions.

> We are actively working to disambiguate the terminology. When you see "component", such as in the Operations API or CLI, it generally refers to an application. We will do our best to clarify exactly which classification of component is meant whenever possible.

**Applications** are best defined as the implementation of a specific user-facing feature or functionality. Applications are built on top of extensions and can be thought of as the end product that users interact with. For example, a Next.js application that serves a web interface or an Apollo GraphQL server that provides a GraphQL API are both applications.
**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality the application is implementing. For example, the built-in `graphqlSchema` extension enables applications to define their databases and tables using GraphQL schemas. Furthermore, the `@harperdb/nextjs` and `@harperdb/apollo` extensions are the building blocks that provide support for building Next.js and Apollo applications.

> As of Harper v4.6, a new, **experimental** component system has been introduced called **plugins**. Plugins are a **new iteration of the existing extension system**. They are simultaneously a simplification and an extensibility upgrade. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only have to define a single `handleApplication` method. Plugins are **experimental**, and complete documentation is available on the [plugin API](components/plugins) page. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported.

Put together: the support for implementing a feature is the extension, and the actual implementation of that feature is the application.

For more information on the differences between applications and extensions, refer to the beginning of the [Applications](../developers/applications/) guide documentation section.

This technical reference section has detailed information on various component systems:

- [Built-In Extensions](components/built-in-extensions)
- [Configuration](components/configuration)
- [Managing Applications](components/applications)
- [Extensions](components/extensions)
- [(Experimental) Plugins](components/plugins)

## Custom Applications

- [`@harperdb/status-check`](https://github.com/HarperDB/status-check)
- [`@harperdb/prometheus-exporter`](https://github.com/HarperDB/prometheus-exporter)
- [`@harperdb/acl-connect`](https://github.com/HarperDB/acl-connect)

## Custom Extensions

- [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs)
- [`@harperdb/apollo`](https://github.com/HarperDB/apollo)
- [`@harperdb/astro`](https://github.com/HarperDB/astro)

diff --git a/versioned_docs/version-4.7/reference/components/plugins.md b/versioned_docs/version-4.7/reference/components/plugins.md
new file mode 100644
index 00000000..7ce1d3c3
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/components/plugins.md
@@ -0,0 +1,629 @@
---
title: Experimental Plugins
---

# Experimental Plugins

The new, experimental **plugin** API is an iteration of the existing extension system. It simplifies the API by removing the need for multiple methods (`start`, `startOnMainThread`, `handleFile`, `setupFile`, etc.) and instead only requires a single `handleApplication` method. Plugins are designed to be more extensible and easier to use, and they are intended to replace the concept of extensions in the future.

Similar to the existing extension API, a plugin must specify a `pluginModule` option within `config.yaml`. This must be a path to the plugin module source code. The path must resolve from the root of the module directory. For example: `pluginModule: plugin.js`.

If the plugin is written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version (i.e. `pluginModule: ./dist/index.js`).
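For example, an application consuming a hypothetical `@harperdb/custom-plugin` package would declare it as a dependency in `package.json` and enable it in its own `config.yaml` (names and glob are illustrative):

```yaml
customPlugin:
  package: '@harperdb/custom-plugin'
  files: 'web/**'
```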
It is also recommended that all plugins have a `package.json` that specifies JavaScript package metadata such as name, version, type, etc. Since plugins are just JavaScript packages, they can do anything a JavaScript package can normally do. They can be written in TypeScript and compiled to JavaScript. They can export an executable (using the [bin](https://docs.npmjs.com/cli/configuring-npm/package-json#bin) property). They can be published to npm. The possibilities are endless!

The key to a plugin is the [`handleApplication()`](#function-handleapplicationscope-scope-void--promisevoid) method. It must be exported by the `pluginModule`, and cannot coexist with any of the other extension methods such as `start`, `handleFile`, etc. The component loader will throw an error if both are defined.

The `handleApplication()` method is executed **sequentially** across all **worker threads** during the component loading sequence. It receives a single `scope` argument that contains all of the relevant metadata and APIs for interacting with the associated component.

The method can be async, and it is awaited by the component loader.

However, it is highly recommended to avoid event-loop-blocking operations within the `handleApplication()` method. See the examples section for best practices on how to use the `scope` argument effectively.

## Configuration

As plugins are meant to be used by applications in order to implement some feature, many plugins provide a variety of configuration options to customize their behavior. Some plugins even require certain configuration options to be set in order to function properly.

As a brief overview, the general configuration options available for plugins are:

- `files` - `string` | `string[]` | [`FilesOptionObject`](#interface-filesoptionobject) - _optional_ - A glob pattern string or array of strings that specifies the files and directories to be handled by the plugin's default `EntryHandler` instance.
- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries handled by the plugin's default `EntryHandler` instance.
- `timeout` - `number` - _optional_ - The timeout in milliseconds for the plugin's operations. If not specified, the system default is **30 seconds**. Plugins may override the system default themselves, but this configuration option is the highest priority and takes precedence.

### File Entries

Just like extensions, plugins support the `files` and `urlPath` options for file entry matching. The values specified for these options are used for the default `EntryHandler` instance created by the `scope.handleEntry()` method. As the reference documentation details, similar options can be used to create custom `EntryHandler` instances too.

The `files` option can be a glob pattern string, an array of glob pattern strings, or a more expressive glob options object.

- The patterns **cannot** contain `..` or start with `/`.
- The pattern `.` or `./` is transformed into `**/*` automatically.
- Often, it is best to omit a leading `.` or `./` in the glob pattern.

The `urlPath` option is a base URL path that is prepended to the resolved `files` entries.

- It **cannot** contain `..`.
- If it starts with `./` or is just `.`, the name of the plugin will be automatically prepended to it.
+ +Putting this all together, to configure the [static](./built-in-extensions#static) built-in extension to serve files from the `web` directory but at the `/static/` path, the `config.yaml` would look like this: + +```yaml +static: + files: 'web/**/*' + urlPath: '/static/' +``` + +Keep in mind the `urlPath` option is completely optional. + +As another example, to configure the [graphqlSchema](./built-in-extensions#graphqlschema) built-in extension to serve only `*.graphql` files from within the top-level of the `src/schema` directory, the `config.yaml` would look like this: + +```yaml +graphqlSchema: + files: 'src/schema/*.graphql' +``` + +As detailed, the `files` option also supports a more complex object syntax for advanced use cases. + +For example, to match files within the `web` directory, and omit any within `web/images`, you can use a configuration such as: + +```yaml +static: + files: + source: 'web/**/*' + ignore: 'web/images/**' +``` + +> If you're transitioning from the [extension](./extensions) system, the `files` option object no longer supports an `only` field. Instead, use the `entryEvent.entryType` or the specific `entryEvent.eventType` fields in [`onEntryEventHandler(entryEvent)`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) method or any of the specific [`EntryHandler`](#class-entryhandler) events. + +### Timeouts + +The default timeout for all plugins is **30 seconds**. If the method does not complete within this time, the component loader will throw an error and unblock the component loading sequence. This is to prevent the component loader from hanging indefinitely if a plugin fails to respond or takes too long to execute. + +The plugin module can export a `defaultTimeout` variable (in milliseconds) that will override the system default. + +For example: + +```typescript +export const defaultTimeout = 60_000; // 60 seconds +``` + +Additionally, users can specify a `timeout` option in their application's `config.yaml` file for a specific plugin. This option takes precedence over the plugin's `defaultTimeout` and the system default. + +For example: + +```yaml +customPlugin: + package: '@harperdb/custom-plugin' + files: 'foo.js' + timeout: 45_000 # 45 seconds +``` + +## Example: Statically hosting files + +This is a functional example of how the `handleApplication()` method and `scope` argument can be used to create a simple static file server plugin. This example assumes that the component has a `config.yaml` with the `files` option set to a glob pattern that matches the files to be served. + +> This is a simplified form of the [static](./built-in-extensions#static) built-in extension. + +```js +export function handleApplication(scope) { + const staticFiles = new Map(); + + scope.options.on('change', (key, value, config) => { + if (key[0] === 'files' || key[0] === 'urlPath') { + // If the files or urlPath options change, we need to reinitialize the static files map + staticFiles.clear(); + logger.info(`Static files reinitialized due to change in ${key.join('.')}`); + } + }); + + scope.handleEntry((entry) => { + if (entry.entryType === 'directory') { + logger.info(`Cannot serve directories. 
Update the files option to only match files.`);
			return;
		}

		switch (entry.eventType) {
			case 'add':
			case 'change':
				// Store / Update the file contents in memory for serving
				staticFiles.set(entry.urlPath, entry.contents);
				break;
			case 'unlink':
				// Remove the file from memory when it is deleted
				staticFiles.delete(entry.urlPath);
				break;
		}
	});

	scope.server.http(
		(req, next) => {
			if (req.method !== 'GET') return next(req);

			// Attempt to retrieve the requested static file from memory
			const staticFile = staticFiles.get(req.pathname);

			return staticFile
				? {
						statusCode: 200,
						body: staticFile,
					}
				: {
						statusCode: 404,
						body: 'File not found',
					};
		},
		{ runFirst: true }
	);
}
```

In this example, the entry handler method passed to `handleEntry` manages the map of static files in memory using their computed `urlPath` and `contents`. If the config file changes (and thus a new default file or URL path is specified), the plugin also clears the file map to remove stale entries. Furthermore, it uses the `server.http()` middleware to hook into the HTTP request handling.

This example is heavily simplified, but it demonstrates how the different key parts of `scope` can be used together to provide a performant and reactive application experience.

## API

### TypeScript support

The classes and types referenced below are all exported by the `harperdb` package. Just import the ones you need like this:

```typescript
import { Scope, type Config } from 'harperdb';
```

### Function: `handleApplication(scope: Scope): void | Promise<void>`

Parameters:

- `scope` - [`Scope`](#class-scope) - An instance of the `Scope` class that provides access to the relative application's configuration, resources, and other APIs.

Returns: `void | Promise<void>`

This is the only method a plugin module must export. It can be async and is awaited by the component loader. The `scope` argument provides access to the relative application's configuration, resources, and other APIs.

### Class: `Scope`

- Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter)

#### Event: `'close'`

Emitted after the scope is closed via the `close()` method.

#### Event: `'error'`

- `error` - `unknown` - The error that occurred.

#### Event: `'ready'`

Emitted when the Scope is ready to be used after loading the associated config file. It is awaited by the component loader, so it is not necessary to await it within the `handleApplication()` method.

#### `scope.close()`

Returns: `this` - The current `Scope` instance.

Closes all associated entry handlers, closes the associated `scope.options` instance, emits the `'close'` event, and then removes all other listeners on the instance.

#### `scope.handleEntry([files][, handler])`

Parameters:

- `files` - [`FilesOption`](#interface-filesoption) | [`FileAndURLPathConfig`](#interface-fileandurlpathconfig) | [`onEntryEventHandler`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) - _optional_
- `handler` - [`onEntryEventHandler`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) - _optional_

Returns: [`EntryHandler`](#class-entryhandler) - An instance of the `EntryHandler` class that can be used to handle entries within the scope.

The `handleEntry()` method is the key to handling file system entries specified by a `files` glob pattern option in `config.yaml`.
This method is used to register an entry event handler, specifically for the `EntryHandler` [`'all'`](#event-all) event. The method signature is very flexible and allows for the following variations:

- `scope.handleEntry()` (no arguments): returns the default `EntryHandler` created from the `files` and `urlPath` options in `config.yaml`.
- `scope.handleEntry(handler)` (where `handler` is an `onEntryEventHandler`): returns the default `EntryHandler` instance (based on the options within `config.yaml`) and uses the provided `handler` for the [`'all'`](#event-all) event.
- `scope.handleEntry(files)` (where `files` is a `FilesOption` or `FileAndURLPathConfig`): returns a new `EntryHandler` instance that handles the specified `files` configuration.
- `scope.handleEntry(files, handler)` (where `files` is a `FilesOption` or `FileAndURLPathConfig`, and `handler` is an `onEntryEventHandler`): returns a new `EntryHandler` instance that handles the specified `files` configuration and uses the provided `handler` for the [`'all'`](#event-all) event.

For example:

```js
export function handleApplication(scope) {
	// Get the default EntryHandler instance
	const defaultEntryHandler = scope.handleEntry();

	// Assign a handler for the 'all' event on the default EntryHandler
	scope.handleEntry((entry) => {
		/* ... */
	});

	// Create a new EntryHandler for the 'src/**/*.js' files option with a custom `'all'` event handler.
	const customEntryHandler = scope.handleEntry(
		{
			files: 'src/**/*.js',
		},
		(entry) => {
			/* ... */
		}
	);

	// Create another custom EntryHandler for the 'src/**/*.ts' files option, but without an `'all'` event handler.
	const anotherCustomEntryHandler = scope.handleEntry({
		files: 'src/**/*.ts',
	});
}
```

And thus, if the previous code was used by a component with the following `config.yaml`:

```yaml
customPlugin:
  files: 'web/**/*'
```

Then the default `EntryHandler` instance would be created to handle all entries within the `web` directory.

#### `scope.requestRestart()`

Returns: `void`

Requests a Harper restart. This **does not** restart the instance immediately, but rather indicates to the user that a restart is required. This should be called when the plugin cannot handle an entry event itself and wants to indicate to the user that the Harper instance should be restarted.

This method is called automatically by the `scope` instance if the user has not defined a `scope.options.on('change')` handler, or if an event handler exists but is missing a necessary handler method.

#### `scope.resources`

Returns: `Map` - A map of the currently loaded [Resource](../globals#resource) instances.

#### `scope.server`

Returns: `server` - A reference to the [server](../globals#server) global API.

#### `scope.options`

Returns: [`OptionsWatcher`](#class-optionswatcher) - An instance of the `OptionsWatcher` class that provides access to the application's configuration options. Emits `'change'` events when the plugin's part of the component's config file is modified.
For example, if the plugin `customPlugin` is configured by an application with:

```yaml
customPlugin:
  files: 'foo.js'
```

And has the following `handleApplication(scope)` implementation:

```typescript
export function handleApplication(scope) {
	scope.options.on('change', (key, value, config) => {
		if (key[0] === 'files') {
			// Handle the change in the files option
			scope.logger.info(`Files option changed to: ${value}`);
		}
	});
}
```

Then modifying the `files` option in the `config.yaml` to `bar.js` would log the following:

```plaintext
Files option changed to: bar.js
```

#### `scope.logger`

Returns: `logger` - A scoped instance of the [`logger`](../globals#logger) class that provides logging capabilities for the plugin.

It is recommended to use this instead of the `logger` global.

#### `scope.name`

Returns: `string` - The name of the plugin as configured in the `config.yaml` file. This is the key under which the plugin is configured.

#### `scope.directory`

Returns: `string` - The directory of the application. This is the root directory of the component where the `config.yaml` file is located.

### Interface: `FilesOption`

- `string` | `string[]` | [`FilesOptionObject`](#interface-filesoptionobject)

### Interface: `FilesOptionObject`

- `source` - `string` | `string[]` - _required_ - The glob pattern string or array of strings.
- `ignore` - `string` | `string[]` - _optional_ - A glob pattern or array of glob patterns to exclude from matches. This is an alternative way to use negative patterns. Defaults to `[]`.

### Interface: `FileAndURLPathConfig`

- `files` - [`FilesOption`](#interface-filesoption) - _required_ - A glob pattern string, array of glob pattern strings, or a more expressive glob options object determining the set of files and directories to be resolved for the plugin.
- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries.

### Class: `OptionsWatcher`

- Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter)

#### Event: `'change'`

- `key` - `string[]` - The key of the changed option split into parts (e.g. `foo.bar` becomes `['foo', 'bar']`).
- `value` - [`ConfigValue`](#interface-configvalue) - The new value of the option.
- `config` - [`ConfigValue`](#interface-configvalue) - The entire configuration object of the plugin.

The `'change'` event is emitted whenever a configuration option is changed in the configuration file relative to the application and the respective plugin.

Given an application using the following `config.yaml`:

```yaml
customPlugin:
  files: 'web/**/*'
otherPlugin:
  files: 'index.js'
```

The `scope.options` instances for `customPlugin` and `otherPlugin` would emit `'change'` events when the `files` options relative to them are modified.

For example, if the `files` option for `customPlugin` is changed to `web/**/*.js`, the following event would be emitted _only_ within the `customPlugin` scope:

```js
scope.options.on('change', (key, value, config) => {
	key; // ['files']
	value; // 'web/**/*.js'
	config; // { files: 'web/**/*.js' }
});
```

#### Event: `'close'`

Emitted when the `OptionsWatcher` is closed via the `close()` method. The watcher is not usable after this event is emitted.

#### Event: `'error'`

- `error` - `unknown` - The error that occurred.
+ +#### Event: `'ready'` + +- `config` - [`ConfigValue`](#interface-configvalue) | `undefined` - The configuration object of the plugin, if present. + +This event can be emitted multiple times. It is first emitted upon the initial load, but will also be emitted after restoring a configuration file or configuration object after a `'remove'` event. + +#### Event: `'remove'` + +The configuration was removed. This can happen if the configuration file was deleted, the configuration object within the file is deleted, or if the configuration file fails to parse. Once restored, the `'ready'` event will be emitted again. + +#### `options.close()` + +Returns: `this` - The current `OptionsWatcher` instance. + +Closes the options watcher, removing all listeners and preventing any further events from being emitted. The watcher is not usable after this method is called. + +#### `options.get(key)` + +Parameters: + +- `key` - `string[]` - The key of the option to get, split into parts (e.g. `foo.bar` is represented as `['foo', 'bar']`). + +Returns: [`ConfigValue`](#interface-configvalue) | `undefined` + +If the config is defined it will attempt to retrieve the value of the option at the specified key. If the key does not exist, it will return `undefined`. + +#### `options.getAll()` + +Returns: [`ConfigValue`](#interface-configvalue) | `undefined` + +Returns the entire configuration object for the plugin. If the config is not defined, it will return `undefined`. + +#### `options.getRoot()` + +Returns: [`Config`](#interface-config) | `undefined` + +Returns the root configuration object of the application. This is the entire configuration object, basically the parsed form of the `config.yaml`. If the config is not defined, it will return `undefined`. + +#### Interface: `Config` + +- `[key: string]` [`ConfigValue`](#interface-configvalue) + +An object representing the `config.yaml` file configuration. + +#### Interface: `ConfigValue` + +- `string` | `number` | `boolean` | `null` | `undefined` | `ConfigValue[]` | [`Config`](#interface-config) + +Any valid configuration value type. Essentially, the primitive types, an array of those types, or an object comprised of values of those types. + +### Class: `EntryHandler` + +Extends: [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) + +Created by calling [`scope.handleEntry()`](#scopehandleentry) method. + +#### Event: `'all'` + +- `entry` - [`FileEntry`](#interface-fileentry) | [`DirectoryEntry`](#interface-directoryentry) - The entry that was added, changed, or removed. + +The `'all'` event is emitted for all entry events, including file and directory events. This is the event that the handler method in `scope.handleEntry` is registered for. The event handler receives an `entry` object that contains the entry metadata, such as the file contents, URL path, and absolute path. + +An effective pattern for this event is: + +```js +async function handleApplication(scope) { + scope.handleEntry((entry) => { + switch (entry.eventType) { + case 'add': + // Handle file addition + break; + case 'change': + // Handle file change + break; + case 'unlink': + // Handle file deletion + break; + case 'addDir': + // Handle directory addition + break; + case 'unlinkDir': + // Handle directory deletion + break; + } + }); +} +``` + +#### Event: `'add'` + +- `entry` - [`AddFileEvent`](#interface-addfileevent) - The file entry that was added. + +The `'add'` event is emitted when a file is created (or the watcher sees it for the first time). 
The event handler receives an `AddFileEvent` object that contains the file contents, URL path, absolute path, and other metadata.

#### Event: `'addDir'`

- `entry` - [`AddDirectoryEvent`](#interface-adddirectoryevent) - The directory entry that was added.

The `'addDir'` event is emitted when a directory is created (or the watcher sees it for the first time). The event handler receives an `AddDirectoryEvent` object that contains the URL path and absolute path of the directory.

#### Event: `'change'`

- `entry` - [`ChangeFileEvent`](#interface-changefileevent) - The file entry that was changed.

The `'change'` event is emitted when a file is modified. The event handler receives a `ChangeFileEvent` object that contains the updated file contents, URL path, absolute path, and other metadata.

#### Event: `'close'`

Emitted when the entry handler is closed via the [`entryHandler.close()`](#entryhandlerclose) method.

#### Event: `'error'`

- `error` - `unknown` - The error that occurred.

#### Event: `'ready'`

Emitted when the entry handler is ready to be used. This is not automatically awaited by the component loader, nor does it need to be; calling `scope.handleEntry()` is perfectly sufficient. This event is generally useful if you need to do something _after_ the entry handler is actually watching and handling entries.

#### Event: `'unlink'`

- `entry` - [`UnlinkFileEvent`](#interface-unlinkfileevent) - The file entry that was deleted.

The `'unlink'` event is emitted when a file is deleted. The event handler receives an `UnlinkFileEvent` object that contains the URL path and absolute path of the deleted file.

#### Event: `'unlinkDir'`

- `entry` - [`UnlinkDirectoryEvent`](#interface-unlinkdirectoryevent) - The directory entry that was deleted.

The `'unlinkDir'` event is emitted when a directory is deleted. The event handler receives an `UnlinkDirectoryEvent` object that contains the URL path and absolute path of the deleted directory.

#### `entryHandler.name`

Returns: `string`

The name of the plugin as configured in the `config.yaml` file. This is the key under which the plugin is configured.

#### `entryHandler.directory`

Returns: `string`

The directory of the application. This is the root directory of the component where the `config.yaml` file is located.

#### `entryHandler.close()`

Returns: `this` - The current `EntryHandler` instance.

Closes the entry handler, removing all listeners and preventing any further events from being emitted. The handler can be started again using the [`entryHandler.update()`](#entryhandlerupdateconfig) method.

#### `entryHandler.update(config)`

Parameters:

- `config` - [`FilesOption`](#interface-filesoption) | [`FileAndURLPathConfig`](#interface-fileandurlpathconfig) - The configuration object for the entry handler.

This method will update an existing entry handler to watch new entries. It will close the underlying watcher and create a new one, but will maintain any existing listeners on the `EntryHandler` instance itself.

This method returns a promise associated with the `'ready'` event of the updated handler.

#### Interface: `BaseEntry`

- `stats` - [`fs.Stats`](https://nodejs.org/docs/latest/api/fs.html#class-fsstats) | `undefined` - The file system stats for the entry.
- `urlPath` - `string` - The recommended URL path of the entry.
- `absolutePath` - `string` - The absolute path of the entry.

The foundational entry handle event object.
The `stats` may or may not be present depending on the event, entry type, and platform. + +The `urlPath` is resolved based on the configured pattern (`files:` option) combined with the optional `urlPath` option. This path is generally useful for uniquely representing the entry. It is used in the built-in components such as `jsResource` and `static`. + +The `absolutePath` is the file system path for the entry. + +#### Interface: `FileEntry` + +Extends [`BaseEntry`](#interface-baseentry) + +- `contents` - `Buffer` - The contents of the file. + +A specific extension of the `BaseEntry` interface representing a file entry. We automatically read the contents of the file so the user doesn't have to bother with FS operations. + +There is no `DirectoryEntry` since there is no other important metadata aside from the `BaseEntry` properties. If a user wants the contents of a directory, they should adjust the pattern to resolve files instead. + +#### Interface: `EntryEvent` + +Extends [`BaseEntry`](#interface-baseentry) + +- `eventType` - `string` - The type of entry event. +- `entryType` - `string` - The type of entry, either a file or a directory. + +A general interface representing the entry handle event objects. + +#### Interface: `AddFileEvent` + +Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) + +- `eventType` - `'add'` +- `entryType` - `'file'` + +Event object emitted when a file is created (or the watcher sees it for the first time). + +#### Interface: `ChangeFileEvent` + +Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) + +- `eventType` - `'change'` +- `entryType` - `'file'` + +Event object emitted when a file is modified. + +#### Interface: `UnlinkFileEvent` + +Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) + +- `eventType` - `'unlink'` +- `entryType` - `'file'` + +Event object emitted when a file is deleted. + +#### Interface: `FileEntryEvent` + +- `AddFileEvent` | `ChangeFileEvent` | `UnlinkFileEvent` + +A union type representing the file entry events. These events are emitted when a file is created, modified, or deleted. The `FileEntry` interface provides the file contents and other metadata. + +#### Interface: `AddDirectoryEvent` + +Extends [`EntryEvent`](#interface-entryevent) + +- `eventType` - `'addDir'` +- `entryType` - `'directory'` + +Event object emitted when a directory is created (or the watcher sees it for the first time). + +#### Interface: `UnlinkDirectoryEvent` + +Extends [`EntryEvent`](#interface-entryevent) + +- `eventType` - `'unlinkDir'` +- `entryType` - `'directory'` + +Event object emitted when a directory is deleted. + +#### Interface: `DirectoryEntryEvent` + +- `AddDirectoryEvent` | `UnlinkDirectoryEvent` + +A union type representing the directory entry events. There are no change events for directories since they are not modified in the same way as files. + +#### Function: `onEntryEventHandler(entryEvent: FileEntryEvent | DirectoryEntryEvent): void` + +Parameters: + +- `entryEvent` - [`FileEntryEvent`](#interface-fileentryevent) | [`DirectoryEntryEvent`](#interface-directoryentryevent) + +Returns: `void` + +This function is what is passed to the `scope.handleEntry()` method as the handler for the `'all'` event. This is also applicable to a custom `.on('all', handler)` method for any `EntryHandler` instance. 
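Tying these event objects together, here is a sketch of a plugin that reacts to individual file events through a custom `EntryHandler` (the glob pattern is hypothetical):

```js
export function handleApplication(scope) {
	// Create a custom EntryHandler that only watches HTML files
	const pages = scope.handleEntry({ files: 'web/**/*.html' });

	pages.on('add', (entry) => {
		// entry.contents is a Buffer; entry.urlPath is the recommended URL path
		scope.logger.info(`New page ${entry.urlPath} (${entry.contents.length} bytes)`);
	});

	pages.on('unlink', (entry) => {
		scope.logger.info(`Removed page ${entry.urlPath}`);
	});
}
```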
diff --git a/versioned_docs/version-4.7/reference/content-types.md b/versioned_docs/version-4.7/reference/content-types.md
new file mode 100644
index 00000000..b7d223f4
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/content-types.md
@@ -0,0 +1,35 @@
---
title: Content Types
---

# Content Types

Harper supports several different content types (or MIME types) both for HTTP request bodies (describing operations) and for serializing content into HTTP response bodies. Harper follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard Harper operations.

:::tip Need a custom content type?

Harper's extensible content type system lets you add support for any serialization format (XML, YAML, proprietary formats, etc.) by registering custom handlers in the [`contentTypes`](./globals.md#contenttypes) global Map. See the linked API reference for detailed implementation types, handler properties, and examples.

:::

For request body content, the content type should be specified with the `Content-Type` header. For example, with JSON use `Content-Type: application/json`, and for CBOR include `Content-Type: application/cbor`. To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`.

The following content types are supported:

## JSON - application/json

JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by Harper and can't natively encode data types like binary data or explicit Maps/Sets. JSON is also not as efficient as binary formats. When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development when standard JSON types are sufficient, particularly when combined with compression and when debuggability/observability is important.

## CBOR - application/cbor

CBOR is a highly efficient binary format and the recommended format for most production use cases with Harper. CBOR supports the full range of Harper data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format is well standardized, and Harper's CBOR support provides a strong balance of performance and size efficiency.

## MessagePack - application/x-msgpack

MessagePack is another efficient binary format like CBOR, with support for all Harper data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have (good) CBOR support. However, MessagePack does not have native support for streaming arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with Harper must be prepared to decode a direct sequence of MessagePack values to properly read responses.
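As a sketch of what that means in practice, a client using the `msgpackr` package (an assumed dependency; the endpoint and port are hypothetical) can decode such a response with `unpackMultiple`:

```js
import { unpackMultiple } from 'msgpackr';

// Request query results encoded as MessagePack
const response = await fetch('http://localhost:9926/Dog/', {
	headers: { Accept: 'application/x-msgpack' },
});

// The body is a concatenated sequence of MessagePack values,
// so decode it with unpackMultiple rather than a single unpack call.
const records = unpackMultiple(Buffer.from(await response.arrayBuffer()));
```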
## Comma-separated Values (CSV) - text/csv

Comma-separated values is an easy-to-use, easy-to-understand format that can be readily imported into spreadsheets or used for data processing. CSV lacks hierarchical structure for most data types and shouldn't be used for frequent/production use, but when you need it, it is available.

In addition, with the REST interface, you can use file-style extensions to indicate an encoding, like [https://host/path.csv](https://host/path.csv) to indicate CSV encoding. See the [REST documentation](../developers/rest) for more information on how to do this.

diff --git a/versioned_docs/version-4.7/reference/data-types.md b/versioned_docs/version-4.7/reference/data-types.md
new file mode 100644
index 00000000..df03e718
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/data-types.md
@@ -0,0 +1,60 @@
---
title: Data Types
---

# Data Types

Harper supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (Harper supports all JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. Harper supports MessagePack and CBOR, which allow for all of Harper's supported data types. [Schema definitions can specify the expected types for fields, with GraphQL Schema Types](../developers/applications/defining-schemas), which are used for validation of incoming typed data (JSON, MessagePack) and for auto-conversion of untyped data (CSV, [query parameters](../developers/rest)). Available data types include:

(Note that these labels are descriptive; they do not necessarily correspond to the GraphQL schema type names, but the schema type names are noted where possible.)

## Boolean

`true` or `false`. The GraphQL schema type name is `Boolean`.

## String

Strings, or text, are a sequence of any Unicode characters and are internally encoded with UTF-8. The GraphQL schema type name is `String`.

## Number

Numbers can be stored as signed integers up to 1000 bits of precision (about 300 digits) or as floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. With JSON, numbers are automatically parsed and stored in the most appropriate format. Custom components and applications may use BigInt numbers to store/access integers that are larger than 53 bits. The following GraphQL schema type names are supported:

- `Float` - Any number that can be represented with a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) ("double")
- `Int` - Any integer between -2147483648 and 2147483647
- `Long` - Any integer between -9007199254740992 and 9007199254740992
- `BigInt` - Any integer (negative or positive) with less than 300 digits

Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately.
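As a sketch of what handling the type appropriately can look like, custom code reading an attribute that may exceed 53-bit integer precision (the table and field names here are hypothetical) can branch on the runtime type:

```js
const record = await MyTable.get(recordId);

// Integers larger than 53 bits come back as BigInt, a distinct type from
// standard numbers, so convert explicitly before mixing the two.
if (typeof record.counter === 'bigint') {
	console.log(record.counter.toString());
} else {
	console.log(record.counter);
}
```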
## Object/Map

Objects, or maps, that hold a set of named properties can be stored in Harper. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. The order of properties is also preserved in Harper's storage. Duplicate property keys are not allowed (they are dropped in parsing any incoming data).

## Array

Arrays hold an ordered sequence of values and can be stored in Harper. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties.

## Null

A null value can be stored in Harper property values as well.

## Date

Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. The GraphQL schema type name is `Date`.

## Binary Data

Binary data can be stored in property values as well, with two different data types available:

### Bytes

JSON doesn't have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in Harper. Custom Functions can also store binary data by using Node.js's Buffer or Uint8Array instances to hold the binary data. The GraphQL schema type name is `Bytes`.

### Blobs

Binary data can also be stored with [`Blob`s](blob), which can scale much better for larger content than `Bytes`, as they are designed to be streamed and do not need to be held entirely in memory. It is recommended that `Blob`s be used for content larger than 20KB.

## Explicit Map/Set

Explicit instances of JavaScript Maps and Sets can be stored and preserved in Harper as well. This can't be represented with JSON, but can be with CBOR.

diff --git a/versioned_docs/version-4.7/reference/dynamic-schema.md b/versioned_docs/version-4.7/reference/dynamic-schema.md
new file mode 100644
index 00000000..97f5792d
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/dynamic-schema.md
@@ -0,0 +1,148 @@
---
title: Dynamic Schema
---

# Dynamic Schema

When tables are created without any schema, through the operations API (without specifying attributes) or the Studio, the tables follow "dynamic schema" behavior. Generally, it is best practice to define schemas for your tables to ensure predictable, consistent structures with data integrity and precise control over indexing, without dependency on the data itself. However, it can often be simpler and quicker to create a table and let the data auto-generate the schema dynamically, with everything auto-indexed for broad querying.

With dynamic schemas, individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of the data ingested. Harper tracks the metadata around schemas, tables, and attributes, allowing for `describe_table`, `describe_schema`, and `describe_all` operations.

### Databases

Harper databases hold a collection of tables together in a single file, transactionally connected. This means that operations across tables within a database can be performed in a single atomic transaction. By default, tables are added to the default database called "data", but other databases can be created and specified for tables.

### Tables

Harper tables group records together with a common data pattern. To create a table, users must provide a table name and a primary key.

- **Table Name**: Used to identify the table.
- **Primary Key**: This is a required attribute that serves as the unique identifier for a record; it is also known as the `hash_attribute` in the Harper operations API.
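The describe operations mentioned above can be invoked like any other operation. For example, once the `dev.dog` table from the walkthrough below exists, a `describe_table` request looks like:

```json
{
	"operation": "describe_table",
	"database": "dev",
	"table": "dog"
}
```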
## Primary Key

The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary key; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [Harper Storage Algorithm](storage-algorithm) utilizes this value for indexing.

**Standard Attributes**

With tables that are using dynamic schemas, additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to Harper. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. Harper offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data, the value of that new attribute will be assumed `null` for all existing records.

**Audit Attributes**

Harper automatically creates two audit attributes on each record if the table is created without a schema.

- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format.
- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format.

### Dynamic Schema Example

To better understand the behavior, let's take a look at an example. This example utilizes [Harper API operations](../developers/operations-api/databases-and-tables).

**Create a Database**

```json
{
	"operation": "create_database",
	"database": "dev"
}
```

**Create a Table**

Notice the database name, table name, and primary key name are the only required parameters.

```json
{
	"operation": "create_table",
	"database": "dev",
	"table": "dog",
	"primary_key": "id"
}
```

At this point the table does not have structure beyond what we provided, so the table looks like this:

**dev.dog**

![](/img/v4.6/reference/dynamic_schema_2_create_table.png.webp)

**Insert Record**

To define attributes, we do not need to do anything beyond sending them in with an insert operation.

```json
{
	"operation": "insert",
	"database": "dev",
	"table": "dog",
	"records": [{ "id": 1, "dog_name": "Penny", "owner_name": "Kyle" }]
}
```

With a single record inserted and new attributes defined, our table now looks like this:

**dev.dog**

![](/img/v4.6/reference/dynamic_schema_3_insert_record.png.webp)

Indexes have been automatically created for the `dog_name` and `owner_name` attributes.

**Insert Additional Record**

If we continue inserting records with the same data schema, no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation.

```json
{
	"operation": "insert",
	"database": "dev",
	"table": "dog",
	"records": [
		{ "id": 2, "dog_name": "Monk", "owner_name": "Aron" },
		{ "dog_name": "Harper", "owner_name": "Stephen" }
	]
}
```

In this case, there is no change to the schema. Our table now looks like this:

**dev.dog**

![](/img/v4.6/reference/dynamic_schema_4_insert_additional_record.png.webp)

**Update Existing Record**

In this case, we will update a record with a new attribute not previously defined on the table.
+ +```json +{ + "operation": "update", + "database": "dev", + "table": "dog", + "records": [ + {"id": 2, "weight_lbs": 35} + ] +} +``` + +Now we have a new attribute called `weight_lbs`. Our table now looks like this: + +**dev.dog** + +![](/img/v4.6/reference/dynamic_schema_5_update_existing_record.png.webp) + +**Query Table with SQL** + +Now, if we query for all records where `weight_lbs` is `null`, we expect to get back two records. + +```json +{ + "operation": "sql", + "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" +} +``` + +This results in the expected two records being returned. + +![](/img/v4.6/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/versioned_docs/version-4.7/reference/globals.md b/versioned_docs/version-4.7/reference/globals.md new file mode 100644 index 00000000..0e09b54a --- /dev/null +++ b/versioned_docs/version-4.7/reference/globals.md @@ -0,0 +1,383 @@ +--- +title: Globals +--- + +# Globals + +The primary way that JavaScript code can interact with Harper is through the global variables, which include several objects and classes that provide access to the tables, server hooks, and resources that Harper provides for building applications. As global variables, these can be directly accessed in any module. + +These global variables are also available through the `harperdb` module/package, which can provide better typing in TypeScript. To use this in your own project directory, make sure you link the package to your current Harper installation: + +```bash +npm link harperdb +``` + +The `harperdb` package is automatically linked for all installed components. Once linked, if you are using ECMAScript module syntax, you can import functions from `harperdb` like: + +```javascript +import { tables, Resource } from 'harperdb'; +``` + +Or if you are using CommonJS format for your modules: + +```javascript +const { tables, Resource } = require('harperdb'); +``` + +The global variables include: + +## `tables` + +This is an object that holds all the tables for the default database (called `data`) as properties. Each of these property values is a table class that subclasses the Resource interface and provides access to the table through the Resource interface. For example, you can get a record from a table (in the default database) called 'my-table' with: + +```javascript +import { tables } from 'harperdb'; +const { MyTable } = tables; +async function getRecord() { + let record = await MyTable.get(recordId); +} +``` + +It is recommended that you [define a database](../developers/applications/defining-schemas) for all the tables that are required to exist in your application. This will ensure that the tables exist on the `tables` object. Also note that the property names follow a CamelCase convention for use in JavaScript and in the GraphQL Schemas, but these are translated to snake_case for the actual table names, and converted back to CamelCase when added to the `tables` object. + +## `databases` + +This is an object that holds all the databases in Harper, and can be used to explicitly access a table by database name. Each database will be a property on this object, and each of these property values will be an object with the set of all tables in that database. The default database, `databases.data`, should equal the `tables` export.
For example, if you want to access the "dog" table in the "dev" database, you could do so with: + +```javascript +import { databases } from 'harperdb'; +const { Dog } = databases.dev; +``` + +## `Resource` + +This is the base class for all resources, including tables and external data sources. This is provided so that you can extend it to implement custom data source providers. See the [Resource API documentation](resources/) for more details about implementing a Resource class. + +## `auth(username, password?): Promise` + +This returns the user object with permissions/authorization information based on the provided username. If a password is provided, the password will be verified before returning the user object (if the password is incorrect, an error will be thrown). + +## `logger` + +This provides methods `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` for logging. See the [logging documentation](../administration/logging/standard-logging) for more information. + +## `server` + +The `server` global object provides a number of functions and objects to interact with Harper's HTTP, networking, and authentication services. + +### `server.http(listener: RequestListener, options: HttpOptions): HttpServer[]` + +Alias: `server.request` + +Add a handler method to the HTTP server request listener middleware chain. + +Returns an array of server instances based on the specified `options.port` and `options.securePort`. + +Example: + +```js +server.http( + (request, next) => { + return request.url === '/graphql' ? handleGraphQLRequest(request) : next(request); + }, + { + runFirst: true, // run this handler first + } +); +``` + +#### `RequestListener` + +Type: `(request: Request, next: RequestListener) => Promise` + +The HTTP request listener to be added to the middleware chain. To continue chain execution, pass the `request` to the `next` function such as `return next(request);`. + +### `Request` and `Response` + +The `Request` and `Response` classes are based on the WHATWG APIs for the [`Request`](https://developer.mozilla.org/en-US/docs/Web/API/Request) and [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) classes. Requests and responses are based on these standards-based APIs to facilitate reuse with modern web code. While Node.js' HTTP APIs are powerful low-level APIs, the `Request`/`Response` APIs provide excellent composability characteristics, well suited for layered middleware and for clean mapping to [RESTful method handlers](./resources/) with promise-based responses, as well as interoperability with other standards-based APIs like [streams](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) used with [`Blob`s](https://developer.mozilla.org/en-US/docs/Web/API/Blob). However, the Harper implementation of these classes is not a direct implementation of the WHATWG APIs, but implements additional/distinct properties for the Harper server environment: + +#### `Request` + +A `Request` object is passed to the direct static REST handlers, and preserved as the context for instance methods, and has the following properties (a short usage sketch follows this list): + +- `url` - This is the request target, which is the portion of the URL that was received by the server. If a client sends a request to `https://example.com:8080/path?query=string`, the actual received request is `GET /path?query=string` and the `url` property will be `/path?query=string`. +- `method` - This is the HTTP method of the request. This is a string like `GET`, `POST`, `PUT`, `DELETE`, etc.
+- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the request. +- `pathname` - This is the path portion of the URL, without the query string. For example, if the URL is `/path?query=string`, the `pathname` will be `/path`. +- `protocol` - This is the protocol of the request, like `http` or `https`. +- `data` - This is the deserialized body of the request (based on the type of data specified by the `Content-Type` header). +- `ip` - This is the remote IP address of the client that made the request (or the remote IP address of the last proxy to connect to Harper). +- `host` - This is the host of the request, like `example.com`. +- `sendEarlyHints(link: string, headers?: object): void` - This method sends an early hints response to the client, prior to actually returning a response. This is useful for sending a link header to the client to indicate that another resource should be preloaded. The `headers` argument can be used to send additional headers with the early hints response, in addition to the `link`. This is generally most helpful in a cache resolution function, where you can send hints _if_ the data is not in the cache and is resolving from an origin: + +```javascript +class Origin { + async get(request) { + // if we are fetching data from origin, send early hints + this.getContext().requestContext.sendEarlyHints(''); + let response = await fetch(request); + ... + } +} +Cache.sourcedFrom(Origin); +``` + +- `login(username, password): Promise` - This method can be called to start an authenticated session. The login will authenticate the user by username and password. If the authentication was successful, a session will be created and a cookie will be set on the response header that references the session. All subsequent requests from the client that include the cookie will be authenticated as the user that logged in, and the session record will be attached to the request. This method returns a promise that resolves when the login is successful, and rejects if the login is unsuccessful. +- `session` - This is the session object that is associated with the current cookie-maintained session. This object is used to store session data for the current session. This is a `Table` record instance, and can be updated by calling `request.session.update({ key: value })`, or the session can be retrieved with `request.session.get()`. If the cookie has not been set yet, a cookie will be set the first time a session is updated or a login occurs. +- `_nodeRequest` - This is the underlying Node.js [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_incomingmessage) object. This can be used to access the raw request data, such as the raw headers, raw body, etc. However, this is discouraged and should be used with caution since it will likely break any other server handlers that depend on the layered `Request` call with `Response` return pattern. +- `_nodeResponse` - This is the underlying Node.js [`http.ServerResponse`](https://nodejs.org/api/http.html#http_class_http_serverresponse) object. This can be used to access the raw response data, such as the raw headers. Again, this is discouraged, can cause problems for middleware, and should only be used if you are certain that other server handlers will not attempt to return a different `Response` object.
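+To tie these `Request` properties together, here is a minimal sketch of an HTTP middleware handler; the `/status` path and the response shape are hypothetical: + +```javascript +server.http( + (request, next) => { + // hypothetical health-check endpoint; all other requests continue down the chain + if (request.pathname === '/status') { + logger.info(`status check from ${request.ip} for host ${request.host}`); + // a Response-like object (see below): data is serialized via content negotiation + return { status: 200, data: { ok: true, method: request.method } }; + } + return next(request); + }, + { runFirst: true } +); +```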
+ +#### `Response` + +REST methods can directly return data that is serialized and returned to users, or they can return a `Response` object (or a promise resolving to a `Response`), or a `Response`-like object with the following properties (or again, a promise resolving to one): + +- `status` - This is the HTTP status code of the response. This is a number like `200`, `404`, `500`, etc. +- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the response. +- `data` - This is the data to be returned in the response. This will be serialized using Harper's content negotiation. +- `body` - Alternately (to `data`), the raw body can be returned as a `Buffer`, string, stream (Node.js or [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)), or a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob). + +#### `HttpOptions` + +Type: `Object` + +Properties: + +- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` +- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` +- `securePort` - _optional_ - `number` - Specify which HTTPS server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` + +#### `HttpServer` + +Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.Server`](https://nodejs.org/api/https.html#class-httpsserver) instance. + +### `server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer` + +Creates a socket server on the specified `options.port` or `options.securePort`. + +Only one socket server will be created; if both ports are specified, `securePort` takes precedence. + +#### `ConnectionListener` + +Node.js socket server connection listener as documented in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener) + +#### `SocketOptions` + +- `port` - _optional_ - `number` - Specify the port for the [`net.Server`](https://nodejs.org/api/net.html#class-netserver) instance. +- `securePort` - _optional_ - `number` - Specify the port for the [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. + +#### `SocketServer` + +Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. + +### `server.ws(listener: WsListener, options: WsOptions): HttpServer[]` + +Add a listener to the WebSocket connection listener middleware chain. The WebSocket server is associated with the HTTP server specified by the `options.port` or `options.securePort`. Use the [`server.upgrade()`](globals#serverupgradelistener-upgradelistener-options-upgradeoptions-void) method to add a listener to the upgrade middleware chain.
+ +Example: + +```js +server.ws((ws, request, chainCompletion) => { + chainCompletion.then(() => { + ws.on('error', console.error); + + ws.on('message', function message(data) { + console.log('received: %s', data); + }); + + ws.send('something'); + }); +}); +``` + +#### `WsListener` + +Type: `(ws: WebSocket, request: Request, chainCompletion: ChainCompletion, next: WsListener): Promise` + +The WebSocket connection listener. + +- The `ws` argument is the [WebSocket](https://github.com/websockets/ws/blob/master/doc/ws.md#class-websocket) instance as defined by the `ws` module. +- The `request` argument is Harper's transformation of the `IncomingMessage` argument of the standard ['connection'](https://github.com/websockets/ws/blob/master/doc/ws.md#event-connection) listener event for a WebSocket server. +- The `chainCompletion` argument is a `Promise` of the associated HTTP server's request chain. Awaiting this promise enables the user to ensure the HTTP request has finished being processed before operating on the WebSocket. +- The `next` argument is similar to that of other `next` arguments in Harper's server middlewares. To continue execution of the WebSocket connection listener middleware chain, pass all of the other arguments to this one such as: `next(ws, request, chainCompletion)` + +#### `WsOptions` + +Type: `Object` + +Properties: + +- `maxPayload` - _optional_ - `number` - Set the max payload size for the WebSocket server. Defaults to 100 MB. +- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` +- `port` - _optional_ - `number` - Specify which WebSocket server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` +- `securePort` - _optional_ - `number` - Specify which WebSocket secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` + +### `server.upgrade(listener: UpgradeListener, options: UpgradeOptions): void` + +Add a listener to the HTTP Server [upgrade](https://nodejs.org/api/http.html#event-upgrade_1) event. If a WebSocket connection listener is added using [`server.ws()`](globals#serverwslistener-wslistener-options-wsoptions-httpserver), a default upgrade handler will be added as well. The default upgrade handler will add a `__harperdb_request_upgraded` boolean to the `request` argument to signal the connection has already been upgraded. It will also check for this boolean _before_ upgrading and if it is `true`, it will pass the arguments along to the `next` listener. + +This method should be used to delegate HTTP upgrade events to an external WebSocket server instance. + +Example: + +> This example is from the Harper Next.js component. See the complete source code [here](https://github.com/HarperDB/nextjs/blob/main/extension.js) + +```js +server.upgrade( + (request, socket, head, next) => { + if (request.url === '/_next/webpack-hmr') { + return upgradeHandler(request, socket, head).then(() => { + request.__harperdb_request_upgraded = true; + + next(request, socket, head); + }); + } + + return next(request, socket, head); + }, + { runFirst: true } +); +``` + +#### `UpgradeListener` + +Type: `(request, socket, head, next) => void` + +The arguments are passed to the middleware chain from the HTTP server [`'upgrade'`](https://nodejs.org/api/http.html#event-upgrade_1) event. 
+ +#### `UpgradeOptions` + +Type: `Object` + +Properties: + +- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` +- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` +- `securePort` - _optional_ - `number` - Specify which HTTP secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` + +### `server.config` + +This provides access to the Harper configuration object. This comes from the [harperdb-config.yaml](../deployments/configuration) (parsed into object form). + +### `server.recordAnalytics(value, metric, path?, method?, type?)` + +This records the provided value as a metric into Harper's analytics. Harper efficiently records and tracks these metrics and makes them available through the [analytics API](analytics). The values are aggregated and statistical information is computed when many operations are performed. The optional parameters can be used to group statistics; make sure you are not grouping at too fine a level for useful aggregation. The parameters are: + +- `value` - This is a numeric value for the metric that is being recorded. This can be a value measuring time or bytes, for example. +- `metric` - This is the name of the metric. +- `path` - This is an optional path (like a URL path). For a URL like /my-resource/, you would typically include a path of "my-resource", not including the id, so you can group all the requests to "my-resource" instead of aggregating by each individual id. +- `method` - Optional method to group by. +- `type` - Optional type to group by. + +### `server.getUser(username): Promise` + +This returns the user object with permissions/authorization information based on the provided username. This does not verify the password, so it is generally used for looking up users by username. If you want to verify a user by password, use [`server.authenticateUser`](globals#serverauthenticateuserusername-password-user). + +### `server.authenticateUser(username, password): Promise` + +This returns the user object with permissions/authorization information based on the provided username. The password will be verified before returning the user object (if the password is incorrect, an error will be thrown). + +### `server.resources: Resources` + +This provides access to the map of all registered resources. This is the central registry in Harper for registering any resources to be exported for use by REST, MQTT, or other components. Components that want to register resources should use the `server.resources.set(name, resource)` method to add to this map. Exported resources can be found by passing in a path to `server.resources.getMatch(path)` which will find any resource that matches the path or beginning of the path. + +#### `server.resources.set(name, resource, exportTypes?)` + +Register a resource with the server. For example: + +```javascript +class NewResource extends Resource { +} +server.resources.set('NewResource', NewResource); +// or limit usage: +server.resources.set('NewResource', NewResource, { rest: true, mqtt: false, 'my-protocol': true }); +``` + +#### `server.resources.getMatch(path, exportType?)` + +Find a resource that matches the path.
For example: + +```javascript +server.resources.getMatch('/NewResource/some-id'); +// or specify the export/protocol type, to allow it to be limited: +server.resources.getMatch('/NewResource/some-id', 'my-protocol'); +``` + +### `server.operation(operation: Object, context?: Object, authorize?: boolean)` + +Execute an operation from the [Operations API](../developers/operations-api). + +Parameters: + +- `operation` - `Object` - Object matching the desired operation's request body +- `context` - `Object` - `{ username: string }` - _optional_ - The specified user +- `authorize` - `boolean` - _optional_ - Indicates whether the operation should authorize the user. Defaults to `false` + +Returns a `Promise` with the operation's response as per the [Operations API documentation](../developers/operations-api). + +### `server.nodes` + +Returns an array of node objects registered in the cluster. + +### `server.shards` + +Returns a map of shard numbers to arrays of their associated nodes. + +### `server.hostname` + +Returns the hostname of the current node. + +### `server.contentTypes` + +Returns the `Map` of registered content type handlers. Same as the [`contentTypes`](./globals#contenttypes) global. + +## `contentTypes` + +Returns a [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) of content type handlers for request/response serialization. + +Harper uses content negotiation to automatically handle data serialization and deserialization for HTTP requests and other protocols. This process works by: + +1. **Request Processing**: Comparing the `Content-Type` header with registered handlers to deserialize incoming data into structured formats for processing and storage +2. **Response Generation**: Comparing the `Accept` header with registered handlers to serialize structured data into the appropriate response format + +### Built-in Content Types + +Harper includes handlers for common formats: + +- **JSON** (`application/json`) +- **CBOR** (`application/cbor`) +- **MessagePack** (`application/msgpack`) +- **CSV** (`text/csv`) +- **Event-Stream** (`text/event-stream`) +- And more... + +### Custom Content Type Handlers + +You can extend or replace content type handlers by modifying the `contentTypes` map from the `server` global (or `harperdb` export). The map is keyed by MIME type, with values being handler objects containing these optional properties: + +#### Handler Properties + +- **`serialize(data: any): Buffer | Uint8Array | string`** + Called to convert data structures into the target format for responses. Should return binary data (Buffer/Uint8Array) or a string. + +- **`serializeStream(data: any): ReadableStream`** + Called to convert data structures into streaming format. Useful for handling asynchronous iterables or large datasets. + +- **`deserialize(buffer: Buffer | string): any`** + Called to convert incoming request data into structured format. Receives a string for text MIME types (`text/*`) and a Buffer for binary types. Only used if `deserializeStream` is not defined. + +- **`deserializeStream(stream: ReadableStream): any`** + Called to convert incoming request streams into structured format. Returns deserialized data (potentially as an asynchronous iterable). + +- **`q: number`** _(default: 1)_ + Quality indicator between 0 and 1 representing serialization fidelity. Used in content negotiation to select the best format when multiple options are available. The server chooses the content type with the highest product of client quality × server quality values.
+ +For example, if you wanted to define an XML serializer (that can respond with XML to requests with `Accept: text/xml`) you could write: + +```javascript +contentTypes.set('text/xml', { + serialize(data) { + // your serialization logic goes here; this placeholder just wraps the data + return '<data>' + String(data) + '</data>'; + }, + q: 0.8, +}); +``` diff --git a/versioned_docs/version-4.7/reference/graphql.md b/versioned_docs/version-4.7/reference/graphql.md new file mode 100644 index 00000000..cc43eec9 --- /dev/null +++ b/versioned_docs/version-4.7/reference/graphql.md @@ -0,0 +1,254 @@ +--- +title: GraphQL Querying +--- + +# GraphQL Querying + +Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../developers/applications/defining-schemas), and for querying [Resources](./resources/). + +Get started by setting `graphql: true` in `config.yaml`. + +This automatically enables a `/graphql` endpoint that can be used for GraphQL queries. + +> Harper's GraphQL component is inspired by the [GraphQL Over HTTP](https://graphql.github.io/graphql-over-http/draft/#) specification; however, it fully implements neither that specification nor the [GraphQL](https://spec.graphql.org/) specification. + +Queries can either be `GET` or `POST` requests, and both follow essentially the same request format. `GET` requests must use search parameters, and `POST` requests use the request body. + +For example, to request the GraphQL Query: + +```graphql +query GetDogs { + Dog { + id + name + } +} +``` + +The `GET` request would look like: + +```http +GET /graphql?query=query+GetDogs+%7B+Dog+%7B+id+name+%7D+%7D +Accept: application/graphql-response+json +``` + +And the `POST` request would look like: + +```http +POST /graphql/ +Content-Type: application/json +Accept: application/graphql-response+json + +{ + "query": "query GetDogs { Dog { id name } }" +} +``` + +> Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. + +The Harper GraphQL querying system is strictly limited to exported Harper Resources. For many users, this will typically be a table that uses the `@export` directive in its schema. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based on the [Resource Query API](./resources/#query) for more complex queries. + +Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: + +```graphql +query GetDogsAndOwners { + Dog { + id + name + breed + } + + Owner { + id + name + occupation + } +} +``` + +This will return all dogs and owners in the database, and is equivalent to executing two REST queries: + +```http +GET /Dog/?select(id,name,breed) +# and +GET /Owner/?select(id,name,occupation) +``` + +### Request Parameters + +There are three request parameters for GraphQL queries: `query`, `operationName`, and `variables`. + +1. `query` - _Required_ - The string representation of the GraphQL document. + 1. Limited to [Executable Definitions](https://spec.graphql.org/October2021/#executabledefinition) only. + 1. i.e. GraphQL [`query`](https://graphql.org/learn/queries/#fields) or `mutation` (coming soon) operations, and [fragments](https://graphql.org/learn/queries/#fragments). + 1. If a shorthand, unnamed, or single named query is provided, it will be executed by default.
Otherwise, if there are multiple queries, the `operationName` parameter must be used. +1. `operationName` - _Optional_ - The name of the query operation to execute if multiple queries are provided in the `query` parameter +1. `variables` - _Optional_ - A map of variable values to be used for the specified query + +### Type Checking + +The Harper GraphQL querying system takes many liberties with the GraphQL specification. This extends to how it handles type checking. In general, the querying system does **not** type check. Harper uses the `graphql` parser directly, and then performs a transformation on the resulting AST. We do not control any type checking/casting behavior of the parser, and since the execution step diverges from the spec greatly, the type checking behavior is only loosely defined. + +In variable definitions, the querying system will ensure non-null values exist (and error appropriately), but it will not do any type checking of the value itself. + +For example, the variable `$name: String!` states that `name` should be a non-null, string value. + +- If the request does not contain the `name` variable, an error will be returned +- If the request provides `null` for the `name` variable, an error will be returned +- If the request provides any non-string value for the `name` variable, e.g. `1`, `true`, `{ foo: "bar" }`, the behavior is undefined and an error may or may not be returned. +- If the variable definition is changed to include a default value, `$name: String! = "John"`, then when omitted, `"John"` will be used. + - If `null` is provided as the variable value, an error will still be returned. + - If the default value does not match the type specified (e.g. `$name: String! = 0`), this is also considered undefined behavior. It may or may not fail in a variety of ways. +- Fragments will generally extend non-specified types, and the querying system will do no validity checking on them. For example, `fragment Fields on Any { ... }` is just as valid as `fragment Fields on MadeUpTypeName { ... }`. See the Fragments sections for more details. + +The only notable place the querying system will do some level of type analysis is the transformation of arguments into a query. + +- Objects will be transformed into properly nested attributes +- Strings and Boolean values are passed through as their AST values +- Float and Int values will be parsed using the JavaScript `parseFloat` and `parseInt` methods respectively. +- Lists and Enums are not supported. + +### Fragments + +The querying system loosely supports fragments. Both fragment definitions and inline fragments are supported, and are entirely a composition utility. Since this system does very little type checking, the `on Type` part of fragments is entirely pointless. Any value can be used for `Type` and it will have the same effect. + +For example, in the query + +```graphql +query Get { + Dog { + ...DogFields + } +} + +fragment DogFields on Dog { + name + breed +} +``` + +The `Dog` type in the fragment has no correlation to the `Dog` resource in the query (that correlates to the Harper `Dog` resource). + +You can literally specify anything in the fragment and it will behave the same way: + +```graphql +fragment DogFields on Any { ... } # this is recommended +fragment DogFields on Cat { ... } +fragment DogFields on Animal { ... } +fragment DogFields on LiterallyAnything { ...
} +``` + +As an actual example, fragments should be used for composition: + +```graphql +query Get { + Dog { + ...sharedFields + breed + } + Owner { + ...sharedFields + occupation + } +} + +fragment sharedFields on Any { + id + name +} +``` + +### Short Form Querying + +Any attribute can be used as an argument for a query. In this short form, multiple arguments are treated as multiple equivalency conditions combined with the default `and` operation. + +For example, the following query requires an `id` variable to be provided, and the system will search for a `Dog` record matching that id. + +```graphql +query GetDog($id: ID!) { + Dog(id: $id) { + name + breed + owner { + name + } + } +} +``` + +And as a properly formed request: + +```http +POST /graphql/ +Content-Type: application/json +Accept: application/graphql-response+json + +{ + "query": "query GetDog($id: ID!) { Dog(id: $id) { name breed owner { name } } }", + "variables": { + "id": "0" + } +} +``` + +The REST equivalent would be: + +```http +GET /Dog/?id==0&select(name,breed,owner{name}) +# or +GET /Dog/0?select(name,breed,owner{name}) +``` + +Short form queries can handle nested attributes as well. + +For example, to return all dogs that have an owner with the name `"John"`: + +```graphql +query GetDog { + Dog(owner: { name: "John" }) { + name + breed + owner { + name + } + } +} +``` + +Would be equivalent to + +```http +GET /Dog/?owner.name==John&select(name,breed,owner{name}) +``` + +And finally, we can put all of these together to create semi-complex, equality-based queries! + +The following query has two variables and will return all dogs that have the specified name as well as the specified owner name. + +```graphql +query GetDog($dogName: String!, $ownerName: String!) { + Dog(name: $dogName, owner: { name: $ownerName }) { + name + breed + owner { + name + } + } +} +``` + +### Long Form Querying + +> Coming soon! + +### Mutations + +> Coming soon! + +### Subscriptions + +> Coming soon! + +### Directives + +> Coming soon! diff --git a/versioned_docs/version-4.7/reference/headers.md b/versioned_docs/version-4.7/reference/headers.md new file mode 100644 index 00000000..5c85fc88 --- /dev/null +++ b/versioned_docs/version-4.7/reference/headers.md @@ -0,0 +1,12 @@ +--- +title: Harper Headers +--- + +# Harper Headers + +All Harper API responses include headers that are important for interoperability and debugging purposes. The following headers are returned with all Harper API responses: + +| Key | Example Value | Description | +| ------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | +| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | +| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. | diff --git a/versioned_docs/version-4.7/reference/index.md b/versioned_docs/version-4.7/reference/index.md new file mode 100644 index 00000000..4c5d867a --- /dev/null +++ b/versioned_docs/version-4.7/reference/index.md @@ -0,0 +1,9 @@ +--- +title: Reference +--- + +# Reference + +This section contains technical details and reference materials for Harper. + +Please choose a topic from the navigation menu on the left.
diff --git a/versioned_docs/version-4.7/reference/limits.md b/versioned_docs/version-4.7/reference/limits.md new file mode 100644 index 00000000..97214620 --- /dev/null +++ b/versioned_docs/version-4.7/reference/limits.md @@ -0,0 +1,37 @@ +--- +title: Harper Limits +--- + +# Harper Limits + +This document outlines the limitations of Harper. + +## Database Naming Restrictions + +**Case Sensitivity** + +Harper database metadata (database names, table names, and attribute/column names) is case sensitive, meaning databases, tables, and attributes can differ only by the case of their characters. + +**Restrictions on Database Metadata Names** + +Harper database metadata (database names, table names, and attribute names) cannot contain the following UTF-8 characters: + +``` +/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ +``` + +Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: + +``` +^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ +``` + +## Table Limitations + +**Attribute Maximum** + +Harper limits the total number of indexed attributes across tables (including the primary key of each table) to 10,000 per database. + +## Primary Keys + +The maximum length of a primary key is 1978 bytes or 659 characters (whichever is shorter). diff --git a/versioned_docs/version-4.7/reference/resources/index.md b/versioned_docs/version-4.7/reference/resources/index.md new file mode 100644 index 00000000..7c1014ac --- /dev/null +++ b/versioned_docs/version-4.7/reference/resources/index.md @@ -0,0 +1,742 @@ +--- +title: Resource Class +--- + +# Resource Class + +The Resource class is designed to provide a unified API for modeling different data resources within Harper. Database/table data can be accessed through the Resource API. The Resource class can be extended to create new data sources. Resources can be exported to define endpoints. Tables themselves extend the Resource class, and can be extended by users. + +Conceptually, a Resource class provides an interface for accessing, querying, modifying, and monitoring a set of entities or records. Instances of a Resource class can represent a single record or entity, or a collection of records, at a given point in time, and you can interact with them through various methods or queries. Resource instances can represent an atomic transactional view of a resource and facilitate transactional interaction. A Resource instance holds the primary key/identifier, context information, and any pending updates to the record, so any instance methods can act on the record and have full access to this information during execution. Therefore, there are distinct resource instances created for every record or query that is accessed, and the instance methods are used for interaction with the data. + +Resource classes also have static methods, which are generally the preferred way to externally interact with tables and resources. The static methods handle parsing paths and query strings, starting a transaction as necessary, performing access authorization checks (if required), creating a resource instance, and calling the instance methods. The general rule for how to interact with resources is: + +- If you want to _act upon_ a table or resource, querying or writing to it, then use the static methods to initially access or write data.
For example, you could use `MyTable.get(34)` to access the record with a primary key of `34`. +- If you want to _define custom behavior_ for a table or resource (to control how a resource responds to queries/writes), then extend the class and override/define instance methods. + +The Resource API is heavily influenced by the REST/HTTP API, and the methods and properties of the Resource class are designed to map to and be used in a similar way to how you would interact with a RESTful API. + +The REST-based API is a little different from traditional Create-Read-Update-Delete (CRUD) APIs that were designed with single-server interactions in mind. Semantics that attempt to guarantee no-existing-record or overwrite-only behavior require locks that don't scale well in a distributed database. Centralizing writes around `put` calls provides much more scalable, simple, and consistent behavior in a distributed, eventually consistent database. You can generally think of CRUD operations mapping to REST operations like this: + +- Read - `get` +- Create with a known primary key - `put` +- Create with a generated primary key - `post`/`create` +- Update (Full) - `put` +- Update (Partial) - `patch` +- Delete - `delete` + +The RESTful HTTP server and other server interfaces will directly call resource methods of the same name to fulfill incoming requests, so resources can be defined as endpoints for external interaction. When resources are used by the server interfaces, the static method will be executed (which starts a transaction and does access checks), which will then create the resource instance and call the corresponding instance method. Paths (URL, MQTT topics) are mapped to different resource instances. A path that specifies an ID like `/MyResource/3492` will be mapped to an instance of MyResource, and will call the instance methods like `get(target)`, `put(target, data)`, and `post(target, data)`, where target is based on the `/3492` part of the path. + +It is recommended that you use the latest version (V2) of the Resource API with the legacy instance binding behavior disabled. This is done by setting the static `loadAsInstance` property to `false` on the Resource class. This will become the default behavior in Harper version 5.0. This page is written assuming `loadAsInstance` is set to `false`. If you want to use the legacy instance binding behavior, you can set `loadAsInstance` to `true` on the Resource class. If you have existing code that you want to migrate, please see the [migration guide](resources/migration) for more information. + +You can create classes that extend `Resource` to define your own data sources, typically to interface with external data sources (the `Resource` base class is available as a global variable in the Harper JS environment). In doing this, you will generally be extending and providing implementations for the instance methods below.
For example: + +```javascript +export class MyExternalData extends Resource { + static loadAsInstance = false; // enable the updated API + async get(target) { + // fetch data from an external source, using our id + let response = await this.fetch(target.id); + // do something with the response + } + put(target, data) { + // send the data into the external source + } + delete(target) { + // delete an entity in the external data source + } + subscribe(subscription) { + // if the external data source is capable of real-time notification of changes, can subscribe + } +} +// we can export this class from resources.json as our own endpoint, or use this as the source for +// a Harper table to store and cache the data coming from this data source: +tables.MyCache.sourcedFrom(MyExternalData); +``` + +You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: + +```javascript +export class MyTable extends tables.MyTable { + static loadAsInstance = false; // enable the updated API + get(target) { + // we can add properties or change properties before returning data: + return { ...super.get(target), newProperty: 'newValue', existingProperty: 42 }; // returns the record, with additional properties + } + put(target, data) { + // can change data any way we want + super.put(target, data); + } + delete(target) { + super.delete(target); + } + post(target, data) { + // providing a post handler (for HTTP POST requests) is a common way to create additional + // actions that aren't well described with just PUT or DELETE + } +} +``` + +Make sure that if you are extending and `export`ing your table with this class, you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice. + +All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and a status code. + +## Global Variables + +### `tables` + +This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created will be available as a (standard) property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. + +### `databases` + +This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created will be available as a (standard) property on this object. The property values are an object with the tables in that database, where each property is a table, like the `tables` object. In fact, `databases.data === tables` should always be true. + +### `Resource` + +This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. + +### `server` + +This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](./components/extensions). + +### `transaction` + +This provides a function for starting transactions. See the [transactions documentation](./transactions) for more information.
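+As a quick illustration of the `transaction` global, here is a minimal sketch; the `Dog` table is hypothetical, and the assumption that the callback's transaction object can be passed as the context argument to table methods should be verified against the transactions documentation: + +```javascript +// a minimal sketch: perform two writes atomically within one transaction +await transaction(async (txn) => { + await tables.Dog.put({ id: 1, name: 'Harper' }, txn); + await tables.Dog.patch(1, { goodDog: true }, txn); +}); +```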
+ +### `contentTypes` + +This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. + +### TypeScript Support + +While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. This can be done by setting up a package link to the main Harper package in your app: + +```bash +# you may need to go to your harper directory and set it up as a link first +npm link harperdb +``` + +And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: + +```javascript +import { databases, tables, Resource } from 'harperdb'; +``` + +## Resource Class (Instance) Methods + +### Properties/attributes declared in schema + +Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. + +### `get(target: RequestTarget | Id): Promise|AsyncIterable` + +This retrieves a record, or queries for records, and is called by HTTP GET requests. This can be called with a `RequestTarget` which can specify a path/id and query parameters as well as search parameters. For tables, this can also be called directly with an id (string or number) to retrieve a record by id. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. HTTP requests will always call `get` with a full `RequestTarget`. The default `get` method (`super.get(target)`) returns the current record as a plain object. + +The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: + +```javascript +class MyResource extends Resource { + static loadAsInstance = false; + get(target) { + let param1 = target.get('param1'); // returns 'value' + let id = target.id; // returns 'some-id' + let path = target.pathname; // returns /some-id + let fullTarget = target.target; // returns /some-id?param1=value + // ... + } +} +``` + +If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return the record identified by the path. If `get` is called on a collection (`/Table/?name=value`), the target will have the `isCollection` property set to `true` and the default action is to `search` and return an AsyncIterable of results. + +### `search(query: RequestTarget): AsyncIterable` + +This performs a query on this resource or table. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an `AsyncIterable` of results. The `query` object can be used to specify the desired query. + +### `put(target: RequestTarget | Id, data: object): void|Response` + +This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated.
The default `put` method on tables (`super.put(target, data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. + +The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. + +### `patch(target: RequestTarget | Id, data: object): void|Response` + +This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(target, data)`) updates the record. The properties will be applied to the existing record, overwriting the existing record's properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. + +### `update(target: RequestTarget, updates?: object): Updatable` + +This can be called to get an Updatable class for updating a record. An `Updatable` instance provides direct access to record properties as properties on the `Updatable` instance. The properties can also be modified, and any changes are tracked and written to the record when the transaction commits. For example, if we wanted to update the quantity of a product in the Product table, in response to a post, we could write: + +```javascript +class Product extends tables.Product { + static loadAsInstance = false; + post(target, data) { + let updatable = this.update(target); + updatable.quantity = updatable.quantity - 1; + } +} +``` + +In addition, the `Updatable` class has the following methods. + +### `Updatable` class + +#### `addTo(property, value)` + +This adds the provided value to the specified property using conflict-free replicated data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. We could improve the example above to reliably ensure the quantity is decremented even when it occurs on multiple nodes simultaneously: + +```javascript +class Product extends tables.Product { + static loadAsInstance = false; + post(target, data) { + let updatable = this.update(target); + updatable.addTo('quantity', -1); + } +} +``` + +#### `subtractFrom(property, value)` + +This functions exactly the same as `addTo`, except it subtracts the value. + +The `Updatable` also inherits the `getUpdatedTime` and `getExpiresAt` methods from the `RecordObject` class. + +### `delete(target: RequestTarget): void|Response` + +This will delete this record or resource identified by the target, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete(target)`) deletes the record identified by target from the table as part of the current transaction. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. + +### `publish(target: RequestTarget, message): void|Response` + +This will publish a message to this resource, and is called for MQTT publish commands.
You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(target, message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. + +### `post(target: RequestTarget, data: object): void|Response` + +This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. + +### `invalidate(target: RequestTarget)` + +This method is available on tables. This will invalidate the specified record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. + +### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` + +This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.subscribe(subscriptionRequest)`) will set up a listener that will be called for any changes or published messages to this resource. + +The returned promise resolves to a Subscription object, which is an `AsyncIterable` that you can iterate through with a `for await` loop. It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). + +The `SubscriptionRequest` object supports the following properties (all optional): + +- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). +- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. +- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` cannot be used in conjunction with `startTime`. +- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. + +### `connect(target: RequestTarget, incomingMessages?: AsyncIterable): AsyncIterable` + +This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path.
This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from an SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. + +### `getUpdatedTime(): number` + +This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. + +### `wasLoadedFromSource(): boolean` + +Indicates if the record was loaded from the source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or by waiting on an in-flight request to the source to finish). + +### `getContext(): Context` + +Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. + +#### `Context` + +The `Context` object has the following (potential) properties: + +- `user` - This is the user object, which includes information about the username, role, and authorizations. +- `transaction` - The current transaction. + +If the current method was triggered by an HTTP request, the following properties are available: + +- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that a modification should cause this timestamp to be updated. + +When a resource gets a request through HTTP, the request object is the context, which has the following properties: + +- `url` - The local path/URL of the request (this will not include the protocol or host name; it starts at the path and includes the query string). +- `method` - The method of the HTTP request. +- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. +- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. +- `pathname` - This provides the path part of the URL (no querystring). +- `host` - This provides the host name of the request (from the `Host` header). +- `ip` - This provides the IP address of the client that made the request. +- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. +- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). + +When a resource is accessed as a data source: + +- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used to fulfill many different requests, and relying on this first request context may not be representative of future requests.
Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. + +### `operation(operationObject: Object, authorize?: boolean): Promise` + +This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). + +### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` + +For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. + +## Resource Static Methods and Properties + +The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. Whereas instances methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. + +The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. By default, the Resource static methods default to creating an instance bound to the record specified by the arguments, and calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. + +### `get(target: RequestTarget|Id, context?: Resource|Context)` + +This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: + +```javascript +const { MyTable, Comment } = tables; +... +// in class: + async get() { + for (let commentId of this.commentIds) { + let comment = await Comment.get(commentId, this); + // now you can do something with the comment record + } + } +``` + +Type definition for `Id`: + +```typescript +Id = string | number | array; +``` + +### `get(query: Query, context?: Resource|Context)` + +This can be used to retrieve a resource instance by a query. 
The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: + +```javascript +MyTable.get({ id: 34, select: ['name', 'age'] }); +``` + +This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. + +### `put(target: RequestTarget|Id, record: object, context?: Resource|Context): Promise` + +This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). + +### `put(record: object, context?: Resource|Context): Promise` + +This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `create(record: object, context?: Resource|Context): Promise` + +This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `post(target: RequestTarget|Id, data: object, context?: Resource|Context): Promise|any` + +This will save the provided data to this resource. By default, this will create a new record (by calling `create`). However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. + +### `patch(target: RequestTarget|Id, recordUpdate: object, context?: Resource|Context): Promise|void` + +This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `delete(target: RequestTarget|Id, context?: Resource|Context): Promise|void` + +Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `publish(target: RequestTarget|Id, message: object, context?: Resource|Context): Promise|void` + +Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` + +Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. + +### `search(query: RequestTarget, context?: Resource|Context): AsyncIterable` + +This will perform a query on this table or collection. The query parameter can be used to specify the desired query. 
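+
+As a brief illustration of how these static methods combine (a minimal sketch, assuming a hypothetical `Product` table whose primary key attribute is `id`; the table and values are not part of the API):
+
+```javascript
+const { Product } = tables;
+// create a new record; a primary key is generated for us
+let newProduct = await Product.create({ name: 'Widget', price: 9.99 });
+// partially update the record, preserving unspecified properties
+await Product.patch(newProduct.id, { price: 12.49 });
+// and remove it again; awaiting each call keeps the writes within the surrounding transaction
+await Product.delete(newProduct.id);
+```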
+
+### `setComputedAttribute(name: string, computeFunction: (record: object) => any)`
+
+This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example:
+
+```javascript
+MyTable.setComputedAttribute('computedAttribute', (record) => {
+	return record.attribute1 + record.attribute2;
+});
+```
+
+For a schema like:
+
+```graphql
+type MyTable @table {
+	id: ID @primaryKey
+	attribute1: Int
+	attribute2: Int
+	computedAttribute: Int @computed
+}
+```
+
+See the [schema documentation](../developers/applications/defining-schemas) for more information on computed attributes.
+
+### `primaryKey`
+
+This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example:
+
+```javascript
+let record34 = await Table.get(34);
+let id = record34[Table.primaryKey]; // 34
+```
+
+There are additional methods that are only available on table classes (which are a type of resource).
+
+### `Table.sourcedFrom(Resource, options)`
+
+This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter configures a time-to-live expiration window for automatic deletion or invalidation of older entries, and supports:
+
+- `expiration` - Default expiration time for records in seconds.
+- `eviction` - Eviction time for records in seconds.
+- `scanInterval` - Time period for scanning the table for records to evict.
+
+If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data).
+
+### `directURLMapping`
+
+This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed, where the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example:
+
+```javascript
+export class MyTable extends tables.MyTable {
+	static directURLMapping = true;
+}
+```
+
+```http request
+GET /MyTable/test?foo=bar
+```
+
+This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource.
+
+### `getRecordCount({ exactCount: boolean })`
+
+This will return the number of records in the table. By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include an `estimatedRange` array with the estimated range of the count.
+
+### `parsePath(path, context, query)`
+
+This is called by static methods when they are responding to a URL (from an HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying the preferred content type in the URL (and for older tables it will convert a multi-segment path to a multipart array id). However, in some situations you may wish to preserve the path directly as a string. You can override `parsePath` for simple path-to-id preservation:
+
+```javascript
+	static parsePath(path) {
+		return path; // return the path as the id
+	}
+```
+
+### `isCollection(resource: Resource): boolean`
+
+This returns a boolean indicating if the provided resource instance represents a collection (can return a query result) or a single record/entity.
+
+### Context and Transactions
+
+Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation.
+
+This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). The context also maintains user, session, and request metadata information, so that contextual request information (like headers) can be accessed, any writes are properly attributed to the correct user, and any additional security checks can be applied to the user.
+
+When using an exported resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods.
+
+For example, suppose we have a method to post a comment on a blog; when this happens, we want to update an array of comment IDs on the blog record and also add the comment to a separate comment table. We might do this:
+
+```javascript
+const { Comment } = tables;
+
+export class BlogPost extends tables.BlogPost {
+	async post(comment) {
+		// add a comment record to the comment table, using this resource as the source for the context
+		await Comment.put(comment, this);
+		this.comments.push(comment.id); // add the id for the record to our array of comment ids
+		// Both of these actions will be committed atomically as part of the same transaction
+	}
+}
+```
+
+Please see the [transaction documentation](./transactions) for more information on how transactions work in Harper.
+
+### Query
+
+The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional:
+
+#### `conditions`
+
+This is an array of objects that specify the conditions used to match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties:
+
+- `attribute`: Name of the property/attribute to match on.
+- `value`: The value to match.
+- `comparator`: This can specify how the value is compared. This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal".
+- `conditions`: An array of conditions, which follows the same structure as above.
+- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`; this is optional and defaults to `and`).
+
+For example, a more complex query might look like:
+
+```javascript
+Table.search({
+	conditions: [
+		{ attribute: 'price', comparator: 'less_than', value: 100 },
+		{
+			operator: 'or',
+			conditions: [
+				{ attribute: 'rating', comparator: 'greater_than', value: 4 },
+				{ attribute: 'featured', value: true },
+			],
+		},
+	],
+});
+```
+
+**Chained Attributes/Properties**
+
+Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../developers/applications/defining-schemas) (in addition to the [schema documentation](../developers/applications/defining-schemas), see the [REST documentation](../developers/rest) for more of an overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name:
+
+```javascript
+Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] });
+```
+
+This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well as in nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in a referenced/joined table.
+
+#### `operator`
+
+Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an `"or"` (records must match at least one condition). This is optional and defaults to `"and"`.
+
+#### `limit`
+
+This specifies the limit of the number of records that should be returned from the query.
+
+#### `offset`
+
+This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records.
+
+#### `select`
+
+This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. Setting `select.asArray = true` will make the query results return arrays of the specified property values instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and a `select` array of its own that specifies the properties that should be returned from the referenced sub-object or related record. For example, a `select` can be defined:
+
+```javascript
+Table.search({ select: [ 'name', 'age' ], conditions: ...})
+```
+
+Or nested/joined properties from referenced objects can be specified; here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects:
+
+```javascript
+Table.search({ select: ['name', { name: 'related', select: ['description', 'id'] }], conditions: ...})
+```
+
+The select properties can also include certain special properties:
+
+- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table).
+- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time).
+
+Alternatively, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results. For example, to return an iterator of just the `id` of each object:
+
+```javascript
+Table.search({ select: 'id', conditions: ...})
+```
+
+#### `sort`
+
+This defines the sort order, and should be an object that can have the following properties:
+
+- `attribute`: The attribute to sort on.
+- `descending`: If true, will sort in descending order (optional and defaults to `false`).
+- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`.
+
+#### `explain`
+
+This will return the conditions re-ordered as Harper will execute them. Harper will estimate the number of matching records for each condition and apply the narrowest condition first.
+
+#### `enforceExecutionOrder`
+
+This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them.
+
+The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array; you cannot access the results by index).
+
+For example, we could do a query like:
+
+```javascript
+let { Product } = tables;
+let results = Product.search({
+	conditions: [
+		{ attribute: 'rating', value: 4.5, comparator: 'greater_than' },
+		{ attribute: 'price', value: 100, comparator: 'less_than' },
+	],
+	offset: 20,
+	limit: 10,
+	select: ['id', 'name', 'price', 'rating'],
+	sort: { attribute: 'price' },
+});
+for await (let record of results) {
+	// iterate through each record in the query results
+}
+```
+
+`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. Failing to iterate the results will leave a long-lived read transaction open, which can degrade performance (including write performance), and the transaction may eventually be aborted.
+
+### `RequestTarget`
+
+The `RequestTarget` class is used to represent a URL path that can be mapped to a resource. This is used by the REST interface to map a URL path to a resource class. All REST methods are called with a `RequestTarget` as the first argument, which is used to determine which record or entry to access or modify. Methods on a `Resource` class can be called with a primary key as a string or number value as the first argument, to access or modify a record by primary key, which will work with all the default methods. The static methods will transform the primary key into a `RequestTarget` instance before calling the instance methods, for argument normalization.
+When a `RequestTarget` is constructed with a URL path (from the REST methods), the static methods will automatically parse the path into a `RequestTarget` instance, including parsing the search string into query parameters.
+Below are the properties and methods of the `RequestTarget` class:
+
+- `pathname` - The path of the URL relative to the resource path that matched this request. This excludes the query/search string
+- `toString()` - The full relative path and search string of the URL
+- `search` - The search/query part of the target path (the part after the first `?` character)
+- `id` - The primary key of the resource, as determined by the path
+- `checkPermission` - This property is set to an object indicating that a permission check should be performed on the
+  resource. This is used by the REST interface to determine if a user has permission to access the resource. The object
+  contains:
+  - `action` - The type of action being performed (read/write/delete)
+  - `resource` - The resource being accessed
+  - `user` - The user requesting access
+
+`RequestTarget` is a subclass of `URLSearchParams`, and these methods are available for accessing and modifying the query parameters:
+
+- `get(name: string)` - Get the value of the query parameter with the specified name
+- `getAll(name: string)` - Get all the values of the query parameter with the specified name
+- `set(name: string, value: string)` - Set the value of the query parameter with the specified name
+- `append(name: string, value: string)` - Append the value to the query parameter with the specified name
+- `delete(name: string)` - Delete the query parameter with the specified name
+- `has(name: string)` - Check if the query parameter with the specified name exists
+
+In addition, the `RequestTarget` class is an iterable, so you can iterate through the query parameters:
+
+- `for (let [name, value] of target)` - Iterate through the query parameters
+
+When a `RequestTarget` has query parameters using Harper's extended query syntax, the REST static methods will parse the `RequestTarget` and potentially add any of the following properties if they are present in the query:
+
+- `conditions` - An array of conditions that will be used to filter the query results
+- `limit` - The limit of the number of records to return
+- `offset` - The number of records to skip before returning the results
+- `sort` - The sort order of the query results
+- `select` - The properties to return in the query results
+
+### `RecordObject`
+
+The `get` method will return a `RecordObject` instance, which is an object containing all the properties of the record. Any property on the record can be directly accessed and the properties can be enumerated with standard JS capabilities like `for`-`in` and `Object.keys`. The `RecordObject` instance will also have the following methods:
+
+- `getUpdatedTime()` - Get the last updated time (the version number) of the record
+- `getExpiresAt()` - Get the expiration time of the entry, if there is one.
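+
+As a brief sketch of how these pieces fit together (the `Product` table and the `minRating` query parameter here are hypothetical), a custom `get` can read query parameters from the `RequestTarget` and return a `Response`-style object (described below) instead of the record:
+
+```javascript
+export class CustomProduct extends tables.Product {
+	async get(target) {
+		// RequestTarget is a URLSearchParams subclass, so query parameters can be read directly
+		let minRating = target.get('minRating');
+		let record = await super.get(target);
+		if (minRating && record && record.rating < Number(minRating)) {
+			// respond with a custom status and body instead of the record
+			return { status: 404, data: { message: 'No product with a sufficient rating' } };
+		}
+		return record;
+	}
+}
+```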
+
+### Interacting with the Resource Data Model
+
+When extending or interacting with table resources, you can interact through standard CRUD/REST methods to create, read, update, and delete records. You can use idiomatic property access and modification to interact with the records themselves. For example, let's say we defined a product schema:
+
+```graphql
+type Product @table {
+	id: ID @primaryKey
+	name: String
+	rating: Int
+	price: Float
+}
+```
+
+If we have extended this table class with our own `get()`, we can interact with the record:
+
+```javascript
+export class CustomProduct extends Product {
+	async get(target) {
+		let record = await super.get(target);
+		let name = record.name; // this is the name of the current product
+		let rating = record.rating; // this is the rating of the current product
+		// we can't directly modify the record (it is frozen), but we can copy it if we want to return a modification
+		record = { ...record, rating: 3 };
+		return record;
+	}
+}
+```
+
+Likewise, we can interact with resource instances in the same way when retrieving them through the static methods:
+
+```javascript
+let product1 = await Product.get(1);
+let name = product1.name; // this is the name of the product with a primary key of 1
+let rating = product1.rating; // this is the rating of the product with a primary key of 1
+// if we want to update a single property:
+await Product.patch(1, { rating: 3 });
+```
+
+When running inside a transaction, we can use the `update` method, and updates are automatically saved when a request completes:
+
+```javascript
+export class CustomProduct extends Product {
+	post(target, data) {
+		let record = this.update(target);
+		record.name = data.name;
+		record.description = data.description;
+		// both of these changes will be saved automatically as this transaction commits
+	}
+}
+```
+
+We can also interact with properties in nested objects and arrays, following the same patterns. For example, we could define more complex types on our product:
+
+```graphql
+type Product @table {
+	id: ID @primaryKey
+	name: String
+	rating: Int
+	price: Float
+	brand: Brand
+	variations: [Variation]
+}
+type Brand {
+	name: String
+}
+type Variation {
+	name: String
+	price: Float
+}
+```
+
+We can interact with these nested properties:
+
+```javascript
+export class CustomProduct extends Product {
+	post(target, data) {
+		let record = this.update(target);
+		let brandName = record.brand.name;
+		let firstVariationPrice = record.variations[0].price;
+		let additionalInfoOnBrand = record.brand.additionalInfo; // not defined in schema, but can still try to access property
+		// make some changes
+		record.variations.splice(0, 1); // remove first variation
+		record.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation
+		record.brand.name = 'new brand name';
+		// all these changes will be saved
+	}
+}
+```
+
+If you need to delete a property, you can do so with the `delete` method:
+
+```javascript
+let product1 = await Product.update(1);
+product1.delete('additionalInformation');
+```
+
+## Response Object
+
+The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response`-style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. For example, you could return a redirect response like:
+
+```javascript
+return { status: 302, headers: { Location: '/new-location' } };
+```
+
+If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). For example, we could return an object with a custom header:
+
+```javascript
+return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } };
+```
+
+### Throwing Errors
+
+You may throw errors (and leave them uncaught) from the response methods, and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default, the status code will be 500. You can assign a `statusCode` property to errors to indicate the HTTP status code that should be returned. For example:
+
+```javascript
+if (notAuthorized()) {
+	let error = new Error('You are not authorized to access this');
+	error.statusCode = 403;
+	throw error;
+}
+```
diff --git a/versioned_docs/version-4.7/reference/resources/instance-binding.md b/versioned_docs/version-4.7/reference/resources/instance-binding.md
new file mode 100644
index 00000000..5c507e32
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/resources/instance-binding.md
@@ -0,0 +1,721 @@
+---
+title: Resource Class with Resource Instance Binding behavior
+---
+
+# Resource Class with Resource Instance Binding behavior
+
+This document describes the legacy instance binding behavior of the Resource class. It is recommended that you use the [updated behavior of the Resource API](./) instead, but this legacy API is preserved for backwards compatibility.
+
+## Resource Class
+
+```javascript
+export class MyExternalData extends Resource {
+	static loadAsInstance = true;
+	async get() {
+		// fetch data from an external source, using our id
+		let response = await this.fetch(this.id);
+		// do something with the response
+	}
+	put(data) {
+		// send the data into the external source
+	}
+	delete() {
+		// delete an entity in the external data source
+	}
+	subscribe(options) {
+		// if the external data source is capable of real-time notification of changes, can subscribe
+	}
+}
+// we can export this class from resources.json as our own endpoint, or use this as the source for
+// a Harper table to store and cache the data coming from this data source:
+tables.MyCache.sourcedFrom(MyExternalData);
+```
+
+You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`:
+
+```javascript
+export class MyTable extends tables.MyTable {
+	get() {
+		// we can add properties or change properties before returning data:
+		this.newProperty = 'newValue';
+		this.existingProperty = 44;
+		return super.get(); // returns the record, modified with the changes above
+	}
+	put(data) {
+		// can change data any way we want
+		super.put(data);
+	}
+	delete() {
+		super.delete();
+	}
+	post(data) {
+		// providing a post handler (for HTTP POST requests) is a common way to create additional
+		// actions that aren't well described with just PUT or DELETE
+	}
+}
+```
+
+Make sure that if you are extending and `export`ing your table with this class, you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice.
+
+All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and a status code.
+
+## Global Variables
+
+### `tables`
+
+This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created will be available as a (standard) property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API.
+
+### `databases`
+
+This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created will be available as a (standard) property on this object. The property values are an object with the tables in that database, where each property is a table, like the `tables` object. In fact, `databases.data === tables` should always be true.
+
+### `Resource`
+
+This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables.
+
+### `server`
+
+This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](../components/extensions).
+
+### `transaction`
+
+This provides a function for starting transactions. See the [transactions documentation](../transactions) for more information.
+
+### `contentTypes`
+
+This provides an interface for defining new content type handlers. See the content type extensions documentation for more information.
+
+### TypeScript Support
+
+While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. This can be done by setting up a package link to the main Harper package in your app:
+
+```
+# you may need to go to your harper directory and set it up as a link first
+npm link harperdb
+```
+
+And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them:
+
+```
+import { databases, tables, Resource } from 'harperdb';
+```
+
+## Resource Class (Instance) Methods
+
+### Properties/attributes declared in schema
+
+Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances.
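+
+For example (a minimal sketch, assuming a table whose schema declares a `rating` attribute), an instance method can read and assign these properties directly on `this`:
+
+```javascript
+export class MyTable extends tables.MyTable {
+	get() {
+		// schema-declared attributes are available directly on the instance
+		if (this.rating == null) {
+			this.rating = 0; // supply a default value before returning the record
+		}
+		return super.get();
+	}
+}
+```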
+
+### `get(queryOrProperty?)`: Resource|AsyncIterable
+
+This is called to return the record or data for this resource, and is called by HTTP GET requests. This may be optionally called with a `query` object to specify that a query should be performed, or a string to indicate that the specified property value should be returned. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. The default `get` method (`super.get()`) returns the current record as a plain object.
+
+The query object can be used to access any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information:
+
+```javascript
+get(query) {
+	// note that query will only exist (as an object) if there is a query string
+	let param1 = query?.get?.('param1'); // returns 'value'
+	let id = this.getId(); // returns 'some-id'
+	...
+}
+```
+
+If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return `this` instance of the resource. If `get` is called on a collection (`/Table/?name=value`), the default action is to `search` and return an AsyncIterable of results.
+
+It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.get(query)` performs a `get` on this specific record/resource, not on the whole table. If you wish to access a _different_ record, you should use the static `get` method on the table class, like `Table.get(otherId, context)`.
+
+### `search(query: Query)`: AsyncIterable
+
+This performs a query on this resource, searching for records that are descendants of this resource. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. However, if you call search from an instance with a specific ID like `1` from a path like `Table/1`, it will only return records that are descendants of that record, like `[1, 1]` (path of Table/1/1) and `[1, 2]` (path of Table/1/2). If you want to do a standard search of the table, make sure you call the static method like `Table.search(...)`. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an AsyncIterable of results. The query object can be used to specify the desired query.
+
+### `getId(): string|number|Array`
+
+Returns the primary key value for this resource.
+
+### `put(data: object, query?: Query): Resource|void|Response`
+
+This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(data)`) writes the record to the table (updating or inserting depending on whether the record previously existed) as part of the current transaction for the resource instance.
+
+It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.put(data)` updates this specific record/resource, not other records in the table. If you wish to update a _different_ record, you should use the static `put` method on the table class, like `Table.put(data, context)`.
+
+The `query` argument is used to represent any additional query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information:
+
+```javascript
+put(data, query) {
+	let param1 = query?.get?.('param1'); // returns 'value'
+	...
+}
+```
+
+### `patch(data: object): Resource|void|Response`
+
+### `patch(data: object, query?: Query)`
+
+This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(data)`) updates the record. The properties will be applied to the existing record, overwriting the existing record's properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `query` argument is used to represent any additional query parameters that were included.
+
+### `update(data: object, fullUpdate: boolean?)`
+
+This is called by the default `put` and `patch` handlers to update a record. `put` calls it with `fullUpdate` as `true` to indicate a full record replacement (`patch` calls it with the second argument as `false`). Any additional property changes that are made before the transaction commits will also be persisted.
+
+### `delete(queryOrProperty?): Resource|void|Response`
+
+This will delete this record or resource, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete()`) deletes the record from the table as part of the current transaction.
+
+### `publish(message): Resource|void|Response`
+
+This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic.
+
+### `post(data: object, query?: Query): Resource|void|Response`
+
+This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `query` argument is used to represent any additional query parameters that were included.
+
+### `invalidate()`
+
+This method is available on tables. This will invalidate the current record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed.
+
+### `subscribe(subscriptionRequest: SubscriptionRequest): Promise`
+
+This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.subscribe(subscriptionRequest)`) will set up a listener that will be called for any changes or published messages to this resource.
+
+The returned promise resolves to a Subscription object, an `AsyncIterable` that you can iterate through with a `for await` loop. It also has a `queue` property which holds an array of any messages that are ready to be delivered immediately (if you have specified a start time, a previous count, or there is a message for the current or "retained" record, these may be immediately returned).
+
+The `SubscriptionRequest` object supports the following properties (all optional):
+
+- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`).
+- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription.
+- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` cannot be used in conjunction with `startTime`.
+- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update.
+
+### `connect(incomingMessages?: AsyncIterable, query?: Query): AsyncIterable`
+
+This is called when a connection is received through WebSockets or Server-Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from an SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client.
+
+### `set(property, value)`
+
+This will assign the provided value to the designated property in the resource's record. During a write operation, this will indicate that the record has changed and the changes will be saved during commit. During a read operation, this will modify the copy of the record that will be serialized (converted to the output format of JSON, MessagePack, etc.).
+
+### `allowCreate(user: any, data: Promise, context: Context): boolean | Promise`
+
+This is called to determine if the user has permission to create the current resource. This is called as part of external incoming requests (HTTP). The default behavior for a generic resource is that this requires super-user permission, and the default behavior for a table is to check the user's role's insert permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation.
+
+### `allowRead(user: any, query: Map | void, context: Context): boolean | Promise`
+
+This is called to determine if the user has permission to read from the current resource. This is called as part of external incoming requests (HTTP GET). The default behavior for a generic resource is that this requires super-user permission, and the default behavior for a table is to check the user's role's read permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean.
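+
+For example, a sketch of overriding `allowRead` on a table (the `Report` table and the `reporting-service` username here are purely illustrative):
+
+```javascript
+export class Report extends tables.Report {
+	allowRead(user) {
+		// only permit a designated service account to read through external requests
+		return user?.username === 'reporting-service';
+	}
+}
+```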
+ +### `allowUpdate(user: any, data: Promise, context: Context): boolean | Promise` + +This is called to determine if the user has permission to update the current resource. This is called as part of external incoming requests (HTTP PUT). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's update permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation. + +### `allowDelete(user: any, query: Map | void, context: Context): boolean | Promise` + +This is called to determine if the user has permission to delete the current resource. This is called as part of external incoming requests (HTTP DELETE). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's delete permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. + +### `addTo(property, value)` + +This adds to provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. + +### `getUpdatedTime(): number` + +This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. + +### `wasLoadedFromSource(): boolean` + +Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). + +### `getContext(): Context` + +Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. + +#### `Context` + +The `Context` object has the following (potential) properties: + +- `user` - This is the user object, which includes information about the username, role, and authorizations. +- `transaction` - The current transaction If the current method was triggered by an HTTP request, the following properties are available: +- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that modification should cause this timestamp to be updated. + +When a resource gets a request through HTTP, the request object is the context, which has the following properties: + +- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). +- `method` - The method of the HTTP request. +- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. +- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. +- `pathname` - This provides the path part of the URL (no querystring). 
+- `host` - This provides the host name of the request (from the `Host` header). +- `ip` - This provides the ip address of the client that made the request. +- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. +- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). + +When a resource is accessed as a data source: + +- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used fulfilled many different requests, and relying on this first request context may not be representative of future requests. Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. + +### `operation(operationObject: Object, authorize?: boolean): Promise` + +This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). + +### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` + +For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. + +## Resource Static Methods and Properties + +The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. Whereas instances methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. + +The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. 
By default, the Resource static methods default to creating an instance bound to the record specified by the arguments, and calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. + +### `get(id: Id, context?: Resource|Context)` + +This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: + +```javascript +const { MyTable, Comment } = tables; +... +// in class: + async get() { + for (let commentId of this.commentIds) { + let comment = await Comment.get(commentId, this); + // now you can do something with the comment record + } + } +``` + +Type definition for `Id`: + +```typescript +Id = string | number | array; +``` + +### `get(query: Query, context?: Resource|Context)` + +This can be used to retrieve a resource instance by a query. The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: + +```javascript +MyTable.get({ id: 34, select: ['name', 'age'] }); +``` + +This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. + +### `put(id: Id, record: object, context?: Resource|Context): Promise` + +This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). + +### `put(record: object, context?: Resource|Context): Promise` + +This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `create(record: object, context?: Resource|Context): Promise` + +This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `post(id: Id, data: object, context?: Resource|Context): Promise` + +### `post(data: object, context?: Resource|Context): Promise` + +This will save the provided data to this resource. By default, this will create a new record (by calling `create`). However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. + +### `patch(recordUpdate: object, context?: Resource|Context): Promise` + +### `patch(id: Id, recordUpdate: object, context?: Resource|Context): Promise` + +This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. 
+ +### `delete(id: Id, context?: Resource|Context): Promise` + +Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `publish(message: object, context?: Resource|Context): Promise` + +### `publish(topic: Id, message: object, context?: Resource|Context): Promise` + +Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. + +### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` + +Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. + +### `search(query: Query, context?: Resource|Context): AsyncIterable` + +This will perform a query on this table or collection. The query parameter can be used to specify the desired query. + +### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` + +This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: + +```javascript +MyTable.setComputedAttribute('computedAttribute', (record) => { + return record.attribute1 + record.attribute2; +}); +``` + +For a schema like: + +```graphql +type MyTable @table { + id: ID @primaryKey + attribute1: Int + attribute2: Int + computedAttribute: Int @computed +} +``` + +See the [schema documentation](../../developers/applications/defining-schemas) for more information on computed attributes. + +### `primaryKey` + +This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: + +```javascript +let record34 = await Table.get(34); +record34[Table.primaryKey] -> 34 +``` + +There are additional methods that are only available on table classes (which are a type of resource). + +### `Table.sourcedFrom(Resource, options)` + +This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: + +- `expiration` - Default expiration time for records in seconds. +- `eviction` - Eviction time for records in seconds. +- `scanInterval` - Time period for scanning the table for records to evict. + +If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). + +### `directURLMapping` + +This property can be set to force the direct URL request target to be mapped to the resource primary key. 
### `directURLMapping`

This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed: the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example:

```javascript
export class MyTable extends tables.MyTable {
	static directURLMapping = true;
}
```

```http request
GET /MyTable/test?foo=bar
```

This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource.

### `getRecordCount({ exactCount: boolean })`

This will return the number of records in the table. By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include an `estimatedRange` array with the estimated range of the count.

### `parsePath(path, context, query)`

This is called by static methods when they are responding to a URL (from an HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying the preferred content type in the URL (and for older tables it will convert a multi-segment path to a multipart array id). However, in some situations you may wish to preserve the path directly as a string. You can override `parsePath` to simply preserve the path as the id:

```javascript
	static parsePath(path) {
		return path; // return the path as the id
	}
```

### `isCollection(resource: Resource): boolean`

This returns a boolean indicating if the provided resource instance represents a collection (can return a query result) or a single record/entity.

### Context and Transactions

Whenever you implement an action that calls other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation.

This also allows timestamps that are accessed during resolution to be used to determine the overall last-updated timestamp, which informs the header timestamps (facilitating accurate client-side caching). The context also maintains user, session, and request metadata so that contextual request information (like headers) can be accessed, any writes are properly attributed to the correct user, and any additional security checks can be applied to the user.

When using an exported resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the context argument (second argument) to the static methods.

For example, suppose we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record and add the comment to a separate comment table.
We might do this:

```javascript
const { Comment } = tables;

export class BlogPost extends tables.BlogPost {
	async post(comment) {
		// add a comment record to the comment table, using this resource as the context
		await Comment.put(comment, this);
		this.comments.push(comment.id); // add the id for the record to our array of comment ids
		// Both of these actions will be committed atomically as part of the same transaction
	}
}
```

Please see the [transaction documentation](../transactions) for more information on how transactions work in Harper.

### Query

The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query object has the following properties, all of which are optional:

#### `conditions`

This is an array of objects that specify the conditions used to match records (if `conditions` is omitted or is an empty array, the search returns everything in the table). Each condition object can have the following properties:

- `attribute`: Name of the property/attribute to match on.
- `value`: The value to match.
- `comparator`: Specifies how the value is compared. This defaults to `equals`, but can also be `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `starts_with`, `contains`, `ends_with`, `between`, and `not_equal`.
- `conditions`: An array of nested conditions, which follows the same structure as above.
- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`; this is optional and defaults to `and`).

For example, a more complex query might look like:

```javascript
Table.search({
	conditions: [
		{ attribute: 'price', comparator: 'less_than', value: 100 },
		{
			operator: 'or',
			conditions: [
				{ attribute: 'rating', comparator: 'greater_than', value: 4 },
				{ attribute: 'featured', value: true },
			],
		},
	],
});
```

**Chained Attributes/Properties**

Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../../developers/applications/defining-schemas) (in addition to the [schema documentation](../../developers/applications/defining-schemas), see the [REST documentation](../../developers/rest) for more of an overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name:

```javascript
Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] });
```

This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well as in nested/grouped conditions. The chain of properties may also be more than two entries long, allowing multiple relationships to be traversed and effectively joining across multiple tables. An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in a referenced (joined) table.

#### `operator`

Specifies whether the conditions should be applied as an `and` (records must match all conditions) or as an `or` (records must match at least one condition). This is optional and defaults to `and`.
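For example, a top-level `or` query, reusing the `Product` table from above, might look like:

```javascript
// Match products that are either highly rated or featured
Product.search({
	operator: 'or',
	conditions: [
		{ attribute: 'rating', comparator: 'greater_than', value: 4 },
		{ attribute: 'featured', value: true },
	],
});
```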
#### `limit`

This specifies the maximum number of records that should be returned from the query.

#### `offset`

This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records.

#### `select`

This specifies the specific properties that should be included in each record that is returned. This can be an array to specify a set of properties that should be included in the returned objects. The array can also have an `asArray = true` property (`select.asArray = true`), in which case the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and a `select` array of its own that specifies the properties that should be returned from the referenced sub-object or related record. For example, a `select` can be defined:

```javascript
Table.search({ select: [ 'name', 'age' ], conditions: ...})
```

Or nested/joined properties from referenced objects can be specified; here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects:

```javascript
Table.search({ select: [ 'name', { name: 'related', select: ['description', 'id'] } ], conditions: ...})
```

The select properties can also include certain special properties:

- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table).
- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time).

Alternately, the select value can be a string, to specify that the value of the named property should be returned for each element in the results. For example, to return just an iterator of the `id`s of the matching objects:

```javascript
Table.search({ select: 'id', conditions: ...})
```

#### `sort`

This defines the sort order, and should be an object that can have the following properties:

- `attribute`: The attribute to sort on.
- `descending`: If true, will sort in descending order (optional and defaults to `false`).
- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`.

#### `explain`

This will return the conditions re-ordered as Harper will execute them. Harper estimates the number of matching records for each condition and applies the narrowest condition first.

#### `enforceExecutionOrder`

This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them.

The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array; you cannot access the results by index).
For example, we could do a query like:

```javascript
let { Product } = tables;
let results = Product.search({
	conditions: [
		{ attribute: 'rating', value: 4.5, comparator: 'greater_than' },
		{ attribute: 'price', value: 100, comparator: 'less_than' },
	],
	offset: 20,
	limit: 10,
	select: ['id', 'name', 'price', 'rating'],
	sort: { attribute: 'price' },
});
for await (let record of results) {
	// iterate through each record in the query results
}
```

`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, a read transaction is opened/reserved until the query results are iterated, either through your own `for await` loop or through serialization. Failing to iterate the results will leave a long-lived read transaction open, which can degrade performance (including write performance), and the transaction may eventually be aborted.

### Interacting with the Resource Data Model

When extending or interacting with table resources, a retrieved and instantiated resource instance is loaded with the record data from its table. You can interact with this record through the resource instance. For any properties that have been defined in the table's schema, you can directly access or modify properties through standard property syntax. For example, let's say we defined a product schema:

```graphql
type Product @table {
	id: ID @primaryKey
	name: String
	rating: Int
	price: Float
}
```

If we have extended this table class with our own `get()`, we can interact with any of these specified attributes/properties:

```javascript
export class CustomProduct extends Product {
	get(query) {
		let name = this.name; // this is the name of the current product
		let rating = this.rating; // this is the rating of the current product
		this.rating = 3; // we can also modify the rating for the current instance
		// (with a get this won't be saved by default, but will be used when serialized)
		return super.get(query);
	}
}
```

Likewise, we can interact with resource instances in the same way when retrieving them through the static methods:

```javascript
let product1 = await Product.get(1);
let name = product1.name; // this is the name of the product with a primary key of 1
let rating = product1.rating; // this is the rating of the product with a primary key of 1
product1.rating = 3; // modify the rating for this instance (this will be saved without a call to update())
```

If there are additional properties on (some) products that aren't defined in the schema, we can still access them through the resource instance. Since they aren't declared, there won't be getter/setter definitions for direct property access, but we can access properties with the `get(propertyName)` method and modify properties with the `set(propertyName, value)` method:

```javascript
let product1 = await Product.get(1);
let additionalInformation = product1.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema
product1.set('newProperty', 'some value'); // we can assign any properties we want with set
```

And likewise, we can do this in an instance method, although you will probably want to use `super.get()`/`set()` so you don't have to write extra logic to avoid recursion:

```javascript
export class CustomProduct extends Product {
	get(query) {
		let additionalInformation = super.get('additionalInformation'); // get a property not defined in the schema
		super.set('newProperty', 'some value'); // we can assign any properties we want with set
	}
}
```

Note that you may also need to use `get`/`set` for properties that conflict with existing method names. For example, if your schema defines an attribute called `getId` (not recommended), you would need to access that property through `get('getId')` and `set('getId', value)`.

If you want to save the changes you make, you can call the `update()` method:

```javascript
let product1 = await Product.get(1);
product1.rating = 3;
product1.set('newProperty', 'some value');
product1.update(); // save both of these property changes
```

Updates are automatically saved inside modifying methods like `put` and `post`:

```javascript
export class CustomProduct extends Product {
	post(data) {
		this.name = data.name;
		this.set('description', data.description);
		// both of these changes will be saved automatically as this transaction commits
	}
}
```

We can also interact with properties in nested objects and arrays, following the same patterns. For example, we could define more complex types on our product:

```graphql
type Product @table {
	id: ID @primaryKey
	name: String
	rating: Int
	price: Float
	brand: Brand
	variations: [Variation]
}
type Brand {
	name: String
}
type Variation {
	name: String
	price: Float
}
```

We can interact with these nested properties:

```javascript
export class CustomProduct extends Product {
	post(data) {
		let brandName = this.brand.name;
		let firstVariationPrice = this.variations[0].price;
		let additionalInfoOnBrand = this.brand.get('additionalInfo'); // not defined in schema, but can still try to access the property
		// make some changes
		this.variations.splice(0, 1); // remove first variation
		this.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation
		this.brand.name = 'new brand name';
		// all these changes will be saved
	}
}
```

If you need to delete a property, you can do so with the `delete` method:

```javascript
let product1 = await Product.get(1);
product1.delete('additionalInformation');
product1.update();
```

You can also get a "plain" object representation of a resource instance by calling `toJSON()`, which will return a simple frozen object with all the properties (whether or not they are defined in the schema) as normal direct properties (note that this object can _not_ be modified; it is frozen since it belongs to a cache):

```javascript
let product1 = await Product.get(1);
let plainObject = product1.toJSON();
for (let key in plainObject) {
	// can iterate through the properties of this record
}
```

## Response Object

The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response`-style object with `status`, `headers`, and optionally `body` or `data` properties. This gives you more control over the response, including setting custom headers and status codes. For example, you could return a redirect response like:

```javascript
return { status: 302, headers: { Location: '/new-location' } };
```

If you include a `body` property, it must be a string or buffer that will be returned as the response body. If you include a `data` property, it must be an object that will be serialized as the response body (using standard content negotiation).
For example, we could return an object with a custom header:

```javascript
return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } };
```

### Throwing Errors

You may throw errors (and leave them uncaught) from the response methods, and they will be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a `statusCode` property to errors to indicate the HTTP status code that should be returned. For example:

```javascript
if (notAuthorized()) {
	let error = new Error('You are not authorized to access this');
	error.statusCode = 403;
	throw error;
}
```
diff --git a/versioned_docs/version-4.7/reference/resources/migration.md b/versioned_docs/version-4.7/reference/resources/migration.md
new file mode 100644
index 00000000..51ec4c83
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/resources/migration.md
@@ -0,0 +1,137 @@
---
title: Migration to Resource API version 2 (non-instance binding)
---

# Migration to Resource API version 2 (non-instance binding)

The Resource API was inspired by two major design ideas: the REST architectural design and the [Active Record pattern](https://en.wikipedia.org/wiki/Active_record_pattern) (made popular by Ruby on Rails and heavily used as a pattern in many ORMs). The basic design goal of the Resource API is to integrate these concepts into a single construct that can directly map RESTful methods (specifically the "uniform interface" of HTTP) to an active record data model. However, while the active record pattern has worked well for _consumption_ of data, implementing methods for endpoint definitions and caching sources as a data _provider_ can be confusing and cumbersome. The updated non-instance-binding Resource API is designed to make it easier and more consistent to implement a data provider and interact with records across a table, while maintaining more explicit control over what data is loaded and when.

The updated Resource API is enabled on a per-class basis by setting the static `loadAsInstance` property to `false`. When this property is set to `false`, Resource instances will not be bound to a specific record. Instead, instances represent the whole table, capturing the context and current transactional state. Any records in the table can be loaded or modified from `this` instance. There are a number of implications and behavioral differences for a Resource class with `static loadAsInstance = false`:

- The `get` method (both static and instance) will directly return the record, a frozen enumerable object with direct properties, instead of a Resource instance.
- When instance methods are called, no record is preloaded beforehand, and the resource instance will not have properties mapped to a record.
- All instance methods accept a `target`, an instance of `RequestTarget`, as the first argument, which identifies the target record or query.
  - The `target` will have an `id` property identifying the target resource, along with other target information.
  - The `getId()` method is no longer used and will return `undefined`.
  - The `target` will provide access to query parameters, search operators, and other directives.
  - A `checkPermissions` property on the `target` indicates that a method should check permissions for the request before proceeding. The default instance methods provide the default authorization behavior.
  - This supplants the need for the `allowRead`, `allowUpdate`, `allowCreate`, and `allowDelete` methods, which shouldn't need to be used (and don't provide the id of the target record).
- Any data from a POST, PUT, or PATCH request will be available in the second argument. This reverses the order of the arguments to `put`, `post`, and `patch` compared to the legacy Resource API.
- Context is tracked using asynchronous context tracking, and will automatically be available to calls to other resources. This can be disabled by setting `static explicitContext = true`, which can improve performance.
- The `update` method will return an `Updatable` object (instead of a Resource instance), which provides properties mapped to a record; these properties can be updated, and the changes will be saved when the transaction is committed.

The following are examples of how to migrate to the non-instance-binding Resource API.

Previous code with a `get` method:

```javascript
export class MyData extends tables.MyData {
	async get(query) {
		let id = this.getId(); // get the id
		if (query?.size > 0) {
			// check number of query parameters
			let idWithQuery = id + query.toString(); // add query parameters
			let resource = await tables.MyData.get(idWithQuery, this); // retrieve another record
			resource.newProperty = 'value'; // assign a new value to the returned resource instance
			return resource;
		} else {
			this.newProperty = 'value'; // assign a new value to this instance
			return super.get(query);
		}
	}
}
```

Updated code:

```javascript
export class MyData extends tables.MyData {
	static loadAsInstance = false; // opt in to updated behavior
	async get(target) {
		let id = target.id; // get the id
		let record;
		if (target.size > 0) {
			// check number of query parameters
			let idWithQuery = target.toString(); // this is the full target path with its query parameters
			// we can retrieve another record from this table directly with this.get/super.get or with tables.MyData.get
			record = await super.get(idWithQuery);
		} else {
			record = await super.get(target); // we can just directly use the target as well
		}
		// the record itself is frozen, but we can copy/assign to a new object with additional properties if we want
		return { ...record, newProperty: 'value' };
	}
}
```

Here is an example of the preferred approach for authorization. Previous code with an `allowRead` method:

```javascript
export class MyData extends tables.MyData {
	allowRead(user) {
		// allow any authenticated user
		return user ? true : false;
	}
	async get(query) {
		// any get logic
		return super.get(query);
	}
}
```

Updated code:

```javascript
export class MyData extends tables.MyData {
	static loadAsInstance = false; // opt in to updated behavior
	async get(target) {
		// While you can still use allowRead, it is not called before get is called, and it is generally encouraged
		// to perform/call authorization explicitly in direct get, put, post methods rather than using allow* methods.
		if (!this.getContext().user) throw new Error('Unauthorized');
		target.checkPermissions = false; // authorization complete, no need to further check permissions below
		// if target.checkPermissions is set to true or left in place, the default get method will perform the default permission checks
		return super.get(target); // we can just directly use the target as well
	}
}
```

Here is an example of how to convert/upgrade an implementation of a `post` method. Previous code with a `post` method:

```javascript
export class MyData extends tables.MyData {
	async post(data, query) {
		let resource = await tables.MyData.get(data.id, this);
		if (resource) {
			// update a property
			resource.someProperty = 'value';
			// or
			tables.MyData.patch(data.id, { someProperty: 'value' }, this);
		} else {
			// create a new record
			MyData.create(data, this);
		}
	}
}
```

Updated code:

```javascript
export class MyData extends tables.MyData {
	static loadAsInstance = false; // opt in to updated behavior
	// IMPORTANT: the arguments are reversed:
	async post(target, data) {
		let record = await this.get(data.id);
		if (record) {
			// update a property
			const updatable = await this.update(data.id); // we can alternately pass a target to update
			updatable.someProperty = 'value';
			// or
			await this.patch(data.id, { someProperty: 'value' });
		} else {
			// create a new record
			await this.create(data);
		}
	}
}
```
diff --git a/versioned_docs/version-4.7/reference/resources/query-optimization.md b/versioned_docs/version-4.7/reference/resources/query-optimization.md
new file mode 100644
index 00000000..139b862b
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/resources/query-optimization.md
@@ -0,0 +1,37 @@
---
title: Query Optimization
---

## Query Optimization

Harper has powerful query functionality with excellent performance characteristics. However, like any database, different queries can vary significantly in performance. It is important to understand how querying works in order to optimize your queries for the best performance.

### Query Execution

At a fundamental level, querying involves defining conditions to find matching data, executing those conditions against the database, and delivering the results based on the required fields, relationships, and ordering. Harper supports indexed fields, and these indexes are used to speed up query execution. When conditions are specified in a query, Harper will attempt to utilize indexes to optimize the speed of query execution. When a query specifies a condition on a field that is not indexed, the database must check each potential record to determine if it matches the condition.

When a query is performed with multiple conditions, Harper will attempt to optimize the ordering of these conditions. When using intersecting conditions (the default `and` operator, where matching records must match all conditions), Harper will attempt to apply the most selective and performant condition first. This means that if one condition can use an index and is more selective than another, it will be used first to find the initial matching set of data, which is then filtered by the remaining conditions. A condition that can search an indexed field with good selectivity will be applied before conditions that aren't indexed or are less selective. The `search` method includes an `explain` flag that can be used to return the planned query execution order, as in the sketch below.
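A minimal sketch of inspecting the plan (the `Product` table, its attributes, and the exact shape of the returned plan are illustrative assumptions):

```javascript
// Return the re-ordered conditions rather than normal query results
let plan = await Product.search({
	conditions: [
		{ attribute: 'description', comparator: 'contains', value: 'widget' }, // not index-assisted
		{ attribute: 'price', comparator: 'less_than', value: 100 }, // indexed and selective
	],
	explain: true,
});
// The indexed, more selective price condition should be listed first
```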
This can be useful for debugging and optimizing queries.

For a union (`or`) query, each condition is executed separately and the results are combined/merged.

### Conditions, Operators, and Indexing

When a query is performed, the conditions specified in the query are evaluated against the data in the database. The conditions can be simple or complex, and can include scalar operators such as `=`, `!=`, `>`, `<`, `>=`, `<=`, as well as `starts_with`, `contains`, and `ends_with`. The choice of operator can affect the performance of the query, especially when used with indexed fields. If an indexed field is not used, the database will have to check each potential record to determine if it matches the condition. If the only condition is not indexed, or there are no conditions on an indexed field, the database will have to check every record with a full table scan, which can be very slow for large datasets (and gets slower as the dataset grows, `O(n)`).

The use of indexed fields can significantly improve the performance of a query, providing fast performance even as the database grows in size (`O(log n)`). However, indexed fields require extra writes to the database when performing insert, update, or delete operations, because the index must be updated to reflect the changes in the data. This can slow down write operations, but the trade-off is often worth it if the field is frequently used in queries.

The different operators can also affect the performance of a query. For example, using the `=` operator on an indexed field is generally faster than using the `!=` operator, as the latter requires checking all records that do not match the condition. An index is a sorted list of values, so the greater-than and less-than operators will also utilize indexed fields when possible. If the range is narrow, these operations can be very fast; a wide range could yield a large number of records and will naturally incur more overhead. The `starts_with` operator can also leverage indexed fields because it can quickly find the matching entries in the sorted index. On the other hand, the `contains`, `ends_with`, and not-equal (`!=` or `not_equal`) operators cannot leverage indexes, so they will require a full table scan to find matching records unless they are used in conjunction with a selective, indexed condition. There is a special case of `!= null` which can use indexes to find non-null records; however, this is generally only helpful for sparse fields where only a small subset of records have non-null values. More generally, operators are more efficient when they select on fields with high cardinality.

Conditions can be applied to primary key fields or other indexed fields (known as secondary indexes). In general, querying on a primary key will be faster than querying on a secondary index, as the primary key is the most efficient way to access data in the database and doesn't require cross-referencing to the main records.

### Relationships/Joins

Harper supports relationships between tables, allowing for "join" queries. These do result in more complex queries with potentially larger performance overhead, as more lookups are necessary to connect matched or selected data with other tables. Similar principles apply to conditions that use relationships: indexed fields and comparators that leverage the ordering are still valuable for performance.
It is also important that when a condition on one table is connected to another table through a foreign key, that foreign key is also indexed. Likewise, if a query `select`s data from a related table through a foreign key, that foreign key should be indexed. The same principle of higher cardinality applies here as well: more unique values allow for more efficient lookups.

### Sorting

Queries can also specify a sort order, which can significantly impact performance. If a query specifies a sort order on an indexed field, the database can use the index to quickly retrieve the data in the specified order. A sort order used in conjunction with a condition on the same (indexed) field can utilize the index for ordering. However, if the sort order is not on an indexed field, or the query specifies conditions on different fields, Harper will generally need to sort the data after retrieving it, which can be slow for large datasets. The same principles apply to sorting as to conditions: sorting on a primary key is generally faster than sorting on a secondary index, if the condition aligns with the sort order.

### Streaming

One of the unique and powerful features of Harper's querying functionality is the ability to stream query results. When possible, Harper can return records from a query as they are found, rather than waiting for the entire query to complete. This can significantly improve performance for large queries, as it allows the application to start processing results or sending initial data before the entire query is complete (improving time-to-first-byte, for example). However, using a sort order on a query whose conditions are not on an aligned index requires that the entire query result be loaded in order to perform the sorting, which defeats the streaming benefits.
diff --git a/versioned_docs/version-4.7/reference/roles.md b/versioned_docs/version-4.7/reference/roles.md
new file mode 100644
index 00000000..2e3dc570
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/roles.md
@@ -0,0 +1,117 @@
---
title: Roles
---

# Roles

Roles in Harper are part of the application’s role-based access control (RBAC) system. You can declare roles in your application and manage their permissions through a roles configuration file. When the application starts, Harper will ensure all declared roles exist with the specified permissions, updating them if necessary.

## Configuring Roles

Point to a roles configuration file from your application’s `config.yaml`:

```yaml
roles:
  files: roles.yaml
```

You can declare one or more files. Each file should define one or more roles in YAML format.

## Roles File Structure

A roles file (`roles.yaml`) contains role definitions keyed by role name. Each role may contain:

- **super_user** – a boolean that grants all permissions.
- **databases** – one or more databases the role has access to.
- **tables** – within each database, table-level and attribute-level permissions.

**Full Example**

```yaml
<role_name>:
  super_user: <boolean> # optional
  <database_name>:
    <table_name>:
      read: <boolean>
      insert: <boolean>
      update: <boolean>
      delete: <boolean>
      attributes:
        <attribute_name>:
          read: <boolean>
          insert: <boolean>
          update: <boolean>
```

## Role Flags

- `super_user: true` — grants full system access.
- `super_user: false` — the role only has the explicit permissions defined in the role.

## Database and Table Permissions

Within each role, you may specify one or more databases. Each database can declare permissions for tables.
Example:

```yaml
analyst:
  super_user: false
  data:
    Sales:
      read: true
      insert: false
      update: false
      delete: false
```

In this example, the `analyst` role has read-only access to the `Sales` table in the `data` database.

## Attribute-Level Permissions

You can also grant or deny access at the attribute level within a table.

Example:

```yaml
editor:
  data:
    Articles:
      read: true
      insert: true
      update: true
      attributes:
        title:
          read: true
          update: true
        author:
          read: true
          update: false
```

Here, the `editor` role can update the `title` of an article but cannot update the `author`.

## Multiple Roles

Roles can be defined side by side in a single file:

```yaml
reader:
  super_user: false
  data:
    Dog:
      read: true

writer:
  super_user: false
  data:
    Dog:
      insert: true
      update: true
```

## Behavior on Startup

- If a declared role does not exist, Harper creates it.
- If a declared role already exists, Harper updates its permissions to match the definition.
- Roles are enforced consistently across deployments, keeping access control in sync with your application code.
diff --git a/versioned_docs/version-4.7/reference/storage-algorithm.md b/versioned_docs/version-4.7/reference/storage-algorithm.md
new file mode 100644
index 00000000..03c4c014
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/storage-algorithm.md
@@ -0,0 +1,27 @@
---
title: Storage Algorithm
---

# Storage Algorithm

The Harper storage algorithm is fundamental to Harper's core functionality, enabling the [Dynamic Schema](dynamic-schema) and all other user-facing functionality. Harper is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry-leading performance and functionality, which allows our storage algorithm to store data in tables as rows/objects. This document provides additional details on how data is stored within Harper.

## Query Language Agnostic

The Harper storage algorithm was designed to abstract the data storage from any individual query language. Harper currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage.

## ACID Compliant

Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, Harper offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. Each Harper table has a single writer process, avoiding deadlocks and ensuring that writes are executed in the order in which they were received. Harper tables can have multiple reader processes operating at the same time for consistent, high-scale reads.

## Universally Indexed

All top-level attributes are automatically indexed immediately upon ingestion. The [Harper Dynamic Schema](dynamic-schema) reflexively creates both the attribute and its index as new schema metadata comes in. Indexes are agnostic of datatype, honoring the following order: booleans, then numbers ordered naturally, then strings ordered lexically. Within the LMDB implementation, a table's records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside that environment file. An example of the indexing scheme can be seen below.
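As a rough conceptual illustration of that cross-datatype ordering (hypothetical values, not an actual API):

```javascript
// Values within a single attribute index, in index order:
// booleans first, then numbers (natural order), then strings (lexical order)
const indexOrder = [false, true, -10, 2, 42, 'apple', 'banana', 'cherry'];
```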
## Additional LMDB Benefits

Harper inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting the buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation.

## Harper Indexing Example (Single Table)

![](/img/v4.6/reference/HarperDB-3.0-Storage-Algorithm.png.webp)
diff --git a/versioned_docs/version-4.7/reference/transactions.md b/versioned_docs/version-4.7/reference/transactions.md
new file mode 100644
index 00000000..7e8546fb
--- /dev/null
+++ b/versioned_docs/version-4.7/reference/transactions.md
@@ -0,0 +1,40 @@
---
title: Transactions
---

# Transactions

Transactions are an important part of robust data handling in data-driven applications. Harper provides ACID-compliant support for transactions, allowing for guaranteed atomic, consistent, and isolated data handling within transactions, with durability guarantees on commit. Understanding how transactions are tracked and how they behave is important for properly leveraging transactional support in Harper. For most operations this is very intuitive: each HTTP request is executed in a transaction, so when multiple actions are executed in a single request, they are normally automatically included in the same transaction.

Transactions span a database. Once a read snapshot is started, it is an atomic snapshot of all the tables in a database, and writes that span multiple tables in the database will all be committed atomically together (no writes in one table will be visible before writes in another table in the same database). If a transaction is used to access or write data in multiple databases, a separate database transaction is actually used for each database, and there is no guarantee of atomicity between separate transactions in separate databases. This can be an important consideration when deciding if and how tables should be organized into different databases.

Because Harper is designed to be a low-latency distributed database, locks are avoided in data handling, and transactions do not lock data within the transaction. When a transaction starts, it will provide a read snapshot of the database for any retrievals or queries, which means all reads are performed on a single version of the database, isolated from any other writes that are concurrently taking place. Within a transaction, all writes are aggregated and atomically written on commit. These writes are all isolated (from other transactions) until committed, and all become visible atomically. However, because transactions are non-locking, it is possible that writes from other transactions may occur between when reads are performed and when the writes are committed (at which point the last write will win for any records that have been written concurrently). Support for locks in transactions is planned for a future release.

Transactions can also be explicitly started using the `transaction` global function that is provided in the Harper environment:

## `transaction(context?, callback: (transaction) => any): Promise`

This executes the callback in a transaction, providing a context that can be used for any resource methods that are called. This returns a promise for when the transaction has been committed.
The callback itself may be asynchronous (return a promise), allowing for asynchronous activity within the transaction. This is useful for starting a transaction when your code is not already running within one (in an HTTP request handler, a transaction will typically already be started). For example, if we wanted to run an action on a timer that periodically loads data, we could ensure that the data is loaded in a single transaction like this (note that Harper is multi-threaded, so a timer-based job should very likely run in only one thread):

```javascript
import { tables } from 'harperdb';
import { isMainThread } from 'node:worker_threads';
const { MyTable } = tables;
if (isMainThread)
	// only run the timer on the main thread
	setInterval(async () => {
		let someData = await (await fetch(... some URL ...)).json();
		await transaction(async (txn) => {
			for (let item of someData) {
				await MyTable.put(item, txn);
			}
		});
	}, 3600000); // every hour
```

You can provide your own context object for the transaction to attach to. If you call `transaction` with a context that already has a transaction started, it will simply use the current transaction, execute the callback, and immediately return (this can be useful for ensuring that a transaction has started).

Once the transaction callback is completed (for non-nested transaction calls), the transaction will commit; if the callback throws an error, the transaction will abort. In addition, the callback is called with the `transaction` object, which provides the following methods and property:

- `commit(): Promise` - Commits the current transaction. The transaction will be committed once the returned promise resolves.
- `abort(): void` - Aborts the current transaction and resets it.
- `resetReadSnapshot(): void` - Resets the read snapshot for the transaction, resetting to the latest data in the database.
- `timestamp: number` - The timestamp associated with the current transaction.
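For example, a transaction could be aborted explicitly when a validation check fails (a sketch, assuming a hypothetical `Accounts` table with a `balance` attribute):

```javascript
const { Accounts } = tables; // hypothetical table
await transaction(async (txn) => {
	let account = await Accounts.get('account-1', txn);
	if (!account || account.balance < 100) {
		txn.abort(); // discard the transaction; no writes are committed
		return;
	}
	await Accounts.patch('account-1', { balance: account.balance - 100 }, txn);
	// the transaction commits automatically when the callback completes
});
```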
diff --git a/versioned_sidebars/version-4.7-sidebars.json b/versioned_sidebars/version-4.7-sidebars.json new file mode 100644 index 00000000..0fc204ab --- /dev/null +++ b/versioned_sidebars/version-4.7-sidebars.json @@ -0,0 +1,69 @@ +{ + "docsSidebar": [ + { + "type": "doc", + "id": "index", + "label": "Harper Docs" + }, + { + "type": "category", + "label": "Getting Started", + "items": ["getting-started/installation", "getting-started/quickstart"] + }, + { + "type": "category", + "label": "Foundations of Harper", + "items": ["foundations/harper-architecture", "foundations/core-concepts", "foundations/use-cases"] + }, + { + "type": "category", + "label": "Developers", + "link": { + "type": "generated-index", + "title": "Developer Documentation", + "description": "Comprehensive guides and references for building applications with HarperDB", + "keywords": ["developers", "api", "applications"] + }, + "items": [ + { + "type": "autogenerated", + "dirName": "developers" + } + ] + }, + { + "type": "category", + "label": "Administration", + "items": [ + { + "type": "autogenerated", + "dirName": "administration" + } + ] + }, + { + "type": "category", + "label": "Deployments", + "items": [ + { + "type": "autogenerated", + "dirName": "deployments" + } + ] + }, + { + "type": "category", + "label": "Reference", + "link": { + "type": "doc", + "id": "reference/index" + }, + "items": [ + { + "type": "autogenerated", + "dirName": "reference" + } + ] + } + ] +} diff --git a/versions.json b/versions.json index 740d5077..7bd6509f 100644 --- a/versions.json +++ b/versions.json @@ -1 +1 @@ -["4.6", "4.5", "4.4", "4.3", "4.2", "4.1"] +["4.7", "4.6", "4.5", "4.4", "4.3", "4.2", "4.1"]