diff --git a/antora.yml b/antora.yml index e6e9a5cc..4c98e152 100644 --- a/antora.yml +++ b/antora.yml @@ -8,37 +8,52 @@ nav: asciidoc: attributes: + #General attributes company: 'DataStax' - product: 'Zero Downtime Migration' - product-short: 'ZDM' - product-proxy: 'ZDM Proxy' - product-utility: 'ZDM Utility' - product-automation: 'ZDM Proxy Automation' - product-demo: 'ZDM Demo Client' - dsbulk-migrator: 'DSBulk Migrator' - dsbulk-loader: 'DSBulk Loader' + support-url: 'https://support.datastax.com' + #Other product attributes + cass-reg: 'Apache Cassandra(R)' cass: 'Apache Cassandra' cass-short: 'Cassandra' - cass-reg: 'Apache Cassandra(R)' - cass-migrator: 'Cassandra Data Migrator' - cass-migrator-short: 'CDM' dse: 'DataStax Enterprise (DSE)' dse-short: 'DSE' hcd: 'Hyper-Converged Database (HCD)' hcd-short: 'HCD' + mc: 'Mission Control (MC)' + mc-short: 'MC' + mc-brief: 'Mission Control' + #Astra DB attributes astra-db: 'Astra DB' astra: 'Astra' - data-api: 'Data API' db-serverless: 'Serverless (Non-Vector)' db-serverless-vector: 'Serverless (Vector)' astra-ui: 'Astra Portal' astra-url: 'https://astra.datastax.com' astra-ui-link: '{astra-url}[{astra-ui}^]' + scb: 'Secure Connect Bundle (SCB)' + scb-short: 'SCB' + scb-brief: 'Secure Connect Bundle' + #Sideloader has a specific name in this repo. It is not identical to the one in the Serverless repo. 
sstable-sideloader: '{astra-db} Sideloader' + #devops api attributes devops-api: 'DevOps API' devops-api-ref-url: 'xref:astra-api-docs:ROOT:attachment$devops-api/index.html' - support-url: 'https://support.datastax.com' - mc: 'Mission Control (MC)' - mc-short: 'MC' - -#TODO: Bring SCB attributes \ No newline at end of file + #data api attributes + data-api: 'Data API' + #Migration docs attributes + product: 'Zero Downtime Migration' + product-short: 'ZDM' + product-proxy: 'ZDM Proxy' + product-proxy-repo: 'https://github.com/datastax/zdm-proxy' + product-utility: 'ZDM Utility' + product-automation: 'ZDM Proxy Automation' + product-automation-repo: 'https://github.com/datastax/zdm-proxy-automation' + product-automation-shield: 'image:https://img.shields.io/github/v/release/datastax/zdm-proxy-automation?label=latest[alt="Latest zdm-proxy-automation release on GitHub",link="{product-automation-repo}/releases"]' + product-demo: 'ZDM Demo Client' + dsbulk-migrator: 'DSBulk Migrator' + dsbulk-migrator-repo: 'https://github.com/datastax/dsbulk-migrator' + dsbulk-loader: 'DSBulk Loader' + cass-migrator: 'Cassandra Data Migrator' + cass-migrator-short: 'CDM' + cass-migrator-repo: 'https://github.com/datastax/cassandra-data-migrator' + cass-migrator-shield: 'image:https://img.shields.io/github/v/release/datastax/cassandra-data-migrator?label=latest[alt="Latest cassandra-data-migrator release on GitHub",link="{cass-migrator-repo}/packages"]' \ No newline at end of file diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 0def7a1e..d59aed68 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -1,54 +1,54 @@ +.Get started +* xref:ROOT:components.adoc[] +* xref:ROOT:zdm-proxy-migration-paths.adoc[] +* Product-specific migration paths +** xref:ROOT:astra-migration-paths.adoc[] +** {dse-short} 6.9 +*** xref:6.9@dse:tooling:migration-path-dse.adoc[{dse-short} 6.9 migration tools] +*** xref:6.9@dse:managing:operations/migrate-data.adoc[Migrate data to 
{dse-short} 6.9] +** {dse-short} 6.8 +*** xref:6.8@dse:tooling:migration-path-dse.adoc[{dse-short} 6.8 migration tools] +*** xref:6.8@dse:managing:operations/migrate-data.adoc[Migrate data to {dse-short} 6.8] +** {dse-short} 5.1 +*** xref:5.1@dse:managing:operations/migrate-data.adoc[Migrate data to {dse-short} 5.1] +** {mc-brief} migrations +*** xref:mission-control:migrate:oss-cass-to-mission-control.adoc[Migrate {cass-short} clusters to {mc-short}] +*** xref:mission-control:migrate:dse-to-mission-control.adoc[Migrate {dse-short} clusters to {mc-short}] + .{product} -* xref:introduction.adoc[] -* xref:components.adoc[] +* xref:ROOT:introduction.adoc[] * Planning -** xref:preliminary-steps.adoc[] -** xref:feasibility-checklists.adoc[] -** xref:deployment-infrastructure.adoc[] -** xref:create-target.adoc[] -** xref:rollback.adoc[] +** xref:ROOT:feasibility-checklists.adoc[] +** xref:ROOT:deployment-infrastructure.adoc[] +** xref:ROOT:create-target.adoc[] +** xref:ROOT:rollback.adoc[] * Phase 1 -** xref:phase1.adoc[] -** xref:setup-ansible-playbooks.adoc[] -** xref:deploy-proxy-monitoring.adoc[] -** xref:tls.adoc[] -** xref:connect-clients-to-proxy.adoc[] -** xref:metrics.adoc[] -** xref:manage-proxy-instances.adoc[] +** xref:ROOT:phase1.adoc[] +** xref:ROOT:setup-ansible-playbooks.adoc[] +** xref:ROOT:deploy-proxy-monitoring.adoc[] +** xref:ROOT:tls.adoc[] +** xref:ROOT:connect-clients-to-proxy.adoc[] +** xref:ROOT:metrics.adoc[] +** xref:ROOT:manage-proxy-instances.adoc[] * Phase 2 -** xref:migrate-and-validate-data.adoc[] -** xref:cassandra-data-migrator.adoc[{cass-migrator}] -** xref:dsbulk-migrator.adoc[{dsbulk-migrator}] +** xref:ROOT:migrate-and-validate-data.adoc[] +** xref:sideloader:sideloader-zdm.adoc[] +** xref:ROOT:cassandra-data-migrator.adoc[] +** xref:ROOT:dsbulk-migrator.adoc[] * Phase 3 -** xref:enable-async-dual-reads.adoc[] +** xref:ROOT:enable-async-dual-reads.adoc[] * Phase 4 -** xref:change-read-routing.adoc[] +** 
xref:ROOT:change-read-routing.adoc[] * Phase 5 -** xref:connect-clients-to-target.adoc[] +** xref:ROOT:connect-clients-to-target.adoc[] * References -** xref:troubleshooting-tips.adoc[] -** xref:troubleshooting-scenarios.adoc[] -** xref:contributions.adoc[] -** xref:faqs.adoc[] -** xref:glossary.adoc[] -** https://github.com/datastax/zdm-proxy/releases[{product-proxy} release notes] -** https://github.com/datastax/zdm-proxy-automation/releases[{product-automation} release notes] - -.{cass-migrator} -* xref:cdm-overview.adoc[{cass-migrator}] -* https://github.com/datastax/cassandra-data-migrator/releases[{cass-migrator-short} release notes] - -.{dsbulk-loader} -* xref:dsbulk:overview:dsbulk-about.adoc[{dsbulk-loader}] -* xref:dsbulk:installing:install.adoc[Installing {dsbulk-loader}] -* Loading and unloading data -** xref:dsbulk:getting-started:simple-load.adoc[Loading data without a configuration file] -** xref:dsbulk:getting-started:simple-unload.adoc[Unloading data without a configuration file] -** xref:dsbulk:developing:loading-unloading-vector-data.adoc[Loading and unloading vector data] -** xref:dsbulk:reference:load.adoc[Loading data examples] -** xref:dsbulk:reference:unload.adoc[Unloading data examples] -* xref:dsbulk:reference:dsbulk-cmd.adoc#escaping-and-quoting-command-line-arguments[Escaping and quoting command line arguments] -* https://github.com/datastax/dsbulk/releases[{dsbulk-loader} release notes] +** xref:ROOT:troubleshooting-tips.adoc[] +** xref:ROOT:troubleshooting-scenarios.adoc[] +** xref:ROOT:contributions.adoc[] +** xref:ROOT:faqs.adoc[] +** xref:ROOT:glossary.adoc[] +** {product-proxy-repo}/releases[{product-proxy} release notes] +** {product-automation-repo}/releases[{product-automation} release notes] .{sstable-sideloader} * xref:sideloader:sideloader-overview.adoc[] @@ -58,18 +58,9 @@ * xref:sideloader:cleanup-sideloader.adoc[] * xref:sideloader:troubleshoot-sideloader.adoc[] -.Product-specific migration paths -* {astra-db} -** 
xref:astra-db-serverless:databases:migration-path-serverless.adoc[] -* {dse} -** {dse-short} 6.9 -*** xref:6.9@dse:tooling:migration-path-dse.adoc[{dse-short} 6.9 migration tools] -*** xref:6.9@dse:managing:operations/migrate-data.adoc[Migrate data to {dse-short} 6.9] -** {dse-short} 6.8 -*** xref:6.8@dse:tooling:migration-path-dse.adoc[{dse-short} 6.8 migration tools] -*** xref:6.8@dse:managing:operations/migrate-data.adoc[Migrate data to {dse-short} 6.8] -** {dse-short} 5.1 -*** xref:5.1@dse:managing:operations/migrate-data.adoc[Migrate data to {dse-short} 5.1] -* {mc} -** xref:mission-control:migrate:oss-cass-to-mission-control.adoc[Migrate {cass-short} clusters to {mc-short}] -** xref:mission-control:migrate:dse-to-mission-control.adoc[[Migrate {cass-short} clusters to {dse-short}] \ No newline at end of file +.{cass-migrator} +* xref:ROOT:cdm-overview.adoc[] +* {cass-migrator-repo}/releases[{cass-migrator-short} release notes] + +.{dsbulk-migrator} +* xref:ROOT:dsbulk-migrator-overview.adoc[] \ No newline at end of file diff --git a/modules/ROOT/pages/astra-migration-paths.adoc b/modules/ROOT/pages/astra-migration-paths.adoc new file mode 100644 index 00000000..00c42cba --- /dev/null +++ b/modules/ROOT/pages/astra-migration-paths.adoc @@ -0,0 +1,106 @@ += {astra} Migration Toolkit +:description: Learn which migration tools are compatible with your origin cluster. + +The {astra} Migration Toolkit includes all xref:ROOT:components.adoc[{company} migration tools] that are designed to help you migrate your data to {astra-db}. 
+ +== Migration tool compatibility + +Use the following table to learn which tools are compatible with your current database provider or service: + +[cols="2,1,1,1,1"] +|=== +|Origin |{sstable-sideloader} |{cass-migrator} |{product-proxy} |{dsbulk-migrator}/{dsbulk-loader} + +|Aiven for {cass-short} +|✅ +|✅ +|✅ +|✅ + +|Amazon Keyspaces +|❌ +|✅ +|✅ +|✅ + +|{cass-reg} OSS 3.11 or later +|✅ +|✅ +|✅ +|✅ + +|{cass-reg} OSS 3.10 or earlier +|❌ +|✅ +|✅ +|✅ + +|Azure Cosmos DB ({cass-short} API) +|❌ +|✅ +|✅ +|✅ + +|Azure Managed Instance for {cass} +|✅ +|✅ +|✅ +|✅ + +|{dse-short} 5.1 or later +|✅ +|✅ +|✅ +|✅ + +|{dse-short} 5.0 or earlier +|❌ +|✅ +|✅ +|✅ + +|Instaclustr Managed {cass-short} +|✅ +|✅ +|✅ +|✅ + +|K8ssandra (self-managed) +|✅ +|✅ +|✅ +|✅ + +|Scylla Cloud +|❌ +|✅ +|✅ +|✅ + +|Scylla OSS or Enterprise +|❌ +|✅ +|✅ +|✅ + +|Yugabyte Aeon (YCQL) +|❌ +|✅ +|✅ +|✅ + +|Yugabyte OSS or Anywhere +|❌ +|✅ +|✅ +|✅ + +|=== + +== Get support for your migration + +If you have questions about migrating from a specific source to {astra-db}, contact your {company} account representative, {support-url}[{company} Support], or an https://www.datastax.com/products/datastax-astra/migration-toolkit[{astra} Migration Toolkit expert]. + +== See also + +* xref:astra-db-serverless:databases:migration-path-serverless.adoc[Migrate to {astra-db}] \ No newline at end of file diff --git a/modules/ROOT/pages/cassandra-data-migrator.adoc b/modules/ROOT/pages/cassandra-data-migrator.adoc index 6c5f27e6..79e64dfc 100644 --- a/modules/ROOT/pages/cassandra-data-migrator.adoc +++ b/modules/ROOT/pages/cassandra-data-migrator.adoc @@ -1,15 +1,30 @@ -= {cass-migrator} += Use {cass-migrator} with {product-short} +:navtitle: Use {cass-migrator} +:description: Use {cass-migrator} to migrate data with {product-short} :page-aliases: cdm-parameters.adoc, ROOT:cdm-steps.adoc //This page was an exact duplicate of cdm-overview.adoc and the (now deleted) cdm-steps.adoc, they are just in different parts of the nav. 
// tag::body[] -You can use {cass-migrator} ({cass-migrator-short}) to migrate and validate tables between the origin and target {cass-short} clusters, with optional logging and reconciliation support. +You can use {cass-migrator} ({cass-migrator-short}) to migrate and validate tables between {cass-short}-based clusters. +It is designed to connect to your target cluster, compare it with the origin cluster, log any differences, and, optionally, automatically reconcile inconsistencies and missing data. {cass-migrator-short} facilitates data transfer by creating multiple jobs that access the {cass-short} cluster concurrently, making it an ideal choice for migrating large datasets. It offers extensive configuration options, including logging, reconciliation, performance optimization, and more. -//TODO: Bring over content from the page that introduces the 3 options, and the features, limitations, and performance recommendations in the README https://github.com/datastax/cassandra-data-migrator?tab=readme-ov-file#features +{cass-migrator-short} features include the following: + +* Validate migration accuracy and performance using examples that provide a smaller, randomized data set. +* Preserve internal `writetime` timestamps and Time To Live (TTL) values. +* Use advanced data types, including sets, lists, maps, and UDTs. +* Filter records from the origin cluster's data, using {cass-short}'s internal `writetime` timestamp. +* Use SSL Support, including custom cipher algorithms. + +For more features and information, see the {cass-migrator-repo}?tab=readme-ov-file#features[{cass-migrator-short} GitHub repository]. + +== {cass-migrator} requirements + +To use {cass-migrator-short} successfully, your origin and target clusters must have matching schemas. 
== Install {cass-migrator} @@ -66,7 +81,7 @@ If you deploy CDM on a Spark cluster, you must modify your `spark-submit` comman * Remove parameters related to single-VM installations, such as `--driver-memory` and `--executor-memory`. ==== -. Download the latest `cassandra-data-migrator` JAR file image:https://img.shields.io/github/v/release/datastax/cassandra-data-migrator?label=GitHub[alt="Latest cassandra-data-migrator release on GitHub",link="https://github.com/datastax/cassandra-data-migrator/packages"] from the https://github.com/datastax/cassandra-data-migrator[{cass-migrator-short} repository]. +. Download the latest {cass-migrator-repo}/packages[cassandra-data-migrator JAR file] {cass-migrator-shield}. . Add the `cassandra-data-migrator` dependency to `pom.xml`: + @@ -83,7 +98,7 @@ Replace `**VERSION**` with your {cass-migrator-short} version. . Run `mvn install`. -If you need to build the JAR for local development or your environment only has Scala version 2.12.x, see the alternative installation instructions in the https://github.com/datastax/cassandra-data-migrator?tab=readme-ov-file[{cass-migrator-short} README]. +If you need to build the JAR for local development or your environment only has Scala version 2.12.x, see the alternative installation instructions in the {cass-migrator-repo}?tab=readme-ov-file[{cass-migrator-short} README]. -- ====== @@ -95,13 +110,13 @@ If you use a different name, make sure you specify the correct filename in your . Configure the properties for your environment. + -In the {cass-migrator-short} repository, you can find a https://github.com/datastax/cassandra-data-migrator/blob/main/src/resources/cdm.properties[sample properties file with default values], as well as a https://github.com/datastax/cassandra-data-migrator/blob/main/src/resources/cdm-detailed.properties[fully annotated properties file]. 
+In the {cass-migrator-short} repository, you can find a {cass-migrator-repo}/blob/main/src/resources/cdm.properties[sample properties file with default values], as well as a {cass-migrator-repo}/blob/main/src/resources/cdm-detailed.properties[fully annotated properties file]. + {cass-migrator-short} jobs process all uncommented parameters. Any parameters that are commented out are ignored or use default values. + If you want to reuse a properties file created for a previous {cass-migrator-short} version, make sure it is compatible with the version you are currently using. -Check the https://github.com/datastax/cassandra-data-migrator/releases[{cass-migrator-short} release notes] for possible breaking changes in interim releases. +Check the {cass-migrator-repo}/releases[{cass-migrator-short} release notes] for possible breaking changes in interim releases. For example, the 4.x series of {cass-migrator-short} isn't backwards compatible with earlier properties files. . Store your properties file where it can be accessed while running {cass-migrator-short} jobs using `spark-submit`. @@ -249,7 +264,7 @@ For example: + When validating large datasets or multiple tables, you might want to extract the complete list of missing or mismatched records. There are many ways to do this. -For example, you can grep for all `ERROR` entries in your {cass-migrator-short} log files or use the `log4j2` example provided in the https://github.com/datastax/cassandra-data-migrator?tab=readme-ov-file#steps-for-data-validation[{cass-migrator-short} repository]. +For example, you can grep for all `ERROR` entries in your {cass-migrator-short} log files or use the `log4j2` example provided in the {cass-migrator-repo}?tab=readme-ov-file#steps-for-data-validation[{cass-migrator-short} repository]. 
=== Run a validation job in AutoCorrect mode @@ -293,8 +308,8 @@ For example, you can do the following: * Use the `partition.min` and `partition.max` parameters to migrate or validate specific token ranges. * Use the `track-run` feature to monitor progress and rerun a failed migration or validation job from point of failure. -For all options, see the https://github.com/datastax/cassandra-data-migrator[{cass-migrator-short} repository]. -Specifically, see the https://github.com/datastax/cassandra-data-migrator/blob/main/src/resources/cdm-detailed.properties[fully annotated properties file]. +For all options, see the {cass-migrator-repo}[{cass-migrator-short} repository]. +Specifically, see the {cass-migrator-repo}/blob/main/src/resources/cdm-detailed.properties[fully annotated properties file]. == Troubleshoot {cass-migrator-short} @@ -317,6 +332,6 @@ If you installed an earlier version of {cass-migrator-short}, you might need to ==== You can use the `track-run` feature to track the progress of a migration or validation, and then, if necessary, use the `run-id` to rerun a failed job from the last successful migration or validation point. -For more information, see the https://github.com/datastax/cassandra-data-migrator[{cass-migrator-short} repository] and the https://github.com/datastax/cassandra-data-migrator/blob/main/src/resources/cdm-detailed.properties[fully annotated properties file]. +For more information, see the {cass-migrator-repo}[{cass-migrator-short} repository] and the {cass-migrator-repo}/blob/main/src/resources/cdm-detailed.properties[fully annotated properties file]. 
==== // end::body[] \ No newline at end of file diff --git a/modules/ROOT/pages/components.adoc b/modules/ROOT/pages/components.adoc index 1350062d..25916146 100644 --- a/modules/ROOT/pages/components.adoc +++ b/modules/ROOT/pages/components.adoc @@ -1,18 +1,28 @@ -= Components += Compare {company} migration tools +:navtitle: Compare migration tools +:description: Learn about {company} migration tools. :page-tag: migration,zdm,zero-downtime,zdm-proxy,components -The main component of the {company} {product} product suite is **{product-proxy}**, which by design is a simple and lightweight proxy that handles all the real-time requests generated by your client applications. +{company} migration tools include the {product} {product-short} toolkit and three data migration tools. -{product-proxy} is open-source software (OSS) and available in its https://github.com/datastax/zdm-proxy[Public GitHub repo]. -You can view the source files and contribute code for potential inclusion via Pull Requests (PRs) initiated on a fork of the repo. +{product-short} is comprised of {product-proxy}, {product-utility}, and {product-automation}, which orchestrate activity-in-transition on your clusters. +To move and validate data, you use {sstable-sideloader}, {cass-migrator}, or {dsbulk-migrator}. -The {product-proxy} itself doesn't have any capability to migrate data or knowledge that a migration may be ongoing, and it is not coupled to the migration process in any way. +You can also use {sstable-sideloader}, {cass-migrator-short}, and {dsbulk-migrator} on their own, outside the context of {product-short}. -* {company} {product} also provides the **{product-utility}** and **{product-automation}** to set up and run the Ansible playbooks that deploy and manage the {product-proxy} and its monitoring stack. +== {product-proxy} -* Multiple data migration tools such as **{cass-migrator}** and **{dsbulk-migrator}** are available. 
+The main component of the {company} {product} toolkit is {product-proxy}, which is designed to be a lightweight proxy that handles all real-time requests generated by your client applications during the migration process. -== Role of {product-proxy} +{product-proxy} is open-source software that is available from the {product-proxy-repo}[zdm-proxy GitHub repo]. +This project is open for public contributions. + +The {product-proxy} is an orchestrator for monitoring application activity and keeping multiple clusters in sync through dual writes. +{product-proxy} isn't linked to the actual migration process. +It doesn't perform data migrations and it doesn't have awareness of ongoing migrations. +Instead, you use a data migration tool, like {sstable-sideloader}, {cass-migrator}, or {dsbulk-migrator}, to perform the data migration and validate migrated data. + +=== How {product-proxy} works {company} created {product-proxy} to function between the application and both the origin and target databases. The databases can be any CQL-compatible data store, such as {cass-reg}, {dse}, and {astra-db}. @@ -28,28 +38,23 @@ The primary cluster is initially the origin cluster, and you change it to the ta {product-proxy} is designed to be highly available. It can be scaled horizontally, so typical deployments are made up of a minimum of 3 servers. {product-proxy} can be restarted in a rolling fashion, for example, to change configuration for different phases of the migration. -[TIP] -==== -{product-proxy} has been designed to run in a **clustered** fashion so that it is never a single point of failure. -Unless it is for a demo or local testing environment, a {product-proxy} deployment should always comprise multiple {product-proxy} instances. - -The term {product-proxy} indicates the whole deployment, and {product-proxy} instance refers to an individual proxy process in the deployment. 
-==== - === Key features of {product-proxy} -* Allows you to lift-and-shift existing application code from your origin cluster to your target cluster by changing only the connection string. +* Allows you to lift-and-shift existing application code from your origin cluster to your target cluster by changing only the connection string, if all else is compatible. -* Reduces risks to upgrades and migrations by decoupling the origin cluster from the target cluster, and allowing you to determine an explicit cut-over point once you're ready to commit to using the target cluster permanently. +* Reduces risks to upgrades and migrations by decoupling the origin cluster from the target cluster. +You can determine an explicit cut-over point once you're ready to commit to using the target cluster permanently. * Bifurcates writes synchronously to both clusters during the migration process. -* Returns (for read operations) the response from the primary cluster, which is its designated source of truth. +* Read operations return the response from the primary cluster, which is its designated source of truth. ++ During a migration, the primary cluster is typically the origin cluster. Near the end of the migration, you shift the primary cluster to be the target cluster. -* Can be configured to also read asynchronously from the target cluster. -This capability is called **Asynchronous Dual Reads** (also known as **Read Mirroring**), and it allows you to observe what read latencies and throughput the target cluster can achieve under the actual production load. +* Option to read asynchronously from the target cluster as well as the origin cluster. +This capability is called **Asynchronous Dual Reads** or **Read Mirroring**, and it allows you to observe what read latencies and throughput the target cluster can achieve under the actual production load. ++ ** Results from the asynchronous reads executed on the target cluster are not sent back to the client application. 
** This design implies that a failure on asynchronous reads from the target cluster does not cause an error on the client application. ** Asynchronous dual reads can be enabled and disabled dynamically with a rolling restart of the {product-proxy} instances. @@ -61,57 +66,31 @@ This behavior is expected and desired. The idea is to mimic the full read and write load on the target cluster so there are no surprises during the last migration phase; that is, after cutting over completely to the target cluster. ==== -=== {product-utility} and {product-automation} - -https://www.ansible.com/[Ansible] is a suite of software tools that enables infrastructure as code. -It is open source and its capabilities include software provisioning, configuration management, and application deployment functionality. - -The Ansible automation for {product-short} is organized into playbooks, each implementing a specific operation. -The machine from which the playbooks are run is known as the Ansible Control Host. -In {product-short}, the Ansible Control Host will run as a Docker container. +=== Run multiple {product-proxy} instances -You will use the **{product-utility}** to set up Ansible in a Docker container, and **{product-automation}** to run the Ansible playbooks from the Docker container created by {product-utility}. -In other words,the {product-utility} creates the Docker container acting as the **Ansible Control Host**, from which the {product-automation} allows you to deploy and manage the {product-proxy} instances and the associated monitoring stack - Prometheus metrics and Grafana visualization of the metric data. - -{product-utility} and {product-automation} expect that you have already provisioned the recommended infrastructure, as outlined in xref:deployment-infrastructure.adoc[]. - -The source for both of these tools are in a public repo. 
- -For details, see: - -* xref:setup-ansible-playbooks.adoc[] -* xref:deploy-proxy-monitoring.adoc[] - -== Data migration tools +{product-proxy} has been designed to run in a clustered fashion so that it is never a single point of failure. +Unless it is for a demo or local testing environment, a {product-proxy} deployment should always comprise multiple {product-proxy} instances. -As part of the overall migration process, you can use {cass-migrator} and/or {dsbulk-migrator} to migrate your data. -Other technologies such as Apache Spark(TM) can be used to write your own custom data migration process. +Throughout the documentation, the term _{product-proxy} deployment_ refers to the entire deployment, and _{product-proxy} instance_ refers to an individual proxy process in the deployment. -=== {cass-migrator} +You can use the {product-utility} and {product-automation} to set up and run Ansible playbooks that deploy and manage {product-proxy} and its monitoring stack. -[TIP] -==== -To use {cass-migrator}, the schema on your origin and target clusters must match. -==== +== {product-utility} and {product-automation} -Use {cass-migrator} to: +You can use the {product-automation-repo}[{product-utility} and {product-automation}] to set up and run Ansible playbooks that deploy and manage {product-proxy} and its monitoring stack. -* Migrate your data from any CQL-supported origin cluster to any CQL-supported target cluster. -Examples of databases that support CQL are {cass-reg}, {dse}, and {astra-db}. -* Validate migration accuracy and performance using examples that provide a smaller, randomized data set. -* Preserve internal `writetime` timestamps and Time To Live (TTL) values. -* Take advantage of advanced data types (Sets, Lists, Maps, UDTs). -* Filter records from the origin cluster's data, using {cass-short}'s internal `writetime` timestamp. -* Use SSL Support, including custom cipher algorithms. 
+https://www.ansible.com/[Ansible] is a suite of software tools that enables infrastructure as code. +It is open source and its capabilities include software provisioning, configuration management, and application deployment functionality. +The Ansible automation for {product-short} is organized into playbooks, each implementing a specific operation. +The machine from which the playbooks are run is known as the Ansible Control Host. +In {product-short}, the Ansible Control Host runs as a Docker container. -{cass-migrator} is designed to: +You use the {product-utility} to set up Ansible in a Docker container, and then you use {product-automation} to run the Ansible playbooks from the Docker container created by {product-utility}. -* Connect to and compare your target database/cluster with the origin database/cluster. -* Report differences in a detailed log file. -* Optionally reconcile any missing records and fix any data inconsistencies in the target cluster by enabling `autocorrect` in a config file. +The {product-utility} creates the Docker container acting as the Ansible Control Host, from which {product-automation} allows you to deploy and manage the {product-proxy} instances and the associated monitoring stack, which includes Prometheus metrics and Grafana visualizations of the metrics data. -=== {dsbulk-migrator} +To use {product-utility} and {product-automation}, you must prepare the recommended infrastructure, as explained in xref:deployment-infrastructure.adoc[]. -You can also take advantage of {dsbulk-migrator} to migrate smaller sets of data. +For more information, see xref:setup-ansible-playbooks.adoc[] and xref:deploy-proxy-monitoring.adoc[]. -For more about both tools, see xref:migrate-and-validate-data.adoc[]. 
include::ROOT:migrate-and-validate-data.adoc[tags=migration-tool-summaries] \ No newline at end of file diff --git a/modules/ROOT/pages/connect-clients-to-proxy.adoc b/modules/ROOT/pages/connect-clients-to-proxy.adoc index 1d1e3a92..e0a4ff58 100644 --- a/modules/ROOT/pages/connect-clients-to-proxy.adoc +++ b/modules/ROOT/pages/connect-clients-to-proxy.adoc @@ -15,24 +15,18 @@ You can use the provided sample client applications, in addition to your own, as Finally, we will explain how to connect the `cqlsh` command-line client to the {product-proxy}. -== {company} drivers +== {company}-compatible drivers -You can use {company} drivers to connect your client applications to {cass-short}, {dse-short}, and {astra-db}: +You can use {cass-short} drivers to connect your client applications to {cass-short}, {dse-short}, {hcd-short}, and {astra-db}. +With drivers, you can execute queries, iterate through results, access metadata about your cluster, and perform other related activities. -* https://github.com/datastax/java-driver[{company} Java driver] -* https://github.com/datastax/python-driver[{company} Python driver] -* https://github.com/datastax/csharp-driver[{company} C# driver] -* https://github.com/datastax/cpp-driver[{company} C/{cpp} driver] -* https://github.com/datastax/nodejs-driver[{company} Node.js driver] - -These drivers provide a native implementation of the messaging protocols used to communicate with a {cass-short} cluster, {dse-short} cluster, or {astra-db}. -They allow you to execute queries, iterate through results, access metadata about your cluster, and perform other related activities. +For available drivers and driver documentation, see xref:datastax-drivers:compatibility:driver-matrix.adoc[]. 
[[_connecting_company_drivers_to_cassandra]] -== Connecting {company} drivers to {cass-short} +== Connect drivers to {cass-short} -Perhaps the simplest way to demonstrate how to use the {company} drivers to connect your client application to a {cass-short} cluster is an example in the form of some sample code. -But there's a bit of a problem: the {company} drivers are independent projects implemented natively in the relevant programming language. +Perhaps the simplest way to demonstrate how to use the drivers to connect your client application to a {cass-short} cluster is an example in the form of some sample code. +But there's a bit of a problem: the drivers are independent projects implemented natively in the relevant programming language. This approach offers the benefit of allowing each project to provide an API that makes the most sense for the language or platform on which it's implemented. Unfortunately it also means there is some variation between languages. @@ -70,10 +64,11 @@ Client programs create a Session directly. The details may vary but you'll still see the same general pattern described in the pseudocode in each of the drivers. -This topic does not describe details or APIs for any of the {company} drivers mentioned above. +This topic does not describe details or APIs for any of the drivers mentioned above. All the drivers come with a complete set of documentation for exactly this task. The following links provide some good starting points for learning about the interfaces for each specific driver: +//TODO: Move this to the driver docs and replace this whole list with a link to the connect page. * The https://docs.datastax.com/en/developer/java-driver/latest/manual/core/[core driver section] of the Java driver manual. * The https://docs.datastax.com/en/developer/python-driver/latest/getting_started/[getting started guide] for the Python driver. 
* The https://docs.datastax.com/en/developer/csharp-driver/latest/index.html#basic-usage[basic usage section] of the C# driver documentation. @@ -86,10 +81,10 @@ The links above lead to the documentation for the most recent version of each dr You can find the documentation for earlier versions by selecting the appropriate version number from the drop-down menu in the upper right. ==== -== Connecting {company} drivers to {product-proxy} +== Connect drivers to {product-proxy} We mentioned above that connecting to a {product-proxy} should be almost indistinguishable from connecting directly to your {cass-short} cluster. -This design decision means there isn't much to say here; everything we discussed in the section above also applies when connecting your {company} driver to a {product-proxy}. +This design decision means there isn't much to say here; everything we discussed in the section above also applies when connecting your driver to {product-proxy}. There are a few extra considerations to keep in mind, though, when using the proxy. === Client-side compression @@ -137,10 +132,14 @@ This is also the case if authentication is required by the target only, but not .How different sets of credentials are used by the {product-proxy} when authentication is enabled on both clusters image::zdm-proxy-credential-usage.png[{product-proxy} credentials usage, 550] -=== A note on the Secure Connect Bundle +=== {astra-db} credentials + +If your {product-proxy} is configured to use {astra-db} as the origin or target cluster, then your client application doesn't need to provide a {scb} when connecting to the proxy. 
+ +As an alternative to providing the {scb-short} directly, you can xref:astra-db-serverless:administration:manage-application-tokens.adoc[generate an application token] with the *Organization Administrator* role, and then specify one of the following sets of credentials generated with the token: -If your {product-proxy} is configured to use {astra-db} as the origin or target cluster, then your client application **does not need** to provide a Secure Connect Bundle (SCB) when connecting to the proxy. -It will, however, have to supply an {astra-db} application token's client ID and client secret as a username and password (respectively). +* Token-only authentication: Set `username` to the literal string `token`, and set `password` to your {astra-db} application token. +* Client ID and secret authentication (legacy): Set `username` to the `clientId` generated with your application token, and then set `password` to the `secret` generated with your application token. == Sample client applications @@ -157,7 +156,7 @@ You can find the details of building and running {product-demo} in the https://g [[_themis_client]] === Themis client -https://github.com/absurdfarce/themis[Themis] is a Java command-line client application that allows you to insert randomly-generated data into some combination of these three sources: +https://github.com/absurdfarce/themis[Themis] is a Java command-line client application that allows you to insert randomly generated data into some combination of these three sources: * Directly into the origin * Directly into the target @@ -180,7 +179,7 @@ Using CQLSH to connect to a {product-proxy} instance is very easy: * Download CQLSH for free from https://downloads.datastax.com/#cqlsh[here] on a machine that has connectivity to the {product-proxy} instances: ** To connect to the {product-proxy}, any version is fine. 
-** The {astra}-compatible version additionally supports connecting directly to an {astra-db} cluster by passing the cluster's Secure Connect Bundle and valid credentials. +** The {astra}-compatible version additionally supports connecting directly to an {astra-db} cluster by passing the cluster's {scb-short} and valid credentials. * Install it by uncompressing the archive: `tar -xvf cqlsh-<...>.tar.gz`. * Navigate to the `cqlsh-<...>/bin` directory, for example `cd cqlsh-astra/bin`. * Launch CQLSH: diff --git a/modules/ROOT/pages/connect-clients-to-target.adoc b/modules/ROOT/pages/connect-clients-to-target.adoc index 534c5557..a887a1c2 100644 --- a/modules/ROOT/pages/connect-clients-to-target.adoc +++ b/modules/ROOT/pages/connect-clients-to-target.adoc @@ -22,38 +22,38 @@ image::migration-phase5ra.png[In Phase 5, your apps no longer using the proxy an == Configuring your driver to connect to a generic CQL cluster If your target cluster is a generic CQL cluster, such as {cass-short} or {dse-short}, then you can connect your client application to it in a similar way as you previously connected it to the origin cluster, but with the appropriate contact points and any additional configuration that your target cluster may require. -For further information, please refer to the documentation of the driver language and version that you are using. +For further information, see the documentation for your driver language and version. == Configuring your driver to connect to {astra-db} -//TODO: You can use an AstraCS token and the literal string `token` instead of clientID and client secret. +To connect to {astra-db}, you need the following: -To connect to {astra-db}, you need: - -* The ClientID and Client Secret from an {astra-db} application token with *Organization Administrator* permissions for the organization to which your {astra-db} database belongs. 
+* The xref:astra-db-serverless:administration:manage-application-tokens.adoc[application token] credentials that you used to xref:ROOT:connect-clients-to-proxy.adoc[connect your applications to the {product-proxy}]. ++ +As before, you can use either of the following sets of credentials to connect to your {astra-db} database: + -You will already have used these credentials when you configured the {product-proxy} to connect to your {astra-db} database as the target cluster. -For more information on creating credentials (tokens), see xref:astra-db-serverless:administration:manage-application-tokens.adoc[]. +** Token-only authentication: Set `username` to the literal string `token`, and set `password` to your {astra-db} application token. +** Client ID and secret authentication (legacy): Set `username` to the `clientId` generated with your application token, and then set `password` to the `secret` generated with your application token. -* Your {astra-db} database's Secure Connect Bundle (SCB). +* Your {astra-db} database's {scb}. + -The SCB is a zip file that contains TLS encryption certificates and other metadata required to connect to your database. -Databases can have one or more SCBs. +The {scb-short} is a zip file that contains TLS encryption certificates and other metadata required to connect to your database. +Databases can have one or more {scb-short}s. For more information, see xref:astra-db-serverless:drivers:secure-connect-bundle.adoc[]. + [IMPORTANT] ==== -The SCB contains sensitive information that establishes a connection to your database, including key pairs and certificates. -Treat is as you would any other sensitive values, such as passwords or tokens. +The {scb-short} contains sensitive information that establishes a connection to your database, including key pairs and certificates. +Treat it as you would any other sensitive values, such as passwords or tokens. ==== * Recommended: A driver language and version that is compatible with {astra-db}. 
For more information, see xref:datastax-drivers:compatibility:driver-matrix.adoc[]. -If your client application uses an old version of a driver without built-in SCB support, {company} strongly recommends upgrading to a compatible driver to simplify configuration and get the latest features and bug fixes. -However, you can still connect to {astra-db} for this migration by using https://github.com/datastax/cql-proxy[CQL Proxy] or extracting the SCB archive and using the individual files to enable mTLS in your driver's configuration. +If your client application uses an old version of a driver without built-in {scb-short} support, {company} strongly recommends upgrading to a compatible driver to simplify configuration and get the latest features and bug fixes. +However, you can still connect to {astra-db} for this migration by using https://github.com/datastax/cql-proxy[CQL Proxy] or extracting the {scb-short} archive and using the individual files to enable mTLS in your driver's configuration. -// The SCB support was made available beginning the following versions in the drivers: +// The {scb-short} support was made available beginning the following versions in the drivers: // // * https://docs.datastax.com/en/developer/cpp-driver/latest/changelog/#2-14-0[Beginning `2.14.0` of {company} C++ Driver]. // @@ -67,7 +67,7 @@ However, you can still connect to {astra-db} for this migration by using https:/ // // Based on this, follow the instructions in the relevant section below. -If your driver has built-in support for the {astra-db} Secure Connect Bundle (SCB), the changes to enable your application to connect to {astra-db} are minimal. +If your driver has built-in support for the {astra-db} {scb-short}, the changes to enable your application to connect to {astra-db} are minimal. 
//Recalling the xref:connect-clients-to-proxy.adoc#_connecting_company_drivers_to_cassandra[pseudocode to enable your client application to connect to the proxy], here it is how your code needs to change to connect directly to {astra-db}: diff --git a/modules/ROOT/pages/contributions.adoc b/modules/ROOT/pages/contributions.adoc index 14e76d96..b81ffe71 100644 --- a/modules/ROOT/pages/contributions.adoc +++ b/modules/ROOT/pages/contributions.adoc @@ -1,7 +1,6 @@ = Contribution guidelines :page-tag: migration,zdm,zero-downtime,zdm-proxy,contributions -//TODO: remove this. There is a contribution guide on GH. https://github.com/datastax/zdm-proxy/blob/main/CONTRIBUTING.md {company} {product} ({product-short}) provides a simple and reliable way for users to migrate an existing {cass-reg} or {dse} cluster to {astra-db}, or to any {cass-short} or {dse-short} cluster, without any interruption of service to the client applications and data. The {product-proxy} is open source software (OSS). We welcome contributions from the developer community via Pull Requests on a fork, for evaluation by the {product-short} team. @@ -11,21 +10,21 @@ Code contributions for those additional components are not open for PRs at this == {product-proxy} License -{product-proxy} is licensed under the open-source https://github.com/datastax/zdm-proxy/blob/main/LICENSE[**Apache-2.0 license agreement**]. +{product-proxy} is licensed under the open-source {product-proxy-repo}/blob/main/LICENSE[**Apache-2.0 license agreement**]. == Contributor License Agreement Acceptance of the {company} https://cla.datastax.com/[Contributor License Agreement] (CLA) is required before we can consider accepting your {product-proxy} code contribution. -Refer to the https://cla.datastax.com/[CLA terms] and, if you agree, indicate your acceptance on each Pull Request (PR) that you submit while using the https://github.com/datastax/zdm-proxy[{product-proxy} GitHub repository]. 
+Refer to the https://cla.datastax.com/[CLA terms] and, if you agree, indicate your acceptance on each Pull Request (PR) that you submit while using the {product-proxy-repo}[{product-proxy} GitHub repository]. // You will see the CLA listed on the standard pull request checklist (TBS) -// for the https://github.com/datastax/zdm-proxy[{product-proxy}] repository. +// for the {product-proxy-repo}[{product-proxy}] repository. == {product-proxy} code contributions The overall procedure: -. Fork the https://github.com/datastax/zdm-proxy/[{product-proxy} open-source public repo]. +. Fork the {product-proxy-repo}[{product-proxy} open-source public repo]. . Make your changes locally on your fork. Git commit and push only to your fork. . Wait for CI to run successfully in GitHub Actions before submitting a PR. . Submit a Pull Request (PR) with your forked updates. @@ -34,19 +33,19 @@ As noted above, be sure to indicate in the PR's Comments your acceptance (if you . Wait for the automated PR workflow to do some checks. Members of the {product-proxy} community will review your PR and decide whether to approve and merge it. -In addition to potential {product-proxy} OSS code contribution, we encourage you to submit feedback and ideas via GitHub Issues in the repo, starting from https://github.com/datastax/zdm-proxy/issues. +In addition to potential {product-proxy} OSS code contribution, we encourage you to submit feedback and ideas via GitHub Issues in the repo, starting from {product-proxy-repo}/issues. Add a label to help categorize the issue, such as the complexity level, component name, and other labels you'll find in the repo's Issues display. == Submitting GitHub Issues in related public repos The following {company} {product} GitHub repos are public. You are welcome to read the source and submit feedback and ideas via GitHub Issues per repo. 
-In addition to the https://github.com/datastax/zdm-proxy[{product-proxy}] open-source repo, refer to: +In addition to the {product-proxy-repo}[{product-proxy}] open-source repo, refer to: -* https://github.com/datastax/zdm-proxy-automation/issues[{product-automation}] repo for Ansible-based {product-automation} and {product-utility}. +* {product-automation-repo}/issues[{product-automation}] repo for Ansible-based {product-automation} and {product-utility}. -* https://github.com/datastax/cassandra-data-migrator/issues[{cass-migrator}] repo. +* {cass-migrator-repo}/issues[{cass-migrator}] repo. -* https://github.com/datastax/dsbulk-migrator/issues[{dsbulk-migrator}] repo. +* {dsbulk-migrator-repo}/issues[{dsbulk-migrator}] repo. Again, add a label to help categorize each issue, such as the complexity level, component name, and other labels you'll find in the repo's Issues display. diff --git a/modules/ROOT/pages/create-target.adoc b/modules/ROOT/pages/create-target.adoc index 32f189b6..dceb98ef 100644 --- a/modules/ROOT/pages/create-target.adoc +++ b/modules/ROOT/pages/create-target.adoc @@ -11,7 +11,7 @@ This section covers in detail the steps to prepare an {astra-db} Serverless data If you intend to use {astra-db} as the target for the migration, you will need to: * Create an {astra-db} Serverless database. -* Retrieve its Secure Connect Bundle (SCB) and upload it to the application instances. +* Retrieve its {scb} and upload it to the application instances. * Create {astra-db} access credentials for your database. * Create the client application schema. @@ -36,28 +36,26 @@ Assign your preferred values for the serverless database: When the {astra-db} database reaches **Active** status, create an application token in the {astra-ui} with the *Read/Write User* role. This role will be used by the client application, the {product-proxy}, and the {product-automation}. 
-Save the generate token and credentials (Client ID, Client Secret, and Token) in a clearly-named secure file. +Save the generated token and credentials (Client ID, Client Secret, and Token) in a clearly named secure file. -=== Get the Secure Connect Bundle and upload to client instances +=== Get the {scb-brief} and upload to client instances -//TODO: Bring SCB attributes - -xref:astra-db-serverless:drivers:secure-connect-bundle.adoc[Download your {astra-db} database's Secure Connect Bundle (SCB)]. -The SCB is a zip file that contains TLS encryption certificates and other metadata required to connect to your database. +xref:astra-db-serverless:drivers:secure-connect-bundle.adoc[Download your {astra-db} database's {scb}]. +The {scb-short} is a zip file that contains TLS encryption certificates and other metadata required to connect to your database. [IMPORTANT] ==== -The SCB contains sensitive information that establishes a connection to your database, including key pairs and certificates. +The {scb-short} contains sensitive information that establishes a connection to your database, including key pairs and certificates. Treat is as you would any other sensitive values, such as passwords or tokens. ==== -Your client application uses the SCB to connect directly to {astra-db} near the end of the migration, and {cass-migrator} or {dsbulk-migrator} use the SCB to migrate and validate data in {astra-db}. +Your client application uses the {scb-short} to connect directly to {astra-db} near the end of the migration, and {cass-migrator} or {dsbulk-migrator} use the {scb-short} to migrate and validate data in {astra-db}.
-Use `scp` to copy the SCB to your client application instance: +Use `scp` to copy the {scb-short} to your client application instance: [source,bash] ---- -scp -i secure-connect-.zip @: +scp -i <ssh-key-file> /path/to/scb.zip <username>@<client-instance-ip>:<destination-directory> ---- === Create the client application schema on your {astra-db} database @@ -81,7 +79,7 @@ For more information, see xref:astra-db-serverless:cql:develop-with-cql.adoc#uns You must replace these with supported indexes. For more information, see xref:astra-db-serverless:cql:develop-with-cql.adoc[CQL for {astra-db}]. -To help you prepare the schema from the DDL in your origin cluster, consider using the `generate-ddl` functionality in the link:https://github.com/datastax/dsbulk-migrator[{dsbulk-migrator}]. +To help you prepare the schema from the DDL in your origin cluster, consider using the `generate-ddl` functionality in the {dsbulk-migrator-repo}[{dsbulk-migrator}]. However, this tool doesn't automatically convert MVs or indexes. CQL statements, such as those used to reproduce the schema on the target database, can be executed in {astra-db} using the built-in CQL shell or the standalone CQL shell. @@ -127,5 +125,9 @@ Make sure that all keyspaces and tables being migrated are identical to the corr ==== * To copy the schema, you can run CQL `describe` on the origin cluster to get the schema that is being migrated, and then run the output on your new cluster. -Bear in mind that, if you are migrating from an old version, you may need to adapt some CQL clauses that are no longer supported in newer versions (e.g. `COMPACT STORAGE`). -Please refer to the documentation of the relevant versions for more information. \ No newline at end of file +If you are migrating from an old version, you might need to edit CQL clauses that are no longer supported in newer versions, such as `COMPACT STORAGE`. +For specific changes in each version, see the release notes for the relevant database versions.
+ +== Next steps + +* xref:ROOT:rollback.adoc[] \ No newline at end of file diff --git a/modules/ROOT/pages/deploy-proxy-monitoring.adoc b/modules/ROOT/pages/deploy-proxy-monitoring.adoc index ef6e96ad..5f5067a2 100644 --- a/modules/ROOT/pages/deploy-proxy-monitoring.adoc +++ b/modules/ROOT/pages/deploy-proxy-monitoring.adoc @@ -45,18 +45,6 @@ This is always required. * `zdm_proxy_advanced_config.yml`: Contains advanced configuration that is required in some scenarios, but often left to the default values. * `zdm_proxy_custom_tls_config.yml`: Configures TLS encryption, if needed. -//// -Starting in version 2.2.0 of the {product-automation}, we added the `zdm_proxy_cluster_config.yml` file to contain all the configuration variables for the origin and target clustesr. -Prior to version 2.2.0, the variables were in the `zdm_proxy_core_config.yml` file. - -[TIP] -==== -This change is backward compatible. -If you previously populated the variables in `zdm_proxy_core_config.yml`, these variables will be honored and take precedence over any variables in `zdm_proxy_cluster_config.yml`, if both files are present. -==== - -If you are using a {product-automation} version up to and including 2.1.0, please use `zdm_proxy_core_config.yml` to configure access to your clusters. -//// === Container configuration The first step of the proxy container configuration is to open the `zdm_proxy_container_config.yml` file. Configure the desired {product-proxy} version and create a strategy to inject configuration parameters. @@ -66,48 +54,66 @@ Starting with {product-short} 2.3.0, you can inject the configuration with the Y === Cluster and core configuration The next step is to edit the `zdm_proxy_cluster_config.yml` file in the Docker container. -You'll want to enter your {cass-short}/{dse-short} username, password, and other variables. +You will need to provide values like your {cass-short}/{dse-short} username, password, and other connection credentials. 
-In the container shell, `cd` to `~/zdm-proxy-automation/ansible/vars` and edit `zdm_proxy_cluster_config.yml`. -The `vi` and `nano` text editors are available in the container. +. Get connection credentials for your origin and target clusters. ++ +* Self-managed clusters with authentication enabled: You need a valid username and password for the cluster. +* {astra-db} databases: xref:astra-db-serverless:administration:manage-application-tokens.adoc[Generate an application token] with a role that can read and write to your database, such as the *Database Administrator* role, and then store the token values (`clientId`, `secret`, and `token`) securely. -[NOTE] +. In the container shell, `cd` to `~/zdm-proxy-automation/ansible/vars` and edit `zdm_proxy_cluster_config.yml`. +The `vi` and `nano` text editors are available in the container. ++ +.If you are on {product-automation} version 2.1.0 or earlier +[%collapsible] ==== -Starting in version 2.2.0 of the {product-automation}, we added the `zdm_proxy_cluster_config.yml` file to contain all the configuration variables for the origin and target clusters. -Prior to version 2.2.0, the variables were in the `zdm_proxy_core_config.yml` file. +Starting in version 2.2.0 of the {product-automation}, all origin and target cluster configuration variables are stored in `zdm_proxy_cluster_config.yml`. +In earlier versions, these variables are in the `zdm_proxy_core_config.yml` file. -If you are using an automation version up to and including 2.1.0, please use `zdm_proxy_core_config.yml` to configure access to your clusters. +This change is backward compatible. +If you previously populated the variables in `zdm_proxy_core_config.yml`, these variables are honored and take precedence over any variables in `zdm_proxy_cluster_config.yml`, if both files are present. +However, consider updating your configuration to use the new file to take advantage of new features in later releases. 
==== -There are two sets of variables that you use to configure the {product-proxy} connection to the origin and target clusters. +. In the `ORIGIN CONFIGURATION` and `TARGET CONFIGURATION` sections, uncomment and configure all variables that are required for {product-proxy} to connect to the origin and target clusters. ++ The variables for the origin cluster are prefixed with `origin`, and the variables for the target cluster are prefixed with `target`. - -These two sections are always required. - -Uncomment and provide values for the appropriate variables in each section for the respective cluster, as follows: - -. Cluster credentials: -.. If it is a self-managed cluster, `*_username` and `*_password` must be valid credentials for it. -Leave blank if authentication is not enabled on the cluster. -.. If it is an {astra-db} database, authentication is always enabled: `*_username` must be the Client ID and `*_password` the Client Secret of a valid {astra-db} set of credentials with the `R/W User` role. -. Contact points and port (only relevant for self-managed clusters, leave unset for {astra-db}) -.. `*_contact points`: comma-separated list of IP addresses of the cluster's seed nodes. -.. `*_port`: port on which the cluster listens for client connections. Defaults to 9042. -. For {astra-db}, choose one of the following options and leave unset the other (leave both unset for self-managed clusters): -.. If you wish to manually provide the cluster's Secure Connect Bundle: -... Download it from the {astra-ui} and place it on the jumphost -... Copy it to the container. Open a new shell on the jumphost, run `docker cp zdm-ansible-container:/home/ubuntu` -... Specify its path in `*_astra_secure_connect_bundle_path`. -.. Otherwise, if you wish the automation to download the cluster's Secure Connect Bundle for you, just specify the two following variables: -... 
`*_astra_db_id`: the cluster's https://docs.datastax.com/en/astra/astra-db-vector/faqs.html#where-do-i-find-the-organization-id-database-id-or-region-id[database id]. -... `*_astra_token`: the token value from an {astra-db} application token with the **Read/Write User** role, prefixed by `AstraCS:`. - -Save the file and exit the editor. - -.Example: zdm_proxy_cluster_config.yml +You must provide connection details in both sections, otherwise {product-proxy} won't be able to connect to both clusters. ++ +* `*_username` and `*_password`: +** For a self-managed cluster with authentication enabled, provide valid username and password values to access the cluster. +** For a self-managed cluster without authentication, leave both values unset. +** For an {astra-db} database, use the values generated with your application token. +Either set `username` to the `clientId` and `password` to the `secret`, or set `username` to the literal string `token` and set `password` to the `token` value, which is prefixed by `AstraCS:`. +* `*_contact_points`: +** For a self-managed cluster, provide a comma-separated list of IP addresses for the cluster's seed nodes. +** For an {astra-db} database, leave this unset. +* `*_port`: +** For a self-managed cluster, provide the port on which the cluster listens for client connections. +The default is 9042. +** For an {astra-db} database, leave this unset. +* `*_astra_secure_connect_bundle_path`, `*_astra_db_id`, and `*_astra_token`: +** For a self-managed cluster, leave all of these unset. +** For an {astra-db} database, provide either `*_astra_secure_connect_bundle_path` _or both_ `*_astra_db_id` and `*_astra_token`. +*** If you want {product-automation} to automatically download your database's {scb}, use `*_astra_db_id` and `*_astra_token`. +Set `*_astra_db_id` to your xref:astra-db-serverless:databases:create-database.adoc#get-db-id[database's ID], and set `*_astra_token` to your application token, which is prefixed by `AstraCS:`. 
+*** If you want to manually upload your database's {scb-short} to the jumphost, use `*_astra_secure_connect_bundle_path`. ++ +.Manually upload the {scb-short} to the jumphost [%collapsible] ==== -The following example `zdm_proxy_cluster_config.yml` file shows the configuration for a migration from a self-managed origin cluster to an {astra-db} target. +. xref:astra-db-serverless:databases:secure-connect-bundle.adoc[Download your database's {scb-short}]. +. Upload it to the jumphost. +. Open a new shell on the jumpost, and then run `docker cp /path/to/scb.zim zdm-ansible-container:/home/ubuntu` to copy the {scb-short} to the container. +. Set `*_astra_secure_connect_bundle_path` to the path to the {scb-short} on the jumphost. +==== +*** Make sure that you leave the unused credential unset. +For example, if you use `target_astra_db_id` and `target_astra_token`, leave `target_astra_secure_connect_bundle_path` unset. ++ +.Example: Cluster configuration variables +[%collapsible] +==== +The following example `zdm_proxy_cluster_config.yml` file shows the configuration for a migration from a self-managed origin cluster to an {astra-db} target: [source,yml] ---- @@ -131,23 +137,25 @@ origin_port: 9042 target_username: "dqhg...NndY" target_password: "Yc+U_2.gu,9woy0w...9JpAZGt+CCn5" -## Set the following two parameters only if the target is an Astra DB database and you would like the automation to download the Secure Connect Bundle automatically +## Set the following two parameters only if the target is an Astra DB database +## and you want the automation to download the Secure Connect Bundle for you target_astra_db_id: "d425vx9e-f2...c871k" target_astra_token: "AstraCS:dUTGnRs...jeiKoIqyw:01...29dfb7" ---- ==== -The other file you need to be aware of is `zdm_proxy_core_config.yml`. -This file contains some global variables that will be used in subsequent steps during the migration. 
-It is good to familiarize yourself with this file, although these configuration variables do not need changing at this time: - -. `primary_cluster`: which cluster is going to be the primary source of truth. -This should be left set to its default value of `ORIGIN` at the start of the migration, and will be changed to `TARGET` after migrating all existing data. -. `read_mode`: leave to its default value of `PRIMARY_ONLY`. -See xref:enable-async-dual-reads.adoc[] for more information on this variable. -. `log_level`: leave to its default of `INFO`. +. Save and close the file. -Leave all these variables to their defaults for now. +. Open the `zdm_proxy_core_config.yml` file in the same directory. +This file contains some global variables that are used in subsequent steps during the migration. +Familiarize yourself with these values, but don't change any of them yet: ++ +* `primary_cluster`: The cluster that serves as the primary source of truth for read requests during the migration. +For the majority of the migration, leave this set to the default value of `ORIGIN`. +At the end of the migration, when you're preparing to switch over to the target cluster permanently, you can change it to `TARGET` after migrating all data from the origin cluster. +* `read_mode`: Leave this set to the default value of `PRIMARY_ONLY`. +For more information, see xref:enable-async-dual-reads.adoc[]. +* `log_level`: Leave this set to the default value of `INFO`. === Enable TLS encryption (optional) @@ -167,7 +175,7 @@ For multi-datacenter origin clusters, you will need to specify the name of the d Likewise, for multi-datacenter target clusters you will need to set `target_local_datacenter` appropriately. These two variables are stored in `vars/zdm_proxy_advanced_config.yml`. -Note that this is not relevant for multi-region {astra-db} databases, where this is handled through region-specific Secure Connect Bundles. 
+Note that this is not relevant for multi-region {astra-db} databases, where this is handled through region-specific {scb-brief}s. [#ports] ==== Ports @@ -314,7 +322,7 @@ With the exception of the origin credentials, target credentials, and the `prima If you wish to change any of the cluster connection configuration variables (other than credentials and `primary_cluster`) on an existing deployment, you will need to re-run the `deploy_zdm_proxy.yml` playbook. This playbook can be run as many times as necessary. -Please note that running the `deploy_zdm_proxy.yml` playbook will result in a brief window of unavailability of the whole {product-proxy} deployment while all the {product-proxy} instances are torn down and recreated. +Be aware that running the `deploy_zdm_proxy.yml` playbook results in a brief window of unavailability of the whole {product-proxy} deployment while all the {product-proxy} instances are torn down and recreated. ==== [[_setting_up_the_monitoring_stack]] diff --git a/modules/ROOT/pages/deployment-infrastructure.adoc b/modules/ROOT/pages/deployment-infrastructure.adoc index 7c2ef01a..6eabc43c 100644 --- a/modules/ROOT/pages/deployment-infrastructure.adoc +++ b/modules/ROOT/pages/deployment-infrastructure.adoc @@ -1,6 +1,8 @@ = Deployment and infrastructure considerations :page-tag: migration,zdm,zero-downtime,zdm-proxy,deploy,infrastructure +As part of planning your migration, you need to prepare your infrastructure. + == Choosing where to deploy the proxy A typical {product-proxy} deployment is made up of multiple proxy instances. @@ -85,7 +87,7 @@ The only direct access to these machines should be from the jumphost. The {product-proxy} machines must be able to connect to the origin and target cluster nodes: * For self-managed clusters ({cass} or {dse-short}), connectivity is needed to the {cass-short} native protocol port (typically 9042). 
-* For {astra-db}, you will need to ensure outbound connectivity to the {astra} endpoint indicated in the Secure Connect Bundle. +* For {astra-db}, you will need to ensure outbound connectivity to the {astra} endpoint indicated in the {scb}. Connectivity over Private Link is also supported. The connectivity requirements for the jumphost / monitoring machine are: @@ -133,7 +135,7 @@ Host zdm-proxy-2 Host * User - IdentityFile < Filename (with absolute path) of the locally-generated key pair for the ZDM infrastructure. Example ~/.ssh/zdm-key-XXX > + IdentityFile < Filename (with absolute path) of the locally generated key pair for the ZDM infrastructure. Example ~/.ssh/zdm-key-XXX > IdentitiesOnly yes StrictHostKeyChecking no GlobalKnownHostsFile /dev/null @@ -153,3 +155,7 @@ Likewise, connecting to any {product-proxy} instance is as easy as this (replaci ---- ssh -F zdm_ssh_config zdm-proxy-0 ---- + +== Next steps + +* xref:ROOT:create-target.adoc[] \ No newline at end of file diff --git a/modules/ROOT/pages/dsbulk-migrator-overview.adoc b/modules/ROOT/pages/dsbulk-migrator-overview.adoc new file mode 100644 index 00000000..acc2540c --- /dev/null +++ b/modules/ROOT/pages/dsbulk-migrator-overview.adoc @@ -0,0 +1,3 @@ += {dsbulk-migrator} overview + +include::ROOT:dsbulk-migrator.adoc[tags=body] \ No newline at end of file diff --git a/modules/ROOT/pages/dsbulk-migrator.adoc b/modules/ROOT/pages/dsbulk-migrator.adoc index 7238fd5c..0acc1cc7 100644 --- a/modules/ROOT/pages/dsbulk-migrator.adoc +++ b/modules/ROOT/pages/dsbulk-migrator.adoc @@ -1,21 +1,36 @@ -= {dsbulk-migrator} += Use {dsbulk-migrator} with {product-short} +:navtitle: Use {dsbulk-migrator} +:description: Use {dsbulk-migrator} to migrate data with {product-short}. -Use {dsbulk-migrator} to perform simple migration of smaller data quantities, where data validation (other than post-migration row counts) is not necessary. +//TODO: Reorganize this page and consider breaking it up into smaller pages. 
+ +// tag::body[] +Use {dsbulk-migrator} to perform small or simple migrations that don't require data validation other than post-migration row counts. +This tool is also an option for migrations where you can shard data from large tables into more manageable quantities. + +{dsbulk-migrator} extends {dsbulk-loader} with the following commands: + +* `migrate-live`: Start a live data migration using the embedded version of {dsbulk-loader} or your own {dsbulk-loader} installation. +A live migration means that the data migration starts immediately and is performed by the migrator tool through the specified {dsbulk-loader} installation. + +* `generate-script`: Generate a migration script that you can execute to perform a data migration with your own {dsbulk-loader} installation. +This command _doesn't_ trigger the migration; it only generates the migration script that you must then execute. + +* `generate-ddl`: Read the schema from the origin, and then generate CQL files to recreate it in your target {astra-db} database. [[prereqs-dsbulk-migrator]] == {dsbulk-migrator} prerequisites -* Install or switch to Java 11. -* Install https://maven.apache.org/download.cgi[Maven] 3.9.x. -* Optionally install https://docs.datastax.com/en/dsbulk/docs/installing/install.html[{dsbulk-loader}], if you elect to reference your own external installation of {dsbulk-loader}, instead of the embedded {dsbulk-loader} that's in {dsbulk-migrator}. -* Install https://github.com/datastax/simulacron#prerequisites[Simulacron] 0.12.x and its prerequisites, for integration tests. +* Java 11 + +* https://maven.apache.org/download.cgi[Maven] 3.9.x -[[building-dsbulk-migrator]] -== Building {dsbulk-migrator} +* Optional: If you don't want to use the embedded {dsbulk-loader} that is bundled with {dsbulk-migrator}, xref:dsbulk:installing:install.adoc[install {dsbulk-loader}] before installing {dsbulk-migrator}. -Building {dsbulk-migrator} is accomplished with Maven. 
First, clone the git repo to your local machine. -Example: +== Build {dsbulk-migrator} +. Clone the {dsbulk-migrator-repo}[{dsbulk-migrator} repository]: ++ [source,bash] ---- cd ~/github @@ -23,8 +38,8 @@ git clone git@github.com:datastax/dsbulk-migrator.git cd dsbulk-migrator ---- -Then run: - +. Use Maven to build {dsbulk-migrator}: ++ [source,bash] ---- mvn clean package @@ -32,59 +47,59 @@ mvn clean package The build produces two distributable fat jars: -* `dsbulk-migrator--embedded-driver.jar` : contains an embedded Java driver; suitable for live migrations using an external {dsbulk-loader}, or for script generation. -This jar is NOT suitable for live migrations using an embedded {dsbulk-loader}, since no {dsbulk-loader} classes are present. -* `dsbulk-migrator--embedded-dsbulk.jar`: contains an embedded {dsbulk-loader} and an embedded Java driver; suitable for all operations. -Note that this jar is much bigger than the previous one, due to the presence of {dsbulk-loader} classes. +* `dsbulk-migrator-**VERSION**-embedded-driver.jar` contains an embedded Java driver. +Suitable for script generation or live migrations using an external {dsbulk-loader}. ++ +This jar isn't suitable for live migrations that use the embedded {dsbulk-loader} because no {dsbulk-loader} classes are present. -[[testing-dsbulk-migrator]] -== Testing {dsbulk-migrator} +* `dsbulk-migrator-**VERSION**-embedded-dsbulk.jar` contains an embedded {dsbulk-loader} and an embedded Java driver. +Suitable for all operations. +Much larger than the other JAR due to the presence of {dsbulk-loader} classes. -The project contains a few integration tests. -Run them with: +== Test {dsbulk-migrator} + +The {dsbulk-migrator} project contains some integration tests that require https://github.com/datastax/simulacron[Simulacron]. + +. Clone and build Simulacron, as explained in the https://github.com/datastax/simulacron[Simulacron GitHub repository]. 
+Note the prerequisites for Simulacron, particularly for macOS. + +. Run the tests: [source,bash] ---- mvn clean verify ---- -The integration tests require https://github.com/datastax/simulacron[Simulacron]. -Be sure to meet all the https://github.com/datastax/simulacron#prerequisites[Simulacron prerequisites] before running the -tests. +== Run {dsbulk-migrator} -[[running-dsbulk-migrator]] -== Running {dsbulk-migrator} - -Launch the {dsbulk-migrator} tool: +Launch {dsbulk-migrator} with the command and options you want to run: [source,bash] ---- java -jar /path/to/dsbulk-migrator.jar { migrate-live | generate-script | generate-ddl } [OPTIONS] ---- -When doing a live migration, the options are used to effectively configure the {dsbulk-migrator} and to connect to +The role and availability of the options depends on the command you run: + +* During a live migration, the options configure {dsbulk-migrator} and establish connections to the clusters. -When generating a migration script, most options serve as default values in the generated scripts. -Note however that, even when generating scripts, this tool still needs to access the origin cluster -in order to gather metadata about the tables to migrate. +* When generating a migration script, most options become default values in the generated scripts. +However, even when generating scripts, {dsbulk-migrator} still needs to access the origin cluster to gather metadata about the tables to migrate. -When generating a DDL file, only a few options are meaningful. -Because the standard {dsbulk-loader} is not used, and the import cluster is never contacted, import options and {dsbulk-loader}-related options are ignored. -The tool still needs to access the origin cluster in order to gather metadata about the keyspaces and tables for which to generate DDL statements. +* When generating a DDL file, import options and {dsbulk-loader}-related options are ignored. 
+However, {dsbulk-migrator} still needs to access the origin cluster to gather metadata about the keyspaces and tables for the DDL statements. -[[dsbulk-migrator-reference]] -== {dsbulk-migrator} reference +For more information about the commands and their options, see the following references: -* xref:#dsbulk-live[Live migration command-line options] -* xref:#dsbulk-script[Script generation command-line options] -* xref:#dsbulk-ddl[DDL generation command-line options] -* xref:#getting-help-with-dsbulk-migrator[Getting {dsbulk-migrator} help] -* xref:#dsbulk-examples[{dsbulk-migrator} examples] +* <> +* <> +* <> +For help and examples, see <> and <>. [[dsbulk-live]] -=== Live migration command-line options +== Live migration command-line options The following options are available for the `migrate-live` command. Most options have sensible default values and do not need to be specified, unless you want to override the default value. @@ -178,7 +193,7 @@ Options `--export-username` and `--export-password` must be provided together, o | | `--import-bundle=PATH` -| The path to a Secure Connect Bundle to connect to a target {astra-db} cluster. +| The path to a {scb} to connect to a target {astra-db} cluster. Options `--import-host` and `--import-bundle` are mutually exclusive. | @@ -284,9 +299,8 @@ If unspecified, it defaults to the current working directory. |=== - [[dsbulk-script]] -=== Script generation command-line options +== Script generation command-line options The following options are available for the `generate-script` command. Most options have sensible default values and do not need to be specified, unless you want to override the default value. @@ -457,9 +471,8 @@ Case-sensitive table names must be entered in their exact case. |=== - [[dsbulk-ddl]] -=== DDL generation command-line options +== DDL generation command-line options The following options are available for the `generate-ddl` command. 
Most options have sensible default values and do not need to be specified, unless you want to override the default value. @@ -525,34 +538,13 @@ The default is `all`. |=== - -[[getting-help-with-dsbulk-migrator]] -== Getting help with {dsbulk-migrator} - -Use the following command to display the available {dsbulk-migrator} commands: - -[source,bash] ----- -java -jar /path/to/dsbulk-migrator-embedded-dsbulk.jar --help ----- - -For individual command help and each one's options: - -[source,bash] ----- -java -jar /path/to/dsbulk-migrator-embedded-dsbulk.jar COMMAND --help ----- - [[dsbulk-examples]] == {dsbulk-migrator} examples -[NOTE] -==== -These examples show sample `username` and `password` values that are for demonstration purposes only. -Do not use these values in your environment. -==== +These examples show sample `username` and `password` values that are for demonstration purposes only. +Don't use these values in your environment. -=== Generate migration script +=== Generate a migration script Generate a migration script to migrate from an existing origin cluster to a target {astra-db} cluster: @@ -570,7 +562,7 @@ Generate a migration script to migrate from an existing origin cluster to a targ --import-password=s3cr3t ---- -=== Migrate live using external {dsbulk-loader} install +=== Live migration with an external {dsbulk-loader} installation Perform a live migration from an existing origin cluster to a target {astra-db} cluster using an external {dsbulk-loader} installation: @@ -590,7 +582,7 @@ Perform a live migration from an existing origin cluster to a target {astra-db} Passwords are prompted interactively. 
-=== Migrate live using embedded {dsbulk-loader} install +=== Live migration with the embedded {dsbulk-loader} Perform a live migration from an existing origin cluster to a target {astra-db} cluster using the embedded {dsbulk-loader} installation: @@ -612,14 +604,14 @@ Perform a live migration from an existing origin cluster to a target {astra-db} --import-dsbulk-option "--executor.maxPerSecond=1000" ---- -Passwords areprompted interactively. +Passwords are prompted interactively. The preceding example passes additional {dsbulk-loader} options. The preceding example requires the `dsbulk-migrator--embedded-dsbulk.jar` fat jar. Otherwise, an error is raised because no embedded {dsbulk-loader} can be found. -=== Generate DDL to recreate the origin schema on the target cluster +=== Generate DDL files to recreate the origin schema on the target cluster Generate DDL files to recreate the origin schema on a target {astra-db} cluster: @@ -632,3 +624,26 @@ Generate DDL files to recreate the origin schema on a target {astra-db} cluster: --export-password=s3cr3t \ --optimize-for-astra ---- + +[[getting-help-with-dsbulk-migrator]] +== Get help with {dsbulk-migrator} + +Use the following command to display the available {dsbulk-migrator} commands: + +[source,bash] +---- +java -jar /path/to/dsbulk-migrator-embedded-dsbulk.jar --help +---- + +For individual command help and each one's options: + +[source,bash] +---- +java -jar /path/to/dsbulk-migrator-embedded-dsbulk.jar COMMAND --help +---- + +== See also + +* xref:dsbulk:overview:dsbulk-about.adoc[{dsbulk-loader}] +* xref:dsbulk:reference:dsbulk-cmd.adoc#escaping-and-quoting-command-line-arguments[Escaping and quoting {dsbulk-loader} command line arguments] +// end::body[] \ No newline at end of file diff --git a/modules/ROOT/pages/faqs.adoc b/modules/ROOT/pages/faqs.adoc index d1659f2a..ef610962 100644 --- a/modules/ROOT/pages/faqs.adoc +++ b/modules/ROOT/pages/faqs.adoc @@ -58,7 +58,7 @@ The Ansible playbooks constitute the 
{product-automation}. Migrating client applications between clusters is a need that arises in many scenarios. For example, you may want to: * Move to a cloud-native, managed service such as {astra-db}. -* Migrate your client application to a brand new cluster, on a more recent version and perhaps on new infrastructure, or even a different CQL database entirely, without intermediate upgrade steps and ensuring that you always have an easy way to rollback in case of issues. +* Migrate your client application to a brand new cluster, on a more recent version and perhaps on new infrastructure, or even a different CQL database entirely, without intermediate upgrade steps and ensuring that you always have an easy way to roll back in case of issues. * Separate out a client application from a shared cluster to a dedicated one. * Consolidate client applications, currently running on separate clusters, into fewer clusters or even a single one. @@ -66,8 +66,6 @@ Bottom line: You want to migrate your critical database infrastructure without r == Which releases of {cass-short} or {dse-short} are supported for migrations? -include::ROOT:partial$supported-releases.adoc[] - include::ROOT:partial$migration-scenarios.adoc[] == Does {product-short} migrate clusters? @@ -92,7 +90,7 @@ The suite of {product} tools from {company} is free and open-sourced. Free and Pay As You Go plan users do not have support access and must raise questions in the {astra-ui} chat. https://www.datastax.com/products/luna[Luna] is a subscription to the {cass} support and expertise at {company}. -For any observed problems with the {product-proxy}, submit a https://github.com/datastax/zdm-proxy/issues[GitHub Issue] in the {product-proxy} GitHub repo. +For any observed problems with the {product-proxy}, submit a {product-proxy-repo}/issues[GitHub Issue] in the {product-proxy} GitHub repo. Additional examples serve as templates, from which you can learn about migrations. 
{company} does not assume responsibility for making the templates work for specific use cases. @@ -105,15 +103,13 @@ In addition to sending feedback, you may submit Pull Requests (PRs) for potentia To submit PRs, you must for first agree to the https://cla.datastax.com/[{company} Contribution License Agreement (CLA)]. -* https://github.com/datastax/zdm-proxy[{product-proxy}] repo. - -* https://github.com/datastax/zdm-proxy-automation[{product-automation}] repo for the Ansible-based {product-automation}, which includes the {product-utility}. +* {product-proxy-repo}[{product-proxy}] repo. -* https://github.com/datastax/cassandra-data-migrator[cassandra-data-migrator] repo for the tool that supports migrating larger data quantities as well as detailed verifications and reconciliation options. +* {product-automation-repo}[{product-automation}] repo for the Ansible-based {product-automation}, which includes the {product-utility}. -* https://github.com/datastax/dsbulk-migrator[dsbulk-migrator] repo for the tool that allows simple data migrations without validation and reconciliation capabilities. +* {cass-migrator-repo}[cassandra-data-migrator] repo for the tool that supports migrating larger data quantities as well as detailed verifications and reconciliation options. -// * https://github.com/datastax/migration-docs[Migration documentation] +* {dsbulk-migrator-repo}[dsbulk-migrator] repo for the tool that allows simple data migrations without validation and reconciliation capabilities. == Does {product-proxy} support Transport Layer Security (TLS)? @@ -125,7 +121,7 @@ One-way TLS and Mutual TLS are both supported. * For proxy-to-cluster TLS, the {product-proxy} acts as the TLS client and the cluster as the TLS server. One-way TLS and Mutual TLS are both supported. * When the {product-proxy} connects to {astra-db} clusters, it always implicitly uses Mutual TLS. -This is done through the Secure Connect Bundle (SCB) and does not require any extra configuration. 
+This is done through the {scb} and does not require any extra configuration. For TLS details, see xref:tls.adoc[]. @@ -167,3 +163,13 @@ Ranging from large enterprises to small teams, IT managers, operators, and devel A CNDB like {astra-db} is a different environment. Running on proven cloud providers like AWS, Google Cloud, and Azure, {astra-db} greatly reduces complexity and increases convenience by surfacing a subset of configurable settings, providing a UI (the {astra-ui}) and APIs and CLI tools to interact with your {astra-db} organizations and databases. + +//TODO: Content to incorporate somewhere: + +//// +Zero-Downtime Migration (ZDM) Proxy is a proxy that mirrors traffic between the origin and target database. The requests are CQL-oriented - effectively, any database that uses the Cassandra drivers can use the proxy. Therefore, it can be used with Apache Cassandra, DataStax Enterprise (including Search and Graph through the driver), ScyllaDB, Instaclustr, Amazon Keyspaces, Microsoft CosmosDB, Aiven’s Cassandra, Yugabyte, and anything else that the drivers can connect to. A matrix of what we’ve tested is found here: CQL databases tested with ZDM Proxy. + +Cassandra Data Migrator migrates historical data and does validation and reconciliation. It uses the CQL interface to do its work. Therefore, it can be used to migrate data from any origin database that speaks CQL. + +Astra DB Sideloader is able to import native Cassandra data (sstables) such as backups from an origin database and load them directly into the Astra Serverless data plane. Because it takes native Cassandra data, we can only import data from (specific versions of) native Cassandra databases such as Apache Cassandra, DataStax Enterprise, Hyper-Converged Database, Instaclustr, and Aiven. Sideloader cannot import data from databases that are simply Cassandra compatible, such as ScyllaDB, Keyspaces, CosmosDB, or Yugabyte, which don’t have the same native Cassandra format. 
In such cases, customers can use the Cassandra Data Migrator, which uses CQL. +//// \ No newline at end of file diff --git a/modules/ROOT/pages/feasibility-checklists.adoc b/modules/ROOT/pages/feasibility-checklists.adoc index 4402a43e..69c0328d 100644 --- a/modules/ROOT/pages/feasibility-checklists.adoc +++ b/modules/ROOT/pages/feasibility-checklists.adoc @@ -1,45 +1,55 @@ = Feasibility checks :page-tag: migration,zdm,zero-downtime,zdm-proxy,feasibility +:page-aliases: ROOT:preliminary-steps.adoc Before starting your migration, refer to the following considerations to ensure that your client application workload and xref:glossary.adoc#origin[**Origin**] are suitable for this {product} process. +True zero downtime migration is only possible if your database meets the minimum requirements described on this page. +If your database doesn't meet these requirements, you can still complete the migration, but downtime might be necessary to finish the migration. + == {cass-short} Native Protocol version and cluster version support {product-proxy} supports protocol versions `v3`, `v4`, `DSE_V1`, and `DSE_V2`. -[NOTE] -==== -include::ROOT:partial$supported-releases.adoc[] -==== - +//TODO: Verify v5 status {product-proxy} technically doesn't support `v5`. If `v5` is requested, the proxy handles protocol negotiation so that the client application properly downgrades the protocol version to `v4`. This means that any client application using a recent driver that supports protocol version `v5` can be migrated using the {product-proxy} (as long as it does not use v5-specific functionality). -[IMPORTANT] -==== -*Thrift is not supported by {product-proxy}.* +=== Thrift is not supported by {product-proxy} If you are using a very old driver or cluster version that only supports Thrift, you need to change your client application to use CQL and potentially upgrade your cluster before starting the migration process. 
-==== -This means that {product-proxy} supports migrations of the following cluster versions (Origin/Target): +=== Supported cluster versions and migration paths + +include::ROOT:partial$migration-scenarios.adoc[] + +//// +TODO: Need to verify as these are in conflict with other information in this guide: -* {cass-reg} 2.1 and higher versions, up to (and including) {cass-short} 4.x. +{product-proxy} supports migrations to and from the following cluster versions: + +* {cass-reg} 2.1 and later, including {cass-short} 4.x. ++ {cass-short} 2.0 migration support may be introduced when protocol version v2 is supported. -* {dse} 4.7.1+ and higher versions. + +* {dse} 4.7.1 and later. ++ {dse-short} 4.6 migration support may be introduced when protocol version v2 is supported. -* {company} {astra-db} (Serverless and Classic). + +* {astra-db}. +//// [TIP] ==== -Ensure that you test your client application on the target (connected directly, without the {product-proxy}) before the migration process begins. +Before you begin the migration process, test directly connecting your client application to your target cluster, without {product-proxy}. +This ensures that you know the connection will work when you disconnect {product-proxy} at the end of the migration. ==== == Schema/keyspace compatibility {product-proxy} does not modify or transform CQL statements besides the optional feature that replaces `now()` functions with timestamp literals. -See <> for more information about this feature. +See <> for more information about this feature. A CQL statement that your client application sends to {product-proxy} must be able to succeed on both clusters. This means that any keyspace that your client application uses must exist on both the origin and target clusters with the same name (although they can have different replication strategies and durable writes settings). 
@@ -54,7 +64,7 @@ For example, if your compound primary key is `PRIMARY KEY (A, B)` and you always == Considerations for {astra-db} migrations {astra-db} implements guardrails and sets limits to ensure good practices, foster availability, and promote optimal configurations for your databases. -Please check the list of https://docs.datastax.com/en/astra-serverless/docs/plan/planning.html#_astra_db_database_guardrails_and_limits[guardrails and limits] and make sure your application workload can be successful within these limits. +Check the list of https://docs.datastax.com/en/astra-serverless/docs/plan/planning.html#_astra_db_database_guardrails_and_limits[guardrails and limits] to make sure that your application workload can be successful within these limits. If you need to make changes to the application or data model to ensure that your workload can run successfully in {astra-db}, then you need to do these changes before you start the migration process. @@ -101,7 +111,7 @@ Examples of non-idempotent operations in CQL are: * Collection updates with `+=` and `-=` operators * Non-deterministic functions like `now()` and `uuid()` -For more information on how to handle non-deterministic functions please refer to <>. +For more information on how to handle non-deterministic functions, see <>. Given that there are two separate clusters involved, the state of each cluster may be different. For conditional writes, this may create a divergent state for a time. @@ -205,11 +215,17 @@ To enable this feature, set the configuration variable `replace_cql_function` to For more, see xref:manage-proxy-instances.adoc#change-mutable-config-variable[Change a mutable configuration variable]. 
If you find that the performance is not acceptable when this feature is enabled, or the feature doesn't cover a particular function that your client application is using, then you will have to make a change to your client application so that the value is computed locally (at client application level) before the statement is sent to the database. -Most drivers have utility methods that help you compute these values locally, please refer to the documentation of the driver you are using. - +Most drivers have utility methods that help you compute these values locally. +For more information, see your driver's documentation. == Driver retry policy and query idempotence +[IMPORTANT] +==== +The {product-short} process requires you to perform rolling restarts of your client applications during the migration. +This is standard practice for client applications that are deployed over multiple instances, and it is a widely used approach to roll out releases and configuration changes. +==== + As part of the normal migration process, the {product-proxy} instances will have to be restarted in between phases to apply configuration changes. From the point of view of the client application, this is a similar behavior to a {dse-short} or {cass-short} cluster going through a rolling restart in a non-migration scenario. @@ -245,3 +261,7 @@ The default retry policy retries all requests in case of a connection error *reg Prior to version 2.5.0, this driver did *NOT* retry any requests after they have been written to the socket, it was up to the client application to handle these and retry them if they are suitable for a retry. With the release of 2.5.0, the driver retries requests that are set as `idempotent`. See this C++ 2.x https://docs.datastax.com/en/developer/cpp-driver/2.16/topics/configuration/#query-idempotence[query idempotence documentation section]. 
+ +== Next steps + +* xref:ROOT:deployment-infrastructure.adoc[] \ No newline at end of file diff --git a/modules/ROOT/pages/glossary.adoc b/modules/ROOT/pages/glossary.adoc index 231cbba4..08138cae 100644 --- a/modules/ROOT/pages/glossary.adoc +++ b/modules/ROOT/pages/glossary.adoc @@ -1,6 +1,8 @@ = Glossary :page-tag: migration,zdm,zero-downtime,glossary +//TODO: Determine which terms are actually needed. Convert to partials if the definitions need to be repeated, otherwise replace links to this page with links to more useful and complete information. + Here are a few terms used throughout the {company} {product} documentation and code. [[_ansible_playbooks]] @@ -77,10 +79,10 @@ For more, see xref:glossary.adoc#_primary_cluster[Primary cluster]. Also see xref:enable-async-dual-reads.adoc[]. [[_secure_connect_bundle_scb]] -== Secure Connect Bundle (SCB) +== {scb} -A ZIP file generated in the {astra-url}[{astra-ui}] that contains connection metadata and TLS encryption certificates (but not the database credentials) for your {astra-db} database. -For details, see https://docs.datastax.com/en/astra-serverless/docs/connect/secure-connect-bundle.html[Working with the Secure Connect Bundle]. +A ZIP file that contains connection metadata and TLS encryption certificates (but not the database credentials) for your {astra-db} database. +For more information, see xref:astra-db-serverless:databases:secure-connect-bundle.adoc[]. [[target]] == Target diff --git a/modules/ROOT/pages/index.adoc b/modules/ROOT/pages/index.adoc index 3d3915ec..26cc63a0 100644 --- a/modules/ROOT/pages/index.adoc +++ b/modules/ROOT/pages/index.adoc @@ -1,48 +1,95 @@ -= Introduction to data migration -:page-tag: migration,zdm,zero-downtime,zdm-proxy, introduction += {company} migration tools +:navtitle: Data migration +:page-layout: landing -Enterprises today want to reliably migrate mission-critical client applications and data across environments with little or no downtime during the migration. 
+{company} has developed a set of thoroughly tested self-service tools that can help you migrate your data from any origin cluster based on {cass-reg} to any {cass-short}-compatible target with little or no downtime. -{company} has developed a set of thoroughly-tested self-service tools that can help you migrate your data from any {cass-short} origin cluster to any {cass-short}-compatible target. +[.[&>h2]:!hidden] +== {empty} -Compatible origin and target clusters include {cass-reg}, {dse}, {hcd}, and {astra-db}. +[subs="macros,attributes"] +++++ +
+
-When the migration is complete, the data is present in the new database, and you can update your client applications to connect exclusively to the new database. -The old database becomes obsolete and can be removed. +

Secure, reliable migrations

-Available migration tools include: +

Bulk-load terabytes while shifting traffic with little or no downtime, securely and reliably.

-* xref:ROOT:introduction.adoc[{product}] ({product-short}): Comprised of {product-proxy}, {product-proxy}, and{product-automation}, you can continue to run your current application and migrate data from the origin to the target database without any downtime. -{product-proxy} helps to manage the activity in transition. +
+ xref:ROOT:introduction.adoc[Get started with {product},role="btn btn-primary btn-solid"] + xref:ROOT:components.adoc[Compare tools,role="btn btn-neutral btn-outlined"] +
-* xref:ROOT:cassandra-data-migrator.adoc[{cass-migrator}]: It can be used in conjunction with the {product-proxy} for a migration with zero downtime. It can also be used on its own for migrations with acceptable downtime. +
+ +++++ -//// -* https://github.com/datastax/zdm-proxy-automation[{product-automation}] repo for Ansible-based {product-proxy} automation. -//{product-automation} 2.3.0, which enables ansible scripts and terraform to work with both Ubuntu and RedHat-family Linux distributions. +[.[&>h2]:!hidden] +== {empty} -* https://github.com/datastax/dsbulk-migrator[{dsbulk-migrator}] repo for migration of smaller data quantities. +[subs="macros,attributes"] +++++ -* https://github.com/datastax/cassandra-data-migrator[{cass-migrator}] repo for migration of larger data quantities and where detailed verifications and reconciliation options are needed. -//// +

Migration tools

-//// -Possible related content: +
+
-https://docs.datastax.com/en/dse/6.8/tooling/migration-path-dse.html + svg:common:ROOT:icons/datastax/cloud-backup-restore.svg[role="mx-auto max-w-xs md:mx-0 lg:max-w-none"] -https://docs.datastax.com/en/dse/6.9/tooling/migration-path-dse.html +

{product-proxy}

-MC only: +

To support live migrations, {product-proxy} orchestrates activity-in-transition on your clusters, allowing your applications to run while you migrate data.

+
+ xref:ROOT:introduction.adoc[Get started with {product-short}] +
-https://docs.datastax.com/en/mission-control/migrate/oss-cass-to-mission-control.html +
+
-https://docs.datastax.com/en/mission-control/migrate/dse-to-mission-control.html -//// \ No newline at end of file + svg:common:ROOT:icons/datastax/cloud-db.svg[role="mx-auto max-w-xs md:mx-0 lg:max-w-none"] + +

{sstable-sideloader}

+ +

{sstable-sideloader} is a service running in {astra-db} that directly imports data from snapshots of your existing {cass-short}-based cluster.

+ +
+ xref:sideloader:sideloader-overview.adoc[Get started with {sstable-sideloader}] +
+ +
+
+ + svg:common:ROOT:icons/datastax/insert-data.svg[role="mx-auto max-w-xs md:mx-0 lg:max-w-none"] + +

{cass-migrator} ({cass-migrator-short})

+ +

{cass-migrator-short} can migrate and validate data between {cass-short}-based clusters, with optional logging and reconciliation support.

+ +
+ xref:ROOT:cdm-overview.adoc[Get started with {cass-migrator-short}] +
+ +
+
+ + svg:common:ROOT:icons/datastax/migrate.svg[role="mx-auto max-w-xs md:mx-0 lg:max-w-none"] + +

{dsbulk-migrator}

+ +

{dsbulk-migrator} is an extension of {dsbulk-loader}.

+ +
+ xref:ROOT:dsbulk-migrator.adoc[Get started with {dsbulk-migrator}] +
+ +
+
+++++ \ No newline at end of file diff --git a/modules/ROOT/pages/introduction.adoc b/modules/ROOT/pages/introduction.adoc index 95360ea2..5aba240f 100644 --- a/modules/ROOT/pages/introduction.adoc +++ b/modules/ROOT/pages/introduction.adoc @@ -1,24 +1,40 @@ -= Introduction to {product} -:navtitle: Introduction += Phases of the {product} process +:navtitle: About the {product-short} process :description: Before you begin, learn about migration concepts, software components, and the sequence of operations. :page-tag: migration,zdm,zero-downtime,zdm-proxy,introduction -{product} provides a simple and reliable way for you to migrate applications from a CQL-based cluster to another CQL-based cluster with little or no downtime and minimal interruption of service to your client applications and data. +With {product}, your applications can continue to run while you migrate data from one {cass-short}-based cluster to another, resulting in little or no downtime and minimal service interruptions. -include::ROOT:partial$supported-releases.adoc[] +.Why migrate? +[%collapsible] +==== +There are many reasons that you might need to migrate data and applications. +For example: -{product-short} keeps your clusters in sync at all times by a dual-write logic configuration, and you can xref:rollback.adoc[roll back] at any point. +* You want to move to a different database provider. +For example, you might move from self-managed clusters to a cloud-based Database-as-a-Service (DBaaS), such as {astra-db}. -[IMPORTANT] -==== -* True zero downtime migration is only possible if your database meets the xref:ROOT:feasibility-checklists.adoc[minimum requirements]. -If your database doesn't meet these requirements, you can still complete the migration, but downtime might be necessary to finish the migration. +* You need to upgrade a cluster to a newer version or infrastructure. 
+ +* You want to move client applications from shared clusters to dedicated clusters for greater control over individual configurations. -* The {product} process requires you to be able to perform rolling restarts of your client applications during the migration. -This is standard practice for client applications that are deployed over multiple instances, and it is a widely used approach to roll out releases and configuration changes. +* You want to consolidate client applications running on separate clusters onto one shared cluster to minimize sprawl and maintenance. ==== -== Migration scenarios +{product-short} is comprised of {product-proxy}, {product-utility}, and {product-automation}, which orchestrate activity-in-transition on your clusters. +To move and validate data, you use {sstable-sideloader}, {cass-migrator}, or {dsbulk-migrator}. +{product-proxy} keeps your clusters in sync at all times by a dual-write logic configuration, which means you can stop the migration or xref:rollback.adoc[roll back] at any point. +For more information about these tools, see xref:ROOT:components.adoc[]. + +When the migration is complete, the data is present in the new database, and you can update your client applications to connect exclusively to the new database. +The old database becomes obsolete and can be removed. + +== {product-short} requirements + +True zero downtime migration is only possible if your database meets the minimum requirements described in xref:ROOT:feasibility-checklists.adoc[]. +If your database doesn't meet these requirements, you can still complete the migration, but downtime might be necessary to finish the migration. + +=== Supported migration paths include::ROOT:partial$migration-scenarios.adoc[] @@ -49,6 +65,15 @@ The table names, column names, and data types must also match. For more information, see xref:feasibility-checklists.adoc#_schemakeyspace_compatibility[Schema/keyspace compatibility]. 
==== +=== Migration planning + +Before you begin the migration, plan and prepare for the migration: + +* xref:ROOT:feasibility-checklists.adoc[] +* xref:ROOT:deployment-infrastructure.adoc[] +* xref:ROOT:create-target.adoc[] +* xref:ROOT:rollback.adoc[] + === Phase 1: Deploy {product-proxy} and connect client applications In this first phase, deploy the {product-proxy} instances and connect client applications to the proxies. @@ -59,8 +84,10 @@ image:migration-phase1ra9.png["Migration Phase 1."] === Phase 2: Migrate data -In this phase, migrate existing data using {cass-migrator} or {dsbulk-loader}. -Validate that the migrated data is correct, while continuing to perform dual writes. +In this phase, migrate existing data using {sstable-sideloader}, {cass-migrator}, or {dsbulk-migrator}. +For information about these tools, see xref:ROOT:components.adoc[]. + +{product-proxy} will continue to perform dual writes while you move data and validate that the migrated data is correct. image:migration-phase2ra9a.png["Migration Phase 2."] diff --git a/modules/ROOT/pages/manage-proxy-instances.adoc b/modules/ROOT/pages/manage-proxy-instances.adoc index c6a3f039..a22081e5 100644 --- a/modules/ROOT/pages/manage-proxy-instances.adoc +++ b/modules/ROOT/pages/manage-proxy-instances.adoc @@ -220,7 +220,7 @@ All configuration variables that are not listed in this section are considered i If you wish to change any of the immutable configuration variables on an existing deployment, you will need to re-run the deployment playbook (`deploy_zdm_proxy.yml`, as documented in xref:deploy-proxy-monitoring.adoc[this page]). This playbook can be run as many times as necessary. -Please note that running the `deploy_zdm_proxy.yml` playbook will result in a brief window of unavailability of the whole {product-proxy} deployment while all the {product-proxy} instances are torn down and recreated. 
+Be aware that running the `deploy_zdm_proxy.yml` playbook results in a brief window of unavailability of the whole {product-proxy} deployment while all the {product-proxy} instances are torn down and recreated. ==== [[_upgrade_the_proxy_version]] diff --git a/modules/ROOT/pages/metrics.adoc b/modules/ROOT/pages/metrics.adoc index 45904200..81b4d2e3 100644 --- a/modules/ROOT/pages/metrics.adoc +++ b/modules/ROOT/pages/metrics.adoc @@ -17,7 +17,7 @@ For this reason, we strongly encourage you to monitor the {product-proxy}, eithe {product-automation} can deploy Prometheus and Grafana, configuring them automatically, as explained xref:deploy-proxy-monitoring.adoc#_setting_up_the_monitoring_stack[here]. The Grafana dashboards are ready to go with metrics that are being scraped from the {product-proxy} instances. -If you already have a Grafana deployment then you can import the dashboards from the two {product-short} dashboard files from this https://github.com/datastax/zdm-proxy-automation/tree/main/grafana-dashboards[{product-automation} GitHub location]. +If you already have a Grafana deployment then you can import the dashboards from the two {product-short} dashboard files from this {product-automation-repo}/tree/main/grafana-dashboards[{product-automation} GitHub location]. == Grafana dashboard for {product-proxy} metrics diff --git a/modules/ROOT/pages/migrate-and-validate-data.adoc b/modules/ROOT/pages/migrate-and-validate-data.adoc index 67d2b0fa..e32b223f 100644 --- a/modules/ROOT/pages/migrate-and-validate-data.adoc +++ b/modules/ROOT/pages/migrate-and-validate-data.adoc @@ -1,54 +1,48 @@ = Phase 2: Migrate and validate data :page-tag: migration,zdm,zero-downtime,validate-data -In Phase 2 of data migration, you migrate data from the origin to the target, and then validate the migrated data. +In Phase 2 of {product}, you migrate data from the origin to the target, and then validate the migrated data. 
-image::migration-phase2ra.png[In ZDM Phase 2, you migrate data from the origin cluster to the target cluster.] +image::migration-phase2ra.png[In {product-short} Phase 2, you migrate data from the origin cluster to the target cluster.] //For illustrations of all the migration phases, see the xref:introduction.adoc#_migration_phases[Introduction]. -This topic introduces data migration tools that you can use during Phase 2 of your migration project: +To move and validate data, you can use a dedicated data migration tool, such as {sstable-sideloader}, {cass-migrator}, or {dsbulk-migrator}, or you can create your own custom data migration script. -{cass-migrator} ({cass-migrator-short}):: -Best for migrating large amounts of data and for migrations that need support for detailed logging, data verification, table column renaming, and reconciliation. - -{dsbulk-migrator}:: -Extends {dsbulk-loader} with migration-specific commands. Best for simple migration of smaller amounts of data quantities, and migrations that don't require support for data validation during the migration. +// tag::migration-tool-summaries[] +== {sstable-sideloader} -{sstable-sideloader}:: -Exclusively for migrations from a {cass-reg}, {dse}, or {hcd} cluster to an {astra-db} database. -You can use {cass-migrator-short} to validate data after the migration. +{sstable-sideloader} is a service running in {astra-db} that directly imports data from snapshots of your existing {cass-short}-based cluster. +This tool is exclusively for migrations that move data to {astra-db}. -[[cass-migrator-key-features]] -== {cass-migrator} - -{cass-migrator-short} offers extensive functionality and configuration options to support large and complex migrations as well as post-migration data validation. +You can use {sstable-sideloader} alone or in the context of {product-short}.
-For more information, see xref:ROOT:cassandra-data-migrator.adoc[] and the https://github.com/datastax/cassandra-data-migrator[{cass-migrator} repository]. +For more information, see xref:sideloader:sideloader-zdm.adoc[]. -[[dsbulk-migrator-key-features]] -== {dsbulk-migrator} +== {cass-migrator} -{dsbulk-migrator}, which is based on {dsbulk-loader}, is best for migrating smaller amounts of data or when you can shard data from table rows into more manageable quantities. +You can use {cass-migrator} ({cass-migrator-short}) to migrate and validate tables between {cass-short}-based clusters. +It is best for migrating large amounts of data and for migrations that need support for detailed logging, data verification, table column renaming, and reconciliation. -{dsbulk-migrator} provides the following commands: +{cass-migrator-short} offers extensive functionality and configuration options to support large and complex migrations as well as post-migration data validation. -* `migrate-live`: Start a live data migration using the embedded version of {dsbulk-loader} or your own {dsbulk-loader} installation. -A live migration means that the data migration starts immediately and is performed by this migrator tool through the specified {dsbulk-loader} installation. +You can use {cass-migrator-short} by itself, in the context of {product-short}, or for data validation after using another migration tool, such as {sstable-sideloader}. -* `generate-script`: Generate a migration script that you can execute to perform a data migration with a your own {dsbulk-loader} installation. -This command _doesn't_ trigger the migration; it only generates the migration script that you must then execute. +For more information, see xref:ROOT:cassandra-data-migrator.adoc[]. -* `generate-ddl`: Read the schema from origin, and then generate CQL files to recreate it in your target {astra-db} database. 
+== {dsbulk-migrator} -For more information, see xref:ROOT:dsbulk-migrator.adoc[] and the https://github.com/datastax/dsbulk-migrator[{dsbulk-migrator} repository]. +{dsbulk-migrator} is an extension of {dsbulk-loader}. +It is best for smaller migrations or migrations that don't require data validation during the migration process. -== {sstable-sideloader} +In addition to loading and unloading CSV and JSON data, you can use {dsbulk-migrator} to transfer data between databases. +It can read data from a table in your origin database, and then write that data to a table in your target database. -{sstable-sideloader} is a service running in {astra-db} that directly imports data from snapshot backups that you've uploaded to {astra-db} from an existing {cass-short}, {dse-short}, or {hcd-short} cluster. +You can use {dsbulk-migrator} alone or in the context of {product-short}. -Because it imports data directly, {sstable-sideloader} can offer several advantages over CQL-based tools like {dsbulk-migrator} and {cass-migrator}, including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database. +For more information, see xref:ROOT:dsbulk-migrator.adoc[]. -{sstable-sideloader} uses the {astra} {devops-api}, your cloud provider's CLI, and `nodetool`. +== Custom data migration processes -For more information, see xref:sideloader:sideloader-overview.adoc[]. \ No newline at end of file +If you want to write your own custom data migration processes, you can use a tool like Apache Spark(TM). 
+// end::migration-tool-summaries[] \ No newline at end of file diff --git a/modules/ROOT/pages/preliminary-steps.adoc b/modules/ROOT/pages/preliminary-steps.adoc deleted file mode 100644 index 356d8aa9..00000000 --- a/modules/ROOT/pages/preliminary-steps.adoc +++ /dev/null @@ -1,9 +0,0 @@ -= Preliminary steps -:page-tag: migration,zdm,zero-downtime,preliminary-steps - -Before starting your migration, verify that you met the prerequisites and performed the preliminary tasks as documented in: - -* xref:feasibility-checklists.adoc[Feasibility checks] -* xref:deployment-infrastructure.adoc[Deployment and infrastructure considerations] -* xref:create-target.adoc[Create target environment for migration] -* xref:rollback.adoc[Understand rollback options] diff --git a/modules/ROOT/pages/rollback.adoc b/modules/ROOT/pages/rollback.adoc index 10c6daff..364580c0 100644 --- a/modules/ROOT/pages/rollback.adoc +++ b/modules/ROOT/pages/rollback.adoc @@ -2,12 +2,14 @@ :navtitle: Understand rollback options :page-tag: migration,zdm,zero-downtime,rollback -At any point during the migration process until the very last phase, if you hit any unexpected issue and need to (in effect) "rollback" the migration, you can always easily revert your client applications to connect directly to the origin cluster. +At any point from Phase 1 through Phase 4, if you encounter an unexpected issue and need to stop or roll back the migration, you can revert your client applications to connect directly to the origin cluster. -The migration can be started from scratch once the issue has been addressed. +After addressing the issue, you can restart the migration from the beginning. image::migration-all-phases.png[Migration phases from start to finish.] +== Phase 5 is the point of no return + After moving your client applications off the {product-proxy} instances (Phase 5), writes are no longer sent to both the origin and target clusters. 
The data on origin cluster is no longer kept up-to-date, and you lose this seamless rollback option. This is the point at which you commit to using the target cluster permanently. @@ -15,3 +17,7 @@ The {product-proxy} deployment can be destroyed, and the origin cluster is no lo However, should you decide to move back to the origin cluster later, or if you want to move to a new cluster entirely, you can rerun the same migration process. In this case, you use your original target cluster as the new origin cluster, and you set the new target cluster to whatever cluster you want to migrate to (which could even be the original ancestor origin cluster). + +== Next steps + +* xref:ROOT:phase1.adoc[] diff --git a/modules/ROOT/pages/setup-ansible-playbooks.adoc b/modules/ROOT/pages/setup-ansible-playbooks.adoc index 6ae409e1..e2cda141 100644 --- a/modules/ROOT/pages/setup-ansible-playbooks.adoc +++ b/modules/ROOT/pages/setup-ansible-playbooks.adoc @@ -25,13 +25,13 @@ image::docker-container-and-zdm-utility.png[{product-proxy} connections from Doc . You must have already provisioned the {product-short} infrastructure, which means you must have the server machines ready, and know their IP addresses. These can be in the cloud provider of your choice or on-premise. . Docker needs to be installed on the machine that will be running the Ansible Control Host container. -For comprehensive installation instructions, please refer to the https://docs.docker.com/engine/install/#server[official Docker documentation]. +For comprehensive installation instructions, see the https://docs.docker.com/engine/install/#server[Docker documentation]. . The `docker` command must not require superuser privileges. The instructions to do this can be found https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user[here]. 
[NOTE] ==== -Please note that the manual, non-superuser installation of Docker described above is to be done on the machine that will run the Ansible Control Host. +The manual, non-superuser installation of Docker described above is to be done on the machine that will run the Ansible Control Host. From that point, the automation will take care of installing Docker on the {product-proxy} machines without further intervention. ==== @@ -100,40 +100,28 @@ Now connect to the jumphost. ssh -F jumphost ---- -== Running the {product-utility} - -From the jumphost, download the {product-utility} executable. -Releases are available https://github.com/datastax/zdm-proxy-automation/releases[here]. - -The downloadable archive name format is `zdm-util--`. - -The latest version: - -image:https://img.shields.io/github/v/release/datastax/zdm-proxy-automation?color=green&display_name=tag[GitHub release, latest by date] - -[source,bash] ---- -wget https://github.com/datastax/zdm-proxy-automation/releases/download//zdm-util-linux-amd64-.tgz ---- - -Here's an example to wget {product-utility} 2.3.0: +== Run the {product-utility} +. From the jumphost, download the latest {product-utility} executable from the {product-automation-repo}/releases[{product-automation} GitHub repository] {product-automation-shield}. ++ +The package filename format is `zdm-util-**PLATFORM**-**VERSION**.tgz`. +The following example downloads {product-utility} version 2.3.0 for Linux amd64. +To download a different package, change the version and package filename accordingly. ++ [source,bash] ---- -wget https://github.com/datastax/zdm-proxy-automation/releases/download/v2.3.0/zdm-util-linux-amd64-v2.3.0.tgz +wget https://github.com/datastax/zdm-proxy-automation/releases/download/v2.3.0/zdm-util-linux-amd64-v2.3.0.tgz ---- -Once downloaded, unzip it. -Here's an example with {product-proxy} 2.3.0: - +. 
Extract the archive: ++ [source,bash] ---- tar -xvf zdm-util-linux-amd64-v2.3.0.tgz ---- -Run the {product-utility}. -Here's an example with {product-utility} 2.3.0: - +. Run the {product-utility}: ++ [source,bash] ---- ./zdm-util-v2.3.0 @@ -148,9 +136,7 @@ If you run the utility again, it will detect the file and ask you if you wish t If the configuration is not fully valid, you will be prompted for the missing or invalid parameters only. You can also pass a custom configuration file to the {product-utility} with the optional command-line parameter `-utilConfigFile`. -Example: - -Here's an example with {product-utility} 2.3.0: +For example: [source,bash] ---- @@ -167,16 +153,14 @@ You have five attempts to enter valid variables. You can always run the {product-utility} again, if necessary. ==== -. Enter the path to, and name of, the SSH private key to access the proxy hosts. -Example: +. Enter the path to, and name of, the SSH private key to access the proxy hosts: + [source,bash] ---- ~/my-zdm-key ---- -. Enter the common prefix of the private IP addresses of the proxy hosts. -Example: +. Enter the common prefix of the private IP addresses of the proxy hosts: + [source,bash] ---- @@ -223,10 +207,10 @@ For details about the metrics you can observe in these preconfigured Grafana das ==== + You can choose to deploy the monitoring stack on the jumphost or on a different machine, as long as it can connect to the {product-proxy} instances over TCP on ports 9100 (to collect host-level metrics) and on the port on which the {product-proxy} exposes its own metrics, typically 14001. - ++ In this example, we'll enter the same IP of the Ansible control host (the jumphost machine on which we're running the {product-utility}). 
Example: - ++ [source,bash] ---- 172.18.100.128 diff --git a/modules/ROOT/pages/tls.adoc b/modules/ROOT/pages/tls.adoc index 05ac7e64..3a137d87 100644 --- a/modules/ROOT/pages/tls.adoc +++ b/modules/ROOT/pages/tls.adoc @@ -22,7 +22,7 @@ One-way TLS and Mutual TLS are both supported and can be enabled depending on ea One-way TLS and Mutual TLS are both supported. * When the {product-proxy} connects to {astra-db}, it always implicitly uses Mutual TLS. -This is done through the Secure Connect Bundle (SCB) and does not require any extra configuration. +This is done through the {scb} and does not require any extra configuration. [[_retrieving_files_from_a_jks_keystore]] == Retrieving files from a JKS keystore @@ -45,7 +45,7 @@ To extract a file from a JKS keystore: keytool -exportcert -keystore -alias -file -rfc ---- -Please note the `-rfc` option, which extracts the files in non-binary PEM format. +The `-rfc` option extracts the files in non-binary PEM format. For more details, see the https://docs.oracle.com/javase/8/docs/technotes/tools/windows/keytool.html[keytool syntax documentation]. @@ -119,7 +119,7 @@ For Mutual TLS only, leave unset otherwise. == Application-to-proxy TLS Here are the steps to enable TLS between your client application and the {product-proxy} if required. -Please bear in mind that in this case your client application is the TLS client and the {product-proxy} is the TLS server. +In this case, your client application is the TLS client and the {product-proxy} is the TLS server. 
The files required by the proxy to configure application-to-proxy TLS are: diff --git a/modules/ROOT/pages/troubleshooting-scenarios.adoc b/modules/ROOT/pages/troubleshooting-scenarios.adoc index 2b3326f6..01abb1fc 100644 --- a/modules/ROOT/pages/troubleshooting-scenarios.adoc +++ b/modules/ROOT/pages/troubleshooting-scenarios.adoc @@ -26,7 +26,7 @@ This is by design: immutable configuration variables should not be changed after To change the value of configuration variables that are considered immutable, simply run the `deploy_zdm_proxy.yml` playbook again. This playbook can be run as many times as necessary and will just recreate the entire {product-proxy} deployment from scratch with the provided configuration. -Please note that this does not happen in a rolling fashion: the existing {product-proxy} instances will be torn down all at the same time prior to being recreated, resulting in a brief window in which the whole {product-proxy} deployment will become unavailable. +This doesn't happen in a rolling fashion: the existing {product-proxy} instances are torn down all at the same time prior to being recreated, resulting in a brief window in which the whole {product-proxy} deployment will become unavailable. == Unsupported protocol version error on the client application @@ -236,7 +236,7 @@ The driver might also have it enabled for {astra-db} depending on what server ve === Solution or Workaround These log messages are harmless but if your need to get rid of them, you can disable the {dse-short} Insights driver feature through the driver configuration. -Refer to https://github.com/datastax/java-driver/blob/65d2c19c401175dcc6c370560dd5f783d05b05b9/core/src/main/resources/reference.conf#L1328[this property] for Java driver 4.x. +Refer to https://github.com/apache/cassandra-java-driver/blob/65d2c19c401175dcc6c370560dd5f783d05b05b9/core/src/main/resources/reference.conf#L1328[this property] for Java driver 4.x. 
@@ -341,8 +341,7 @@ The {astra} DevOps API is likely temporarily unavailable. === Solution or Workaround -Download the {astra-db} Secure Connect Bundle (SCB) manually and provide its path to the {product-automation} as explained xref:deploy-proxy-monitoring.adoc#_core_configuration[here]. -For information about the SCB, see https://docs.datastax.com/en/astra-serverless/docs/connect/secure-connect-bundle.html[working with Secure Connect Bundle]. +xref:astra-db-serverless:databases:secure-connect-bundle.adoc[Download the {astra-db} {scb}] manually and provide its path in the xref:deploy-proxy-monitoring.adoc#_core_configuration[{product-automation} configuration]. == Metadata service returned not successful status code 4xx or 5xx @@ -409,7 +408,7 @@ Starting in version 2.1.0, you can now tune the maximum number of stream ids ava You can increase it to match your driver configuration through the xref:manage-proxy-instances.adoc#zdm_proxy_max_stream_ids[zdm_proxy_max_stream_ids] property. If these errors are being constantly written to the log files (for minutes or even hours) then it is likely that only an application OR {product-proxy} restart will fix it. -If you find an issue like this please submit an https://github.com/datastax/zdm-proxy/issues[Issue] in our GitHub repo. +If you find an issue like this, submit a {product-proxy-repo}/issues[GitHub issue]. == Client application closed connection errors every 10 minutes when migrating to {astra-db} diff --git a/modules/ROOT/pages/troubleshooting-tips.adoc b/modules/ROOT/pages/troubleshooting-tips.adoc index 8e0dd708..fb539c13 100644 --- a/modules/ROOT/pages/troubleshooting-tips.adoc +++ b/modules/ROOT/pages/troubleshooting-tips.adoc @@ -62,7 +62,7 @@ However, if you enable `DEBUG` logging, `debug` messages can help you find the s * `level=error`: Indicates an issue with the {product-proxy}, client application, or clusters. These messages require further examination. 
-If the meaning of a `warn` or `error` message isn't clear, you can submit an issue in the https://github.com/datastax/zdm-proxy/issues[{product-proxy} GitHub repository]. +If the meaning of a `warn` or `error` message isn't clear, you can submit an issue in the {product-proxy-repo}/issues[{product-proxy} GitHub repository]. === Common log messages @@ -149,10 +149,10 @@ This flag will prevent you from accessing the logs when {product-proxy} stops or To report an issue or get additional support, submit an issue in the {product-short} component GitHub repositories: -* https://github.com/datastax/zdm-proxy/issues[{product-proxy} repository] -* https://github.com/datastax/zdm-proxy-automation/issues[{product-automation} repository] (includes {product-automation} and the {product-utility}) -* https://github.com/datastax/cassandra-data-migrator/issues[{cass-migrator} repository] -* https://github.com/datastax/dsbulk-migrator/issues[{dsbulk-migrator} repository] +* {product-proxy-repo}/issues[{product-proxy} repository] +* {product-automation-repo}/issues[{product-automation} repository] (includes {product-automation} and the {product-utility}) +* {cass-migrator-repo}/issues[{cass-migrator} repository] +* {dsbulk-migrator-repo}/issues[{dsbulk-migrator} repository] [IMPORTANT] ==== diff --git a/modules/ROOT/pages/zdm-proxy-migration-paths.adoc b/modules/ROOT/pages/zdm-proxy-migration-paths.adoc new file mode 100644 index 00000000..57328f05 --- /dev/null +++ b/modules/ROOT/pages/zdm-proxy-migration-paths.adoc @@ -0,0 +1,6 @@ += Cluster compatibility for {product} +:description: Learn which sources and targets are eligible for {product}. + +include::ROOT:partial$migration-scenarios.adoc[] + +For more {product} requirements, see xref:ROOT:feasibility-checklists.adoc[]. 
\ No newline at end of file diff --git a/modules/ROOT/partials/migration-scenarios.adoc b/modules/ROOT/partials/migration-scenarios.adoc index caa4a848..1f8b5d77 100644 --- a/modules/ROOT/partials/migration-scenarios.adoc +++ b/modules/ROOT/partials/migration-scenarios.adoc @@ -1,31 +1,17 @@ -There are many reasons why you may decide to migrate your data and client applications from one cluster to another, for example: - -* Moving to a different type of CQL database, for example an on-demand cloud-based proposition such as {astra-db}. - -* Upgrading a cluster to a newer version, or newer infrastructure, in as little as one step while leaving your existing cluster untouched throughout the process. - -* Moving one or more client applications out of a shared cluster and onto a dedicated one, in order to manage and configure each cluster independently. - -* Consolidating client applications, which may be currently running on separate clusters, onto a shared one in order to reduce overall database footprint and maintenance overhead. - -Here are just a few examples of migration scenarios that are supported when moving from one type of CQL-based database to another: - -* From an existing self-managed {cass-reg} or {dse-short} cluster to cloud-native {astra-db}. For example: - -** {cass} 2.1.6+, 3.11.x, 4.0.x, or 4.1.x to {astra-db}. - -** {dse-short} 4.7.1+, 4.8.x, 5.1.x, 6.7.x or 6.8.x to {astra-db}. - -* From an existing {cass-short} or {dse-short} cluster to another {cass-short} or {dse-short} cluster. For example: - -** {cass-short} 2.1.6+ or 3.11.x to {cass-short} 4.0.x or 4.1.x. - -** {dse-short} 4.7.1+, 4.8.x, 5.1.x or 6.7.x to {dse-short} 6.8.x. - -** {cass-short} 2.1.6+, 3.11.x, 4.0.x, or 4.1.x to {dse-short} 6.8.x. - -** {dse-short} 4.7.1+ or 4.8.x to {cass-short} 4.0.x or 4.1.x. - -* From https://docs.datastax.com/en/astra-classic/docs[{astra-db} Classic] to https://docs.datastax.com/en/astra/astra-db-vector/[{astra-db} Serverless]. 
- -* From any CQL-based database type/version to the equivalent CQL-based database type/version. \ No newline at end of file +You can use {product-proxy} to support migrations from {cass-reg}, {dse}, {hcd}, {astra-db}, and other {cass-short}-based databases to any other {cass-short}-based database of the equivalent type or version: + +Compatible origin clusters:: +Migrate from one of the following: ++ +* https://www.datastax.com/products/datastax-enterprise[{dse}] version 4.7.1 and later +* https://cassandra.apache.org/_/index.html[{cass-reg}] version 2.1.6 and later +* Other {cass-short}-based databases that are based on a compatible {cass-short} version, such as {astra-db} Classic, ScyllaDB, and Yugabyte. + +Compatible target clusters:: +Migrate to one of the following: ++ +* https://www.datastax.com/products/hyper-converged-database-hcd[{hcd}] +* A cluster running the same or later version of {cass-short} or {dse-short} +* https://www.datastax.com/products/datastax-astra[{astra-db}] ++ +For more {astra-db} migration paths, see xref:ROOT:astra-migration-paths.adoc[]. \ No newline at end of file diff --git a/modules/ROOT/partials/supported-releases.adoc b/modules/ROOT/partials/supported-releases.adoc deleted file mode 100644 index 28145c37..00000000 --- a/modules/ROOT/partials/supported-releases.adoc +++ /dev/null @@ -1,3 +0,0 @@ -You can use {product-proxy} to migrate from a cluster running https://cassandra.apache.org/_/index.html[{cass-reg}] version 2.1.6 and later or https://www.datastax.com/products/datastax-enterprise[{dse}] version 4.7.1 and later. 
- -You can migrate to https://www.datastax.com/products/datastax-astra[{astra-db}] or a cluster running the same or later version of {cass-short} or {dse-short} \ No newline at end of file diff --git a/modules/sideloader/images/astra-migration-toolkit.svg b/modules/sideloader/images/astra-migration-toolkit.svg new file mode 100644 index 00000000..074b7aed --- /dev/null +++ b/modules/sideloader/images/astra-migration-toolkit.svg @@ -0,0 +1,99 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/sideloader/images/data-importer-zdm.svg b/modules/sideloader/images/data-importer-zdm.svg deleted file mode 100644 index 22147f4a..00000000 --- a/modules/sideloader/images/data-importer-zdm.svg +++ /dev/null @@ -1,98 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/modules/sideloader/pages/migrate-sideloader.adoc b/modules/sideloader/pages/migrate-sideloader.adoc index 67a68d46..1be294d4 100644 --- a/modules/sideloader/pages/migrate-sideloader.adoc +++ b/modules/sideloader/pages/migrate-sideloader.adoc @@ -190,7 +190,7 @@ To prepare your target database for the migration, you must record the schema fo ==== For the migration to succeed, your target database must meet the schema requirements described in this section. Additionally, your snapshots must contain compatible data and directories, as described in xref:sideloader:prepare-sideloader.adoc#origin-cluster-requirements[Origin cluster requirements] and xref:sideloader:migrate-sideloader.adoc#create-snapshots[Create snapshots]. -For example, {astra-db} doesn't support materialized views, and {sstable-sideloader} can't migrate encrypted data. 
+For example, {astra-db} doesn't support materialized views, and {sstable-sideloader} cannot migrate encrypted data. However, indexes don't need to match. You can define indexes in your target database independently from the origin cluster because {sstable-sideloader} ignores Storage Attached Indexes (SAI) defined on the origin cluster. @@ -230,7 +230,7 @@ CREATE TABLE smart_home.sensor_readings ( PRIMARY KEY (device_id, room_id, reading_timestamp) ) WITH CLUSTERING ORDER BY (room_id ASC, reading_timestamp DESC); ---- -//However, {sstable-sideloader} can't import data to a xref:astra-db-serverless:databases:collection in a {db-serverless-vector} database. +//However, {sstable-sideloader} cannot import data to a xref:astra-db-serverless:databases:collection in a {db-serverless-vector} database. . Recreate the schemas in your target database: + .. In the {astra-ui-link} navigation menu, click *Databases*, and then click the name of your {astra-db} database. diff --git a/modules/sideloader/pages/prepare-sideloader.adoc b/modules/sideloader/pages/prepare-sideloader.adoc index 681144bb..9424e1a8 100644 --- a/modules/sideloader/pages/prepare-sideloader.adoc +++ b/modules/sideloader/pages/prepare-sideloader.adoc @@ -164,10 +164,10 @@ If you choose the alternative option, you must modify the commands accordingly f * *{astra-db} doesn't support materialized views*: You must replace these with SAI or an alternative data model design. -* *{sstable-sideloader} doesn't support encrypted data*: If your origin cluster uses xref:6.9@dse:securing:transparent-data-encryption.adoc[{dse-short} Transparent Data Encryption], be aware that {sstable-sideloader} can't migrate these SSTables. +* *{sstable-sideloader} doesn't support encrypted data*: If your origin cluster uses xref:6.9@dse:securing:transparent-data-encryption.adoc[{dse-short} Transparent Data Encryption], be aware that {sstable-sideloader} cannot migrate these SSTables. 
+ If you have a mix of encrypted and unencrypted data, you can use {sstable-sideloader} to migrate the unencrypted data. -After the initial migration, you can use another strategy to move the encrypted data, such as https://github.com/datastax/cassandra-data-migrator[{cass-short} Data Migrator (CDM)] or a manual export and reupload. +After the initial migration, you can use another strategy to move the encrypted data, such as {cass-migrator-repo}[{cass-short} Data Migrator (CDM)] or a manual export and reupload. * *{sstable-sideloader} doesn't support secondary indexes*: If you don't remove or replace these in your origin cluster, {sstable-sideloader} ignores these directories when importing the data to your {astra-db} database. @@ -179,7 +179,7 @@ Your administration server must have SSH access to each node in your origin clus {company} recommends that you install the following additional software on your administration server: -* https://github.com/datastax/cassandra-data-migrator[{cass-short} Data Migrator (CDM)] to validate imported data and, in the context of {product}, reconcile it with the origin cluster. +* {cass-migrator-repo}[{cass-short} Data Migrator (CDM)] to validate imported data and, in the context of {product}, reconcile it with the origin cluster. * https://jqlang.github.io/jq/[jq] to format JSON responses from the {astra} {devops-api}. The {devops-api} commands in this guide use this tool. @@ -187,6 +187,22 @@ The {devops-api} commands in this guide use this tool. The following information can help you prepare for specific migration scenarios, including multi-region migrations and multiple migrations to the same database. +[#minimum-migration-scope] +=== Minimum migration scope + +To minimize data reconciliation issues, the recommended minimum migration scope is one CQL table across all nodes. 
+ +This means that a single migration process, from start to finish, should encapsulate the data for one CQL table as it exists on all of your origin nodes. +For example, if you are migrating one table, you need to upload snapshots of all SSTables from all nodes for that CQL table. + +Avoid breaking one table into multiple migrations because migrating a subset of SSTables for one CQL table will likely result in data loss, corruption, or resurrection of previously deleted data. + +Each migration is performed separately, and each migration has no awareness of prior migrations. +This means that data from later migrations can be incorrectly applied to the table. +For example, if your first migration includes tombstones, that data could be resurrected if it is present in a subsequent migration from another node. + +In contrast, if you use a single large migration to migrate all SSTables for a CQL table across all nodes, {astra-db} can reconcile the data across all nodes, ensuring that your migration is accurate and complete. + === Multi-region migrations Multi-region migrations can include one or more of the following scenarios: @@ -215,6 +231,8 @@ Migrate multiple nodes to one database:: . From your origin cluster, create snapshots for all of the nodes that you want to migrate. + Run `nodetool snapshot` as many times as necessary to capture all of your nodes. ++ +For important warnings about multi-node migrations, see <>. . On your target database, replicate the schemas for all tables that you want to migrate. + @@ -249,6 +267,8 @@ Consider focusing on one target database at a time, or create a migration plan t . From your origin cluster, create snapshots for all of the nodes that you want to migrate. + Run `nodetool snapshot` as many times as necessary to capture all of your nodes. ++ +For important warnings about multi-node migrations, see <>. . On each of your target databases, replicate the schemas for the tables that you want to migrate to each database. 
+ @@ -288,6 +308,7 @@ If you initialize multiple migrations for the same database, you generate multip This can be useful for breaking large migrations into smaller batches. For example, if you have 100 snapshots, you could initialize 10 migrations, and then upload 10 different snapshots to each migration directory. +However, don't break one CQL table into multiple migrations, as explained in <>. You can upload snapshots to multiple migration directories at once. However, when you reach the import phase of the migration, {sstable-sideloader} can import from only one migration directory at a time per database. diff --git a/modules/sideloader/pages/sideloader-overview.adoc b/modules/sideloader/pages/sideloader-overview.adoc index bc303098..c29de547 100644 --- a/modules/sideloader/pages/sideloader-overview.adoc +++ b/modules/sideloader/pages/sideloader-overview.adoc @@ -111,10 +111,7 @@ include::sideloader:partial$sideloader-partials.adoc[tags=validate] == Use {sstable-sideloader} with {product-short} -If you need to migrate a live database, you can use {sstable-sideloader} instead of DSBulk or {cass-short} Data Migrator during of xref:ROOT:migrate-and-validate-data.adoc[Phase 2 of {product} ({product-short})]. - -.Use {sstable-sideloader} in the context of {product}. -image::sideloader:data-importer-zdm.svg[] +include::sideloader:partial$sideloader-partials.adoc[tags=sideloader-zdm] == Next steps diff --git a/modules/sideloader/pages/sideloader-zdm.adoc b/modules/sideloader/pages/sideloader-zdm.adoc new file mode 100644 index 00000000..f3524186 --- /dev/null +++ b/modules/sideloader/pages/sideloader-zdm.adoc @@ -0,0 +1,14 @@ += Use {sstable-sideloader} with {product-short} +:navtitle: Use {sstable-sideloader} +:description: Use {sstable-sideloader} to migrate data with {product-short}. 
+ +{sstable-sideloader} is a service running in {astra-db} that directly imports data from snapshot backups that you've uploaded to {astra-db} from an existing {dse-short}, {hcd-short}, or other compatible {cass-short} cluster. +For compatible origin clusters, see xref:ROOT:astra-migration-paths.adoc[]. + +Because it imports data directly, {sstable-sideloader} can offer several advantages over CQL-based tools like {dsbulk-migrator} and {cass-migrator}, including faster, more cost-effective data loading, and minimal performance impacts on your origin cluster and target database. + +{sstable-sideloader} uses the {astra} {devops-api}, your cloud provider's CLI, and `nodetool`. + +include::sideloader:partial$sideloader-partials.adoc[tags=sideloader-zdm] + +For more information, see xref:sideloader:sideloader-overview.adoc[]. \ No newline at end of file diff --git a/modules/sideloader/pages/stop-restart-sideloader.adoc b/modules/sideloader/pages/stop-restart-sideloader.adoc index 53598c22..a35faad3 100644 --- a/modules/sideloader/pages/stop-restart-sideloader.adoc +++ b/modules/sideloader/pages/stop-restart-sideloader.adoc @@ -21,7 +21,7 @@ Any in-progress jobs will complete, but no new jobs will start. === Resume a migration -Resume a previously-paused migration from the point at which it was paused: +Resume a previously paused migration from the point at which it was paused: [source,bash] ---- diff --git a/modules/sideloader/pages/troubleshoot-sideloader.adoc b/modules/sideloader/pages/troubleshoot-sideloader.adoc index e5e1d9ec..2e96a1ec 100644 --- a/modules/sideloader/pages/troubleshoot-sideloader.adoc +++ b/modules/sideloader/pages/troubleshoot-sideloader.adoc @@ -76,7 +76,7 @@ TODO: Add to this page and stop-restart page. When a migration fails due to a problem with the data uploaded to the migration directory, you must completely restart the migration. -This is because you can't change the data in the migration directory after you upload it. 
+This is because you cannot change the data in the migration directory after you upload it. For example, if your snapshots contain corrupt data, you have to restart the migration with new snapshots and a new migration directory. . Review the xref:sideloader:prepare-sideloader.adoc#origin-cluster-requirements[origin cluster requirements] to ensure that your snapshot doesn't contain invalid data, including materialized views and encrypted data. diff --git a/modules/sideloader/partials/sideloader-partials.adoc b/modules/sideloader/partials/sideloader-partials.adoc index 00faa3f5..46c5a350 100644 --- a/modules/sideloader/partials/sideloader-partials.adoc +++ b/modules/sideloader/partials/sideloader-partials.adoc @@ -31,7 +31,8 @@ You can xref:ROOT:cassandra-data-migrator.adoc#cdm-validation-steps[run {cass-mi // tag::initialize[] After you create snapshots on the origin cluster and pre-configure the schema on the target database, use the {astra} {devops-api} to initialize the migration. -image::sideloader:data-importer-workflow.svg[] +.{sstable-sideloader} moves data from the migration bucket to {astra-db}. +image::sideloader:data-importer-workflow.svg[] When you initialize a migration, {sstable-sideloader} does the following: @@ -95,4 +96,11 @@ If all steps finish successfully, the migration is complete and you can access t // tag::no-return[] You can abort a migration up until the point at which {sstable-sideloader} starts importing SSTable metadata. After this point, you must wait for the migration to finish, and then you can use the CQL shell to drop the keyspace/table in your target database before repeating the entire migration procedure. -// end::no-return[] \ No newline at end of file +// end::no-return[] + +// tag::sideloader-zdm[] +If you need to migrate a live database, you can use {sstable-sideloader} instead of DSBulk or {cass-short} Data Migrator during xref:ROOT:migrate-and-validate-data.adoc[Phase 2 of {product} ({product-short})]. 
+ +.Use {sstable-sideloader} in the context of {product}. +image::sideloader:astra-migration-toolkit.svg[] +// end::sideloader-zdm[] \ No newline at end of file