From 40bd411e7dc923c22fbf998a501ad5164dd41e97 Mon Sep 17 00:00:00 2001 From: Natalia Ivakina Date: Mon, 1 Sep 2025 14:01:51 +0200 Subject: [PATCH 01/10] Add a new guide on deploying multi-region cluster --- modules/ROOT/content-nav.adoc | 12 +- .../clustering-advanced/default-database.adoc | 61 ----- .../clustering/clustering-advanced/index.adoc | 16 -- modules/ROOT/pages/clustering/databases.adoc | 60 ++++- modules/ROOT/pages/clustering/index.adoc | 12 +- .../disaster-recovery.adoc | 2 +- .../geo-redundant-deployment.adoc | 210 ++++++++++++++++++ .../multi-data-center-routing.adoc | 0 modules/ROOT/pages/clustering/servers.adoc | 2 +- .../clustering/setup/analytics-cluster.adoc | 2 +- .../reconciler.adoc | 0 .../ROOT/pages/clustering/setup/routing.adoc | 2 +- .../{clustering-advanced => }/unbind.adoc | 0 .../configuration/configuration-settings.adoc | 2 +- .../pages/database-administration/index.adoc | 2 +- .../database-administration/queries.adoc | 2 +- .../configuration-parameters.adoc | 6 +- .../standard-databases/recreate-database.adoc | 2 +- modules/ROOT/pages/neo4j-admin-neo4j-cli.adoc | 2 +- 19 files changed, 287 insertions(+), 108 deletions(-) delete mode 100644 modules/ROOT/pages/clustering/clustering-advanced/default-database.adoc delete mode 100644 modules/ROOT/pages/clustering/clustering-advanced/index.adoc rename modules/ROOT/pages/clustering/{ => multi-region-deployment}/disaster-recovery.adoc (99%) create mode 100644 modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc rename modules/ROOT/pages/clustering/{clustering-advanced => multi-region-deployment}/multi-data-center-routing.adoc (100%) rename modules/ROOT/pages/clustering/{clustering-advanced => setup}/reconciler.adoc (100%) rename modules/ROOT/pages/clustering/{clustering-advanced => }/unbind.adoc (100%) diff --git a/modules/ROOT/content-nav.adoc b/modules/ROOT/content-nav.adoc index 6398e5afe..59c19f32b 100644 --- a/modules/ROOT/content-nav.adoc +++ b/modules/ROOT/content-nav.adoc @@ -155,25 +155,25 @@ *** xref:clustering/setup/deploy.adoc[] *** xref:clustering/setup/analytics-cluster.adoc[] *** xref:clustering/setup/single-to-cluster.adoc[] +*** xref:clustering/setup/reconciler.adoc[] *** xref:clustering/setup/discovery.adoc[] *** xref:clustering/setup/routing.adoc[] *** xref:clustering/setup/encryption.adoc[] ** xref:clustering/servers.adoc[] +** xref:clustering/unbind.adoc[] ** xref:clustering/databases.adoc[] ** Monitoring *** xref:clustering/monitoring/show-servers-monitoring.adoc[] *** xref:clustering/monitoring/show-databases-monitoring.adoc[] *** xref:clustering/monitoring/endpoints.adoc[] *** xref:clustering/monitoring/status-check.adoc[] -** xref:clustering/disaster-recovery.adoc[] +** Resilient cluster deployment +*** xref:clustering/multi-region-deployment/multi-data-center-routing.adoc[] +*** xref:clustering/multi-region-deployment/geo-redundant-deployment.adoc[] +*** xref:clustering/multi-region-deployment/disaster-recovery.adoc[] //** xref:clustering/internals.adoc[] ** xref:clustering/settings.adoc[] ** xref:clustering/server-syntax.adoc[] -** xref:clustering/clustering-advanced/index.adoc[] -*** xref:clustering/clustering-advanced/default-database.adoc[] -*** xref:clustering/clustering-advanced/unbind.adoc[] -*** xref:clustering/clustering-advanced/multi-data-center-routing.adoc[] -*** xref:clustering/clustering-advanced/reconciler.adoc[] ** xref:clustering/glossary.adoc[] * xref:backup-restore/index.adoc[] diff --git 
a/modules/ROOT/pages/clustering/clustering-advanced/default-database.adoc b/modules/ROOT/pages/clustering/clustering-advanced/default-database.adoc deleted file mode 100644 index 7815652f4..000000000 --- a/modules/ROOT/pages/clustering/clustering-advanced/default-database.adoc +++ /dev/null @@ -1,61 +0,0 @@ -[role=enterprise-edition] -[[cluster-default-database]] -= Default database in a cluster -:description: This section describes how the creation of the initial default database works in a cluster. - -[[default-database-introduction]] -== Introduction - -The default database, as defined by xref:configuration/configuration-settings.adoc#config_initial.dbms.default_database[`initial.dbms.default_database`], is automatically created when the DBMS starts for the first time. -This provides a user database to experiment with immediately. -However, this creation is 'best effort' for reasons explained below, and users are encouraged to create their own default database for production use. -If you create your own default database, even if you just `DROP DATABASE neo4j` and `CREATE DATABASE neo4j`, you do not have to be aware of the complexities below. - -[[default-database-automatic-creation]] -== Automatic default database creation - -The initial default database is created when the DBMS starts for the first time. -It uses the following settings: - -* xref:configuration/configuration-settings.adoc#config_initial.dbms.default_database[`initial.dbms.default_database`] -- the name of the database. -* xref:configuration/configuration-settings.adoc#config_initial.dbms.default_primaries_count[`initial.dbms.default_primaries_count`] -- the number of primaries requested for the default database. -* xref:configuration/configuration-settings.adoc#config_initial.dbms.default_secondaries_count[`initial.dbms.default_secondaries_count`] -- the number of secondaries requested for the default database. - -However, it uses the default primary and secondary counts (topology) as maximums, not as hard requirements. -This is different to normal database creation, where if the requested topology cannot be satisfied, creation fails. -For the automatic creation of the default database alone, if the requested topology cannot be satisfied, you get as many of each hosting type as can be satisfied by the current cluster. -This means you may end up with a default database with as few as one primary and no secondaries, despite the default values being higher. -It is also possible to configure a cluster where automatic creation of the default database is not possible when the DBMS starts up. -In this case, creation fails, a warning is logged, and creation *is not be re-attempted*. - -Automatic creation of the initial default database works as follows: - -* As the cluster starts for the first time, there is a configured threshold for how many servers are required to create the DBMS - `dbms.cluster.minimum_initial_system_primaries_count`. -* Once *a minimum* of this many servers have discovered each other, the `system` database bootstraps, allowing creation of the DBMS. -* The initial default database is created with those servers as the possible hosts. - * If any of the servers block hosting the default database (see xref:configuration/configuration-settings.adoc#config_initial.server.denied_databases[`initial.server.denied_databases`]), they are not used. 
- * If any of the servers restrict the mode they can host a database in, that is obeyed (see xref:configuration/configuration-settings.adoc#config_initial.server.mode_constraint[`initial.server.mode_constraint`]). - * If there are too few servers to allocate the requested number of primaries, whichever ones available are used. - If there are zero available primaries, automatic creation fails. - * If there are too few servers remaining after the primary allocation to satisfy the requested number of secondaries, whicever ones available are used. - -Some possible behaviours that may be observed as a result of the above approach: - -* If `initial.dbms.default_primaries_count` is larger than `dbms.cluster.minimum_initial_system_primaries_count`, you are likely to get an initial default database with fewer primaries than the default. -This is because DBMS initialisation only waits for the minimum system primaries. -* If `initial.dbms.default_secondaries_count` plus `initial.dbms.default_primaries_count` is larger than `dbms.cluster.minimum_initial_system_primaries_count`, you are likely to get an initial default database with fewer secondaries than the default. -This is because DBMS initialisation only waits for the minimum number of system primaries. -* If you use `initial.server.denied_databases` to prevent the allocation of your default database to any of your initial servers, you may end up with fewer copies of the database than the default request, and possibly even no default database. -* If you use `initial.server.mode_constraint=SECONDARY` for any of your initial servers, you may end up with fewer primary copies of the database than the default request, and possibly even no default database. - -[[default-database-change-topology]] -== Changing default database topology - -If the default database is initially created for you with a topology different to what you want, you can update it in the same way as any database, see xref:clustering/databases.adoc#alter-topology[Alter topology]. - -[[default-database-create-your-own]] -== Creating your own default database - -Once the DBMS has started, you can create your own database with your specified topology, and make it the default. -See xref:clustering/databases.adoc#cluster-default-database[Change the default database]. -This can replace the existing default database, or have a different name. diff --git a/modules/ROOT/pages/clustering/clustering-advanced/index.adoc b/modules/ROOT/pages/clustering/clustering-advanced/index.adoc deleted file mode 100644 index 16d2f8358..000000000 --- a/modules/ROOT/pages/clustering/clustering-advanced/index.adoc +++ /dev/null @@ -1,16 +0,0 @@ -[role=enterprise-edition] -:page-aliases: clustering-advanced/index.adoc -[[clustering-advanced]] -= Advanced clustering -:description: This appendix describes advanced features of a Neo4j Cluster. - -This section includes information about advanced deployments of a Neo4j Cluster. - -* xref:clustering/clustering-advanced/default-database.adoc[Default database in a cluster] -- Details of the creation of the default database in a cluster. -* xref:clustering/clustering-advanced/unbind.adoc[Unbind a server] -- How to remove and archive the cluster state of a cluster server so that it can rebind to a cluster. -* xref:clustering/clustering-advanced/multi-data-center-routing.adoc[Multi-data center routing] -- Information about routing in multi-data center deployments. 
-* xref:clustering/clustering-advanced/reconciler.adoc[Reconciler] -- Details about the way database management operations are processed.
-
-For details on the configuration and operation of a Neo4j cluster, see xref:clustering/index.adoc[Clustering].
-
-For descriptions of settings related to running a Neo4j cluster, see xref:clustering/settings.adoc[Settings reference].
diff --git a/modules/ROOT/pages/clustering/databases.adoc b/modules/ROOT/pages/clustering/databases.adoc
index c2def73b3..a43385a2e 100644
--- a/modules/ROOT/pages/clustering/databases.adoc
+++ b/modules/ROOT/pages/clustering/databases.adoc
@@ -351,7 +351,58 @@ Allowed and denied are mutually exclusive.
If not set, there are no mode constraints on the server.

[[cluster-default-database]]
-== Change the default database
+== Default database in a cluster
+
+The default database, as defined by xref:configuration/configuration-settings.adoc#config_initial.dbms.default_database[`initial.dbms.default_database`], is automatically created when the DBMS starts for the first time.
+This provides a user database to experiment with immediately.
+However, this creation is 'best effort' for reasons explained below, and users are encouraged to create their own default database for production use.
+If you create your own default database, even if you just run `DROP DATABASE neo4j` and `CREATE DATABASE neo4j`, you do not have to be aware of the complexities below.
+
+[[default-database-automatic-creation]]
+=== Automatic default database creation
+
+The initial default database is created when the DBMS starts for the first time.
+It uses the following settings:
+
+* xref:configuration/configuration-settings.adoc#config_initial.dbms.default_database[`initial.dbms.default_database`] -- the name of the database.
+* xref:configuration/configuration-settings.adoc#config_initial.dbms.default_primaries_count[`initial.dbms.default_primaries_count`] -- the number of primaries requested for the default database.
+* xref:configuration/configuration-settings.adoc#config_initial.dbms.default_secondaries_count[`initial.dbms.default_secondaries_count`] -- the number of secondaries requested for the default database.
+
+However, it uses the default primary and secondary counts (topology) as maximums, not as hard requirements.
+This is different to normal database creation, where if the requested topology cannot be satisfied, creation fails.
+For the automatic creation of the default database alone, if the requested topology cannot be satisfied, you get as many of each hosting type as can be satisfied by the current cluster.
+This means you may end up with a default database with as few as one primary and no secondaries, despite the default values being higher.
+It is also possible to configure a cluster where automatic creation of the default database is not possible when the DBMS starts up.
+In this case, creation fails, a warning is logged, and creation *is not re-attempted*.
+
+Automatic creation of the initial default database works as follows:
+
+* As the cluster starts for the first time, there is a configured threshold for how many servers are required to create the DBMS -- `dbms.cluster.minimum_initial_system_primaries_count`.
+* Once *a minimum* of this many servers have discovered each other, the `system` database bootstraps, allowing creation of the DBMS.
+* The initial default database is created with those servers as the possible hosts.
+* If any of the servers block hosting the default database (see xref:configuration/configuration-settings.adoc#config_initial.server.denied_databases[`initial.server.denied_databases`]), they are not used.
+* If any of the servers restrict the mode they can host a database in, that is obeyed (see xref:configuration/configuration-settings.adoc#config_initial.server.mode_constraint[`initial.server.mode_constraint`]).
+* If there are too few servers to allocate the requested number of primaries, whichever ones are available are used.
+If there are zero available primaries, automatic creation fails.
+* If there are too few servers remaining after the primary allocation to satisfy the requested number of secondaries, whichever ones are available are used.
+
+Some possible behaviours that may be observed as a result of the above approach:
+
+* If `initial.dbms.default_primaries_count` is larger than `dbms.cluster.minimum_initial_system_primaries_count`, you are likely to get an initial default database with fewer primaries than the default.
+This is because DBMS initialisation only waits for the minimum system primaries.
+* If `initial.dbms.default_secondaries_count` plus `initial.dbms.default_primaries_count` is larger than `dbms.cluster.minimum_initial_system_primaries_count`, you are likely to get an initial default database with fewer secondaries than the default.
+This is because DBMS initialisation only waits for the minimum number of system primaries.
+* If you use `initial.server.denied_databases` to prevent the allocation of your default database to any of your initial servers, you may end up with fewer copies of the database than the default request, and possibly even no default database.
+* If you use `initial.server.mode_constraint=SECONDARY` for any of your initial servers, you may end up with fewer primary copies of the database than the default request, and possibly even no default database.
+
+[[default-database-change-topology]]
+=== Changing default database topology
+
+If the default database is initially created for you with a topology different to what you want, you can update it in the same way as any database, see <<alter-topology>>.
+
+
+[[change-default-database]]
+=== Change the default database

You can use the procedure xref:procedures.adoc#procedure_dbms_setDefaultDatabase[`dbms.setDefaultDatabase("newDefaultDatabaseName")`] to change the default database for a DBMS.
Starting with Neo4j 2025.04, the default database can also be set to a local or remote database alias.
@@ -362,11 +413,6 @@ Starting with Neo4j 2025.04, the default database can also be set to a local or
. Run `CALL dbms.setDefaultDatabase("newDefaultDatabaseName")` against the `system` database to set the new default database.
. Optionally, you can start the previous default database as non-default by using `START DATABASE <database-name>`.

-[NOTE]
-====
-Be aware that the automatically created _initial_ default database may have a different topology to the default configuration values.
-See xref:clustering/clustering-advanced/default-database.adoc[Default database in a cluster] for more information.
-====

[[cluster-database-error-handling]]
== Handling errors

Databases can get into error states.
Typically you can observe this with the `SHOW DATABASES` command, and use the xref:database-administration/standard-databases/errors.adoc[error handling guidance] to help.
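+
+For example, you can list the databases that are not in their requested state with a query like the following (illustrative only; the exact columns returned depend on your Neo4j version):
+
+[source, cypher]
+----
+// Show databases whose current status differs from the requested status
+SHOW DATABASES YIELD name, address, requestedStatus, currentStatus, statusMessage
+WHERE currentStatus <> requestedStatus
+RETURN name, address, currentStatus, statusMessage;
+----
+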
In more serious cases you may be dealing with a disaster situation, where the whole DBMS may not be responding correctly, or some specific databases cannot be restored without downtime.
-Refer to the xref:clustering/disaster-recovery.adoc[disaster recovery guide] for those situations.
+Refer to the xref:clustering/multi-region-deployment/disaster-recovery.adoc[disaster recovery guide] for those situations.
diff --git a/modules/ROOT/pages/clustering/index.adoc b/modules/ROOT/pages/clustering/index.adoc
index 2f64872ad..e902590a0 100644
--- a/modules/ROOT/pages/clustering/index.adoc
+++ b/modules/ROOT/pages/clustering/index.adoc
@@ -10,24 +10,24 @@ This chapter describes the following:
** xref:clustering/setup/deploy.adoc[Deploy a basic cluster] -- How to set up a basic cluster.
** xref:clustering/setup/analytics-cluster.adoc[Deploy an analytics cluster] -- How to deploy a special case Neo4j cluster for analytic queries.
** xref:clustering/setup/single-to-cluster.adoc[Move from a standalone deployment to a cluster] -- This section describes how to move from a single Neo4j server to a Neo4j cluster.
+** xref:clustering/setup/reconciler.adoc[Reconciler] -- An internal component that observes the requested state of a server and makes changes to the server to match that state.
** xref:clustering/setup/discovery.adoc[Cluster server discovery] -- How servers in a cluster discover each other and form a cluster.
** xref:clustering/setup/routing.adoc[Leadership, routing and load balancing] -- Election of leaders, routing and load balancing.
** xref:clustering/setup/encryption.adoc[Intra-cluster encryption] -- How to secure the cluster communication.
* xref:clustering/servers.adoc[Managing servers in a cluster] -- How to manage the servers in a cluster.
+* xref:clustering/unbind.adoc[Unbind a server] -- How to remove and archive the cluster state of a cluster server so that it can rebind to a cluster.
* xref:clustering/databases.adoc[Managing databases in a cluster] -- How to manage the databases in a cluster.
* Monitoring -- Monitoring of a cluster.
** xref:clustering/monitoring/show-servers-monitoring.adoc[Monitor servers] -- The tools available for monitoring the servers in a cluster.
** xref:clustering/monitoring/show-databases-monitoring.adoc[Monitor databases] -- The tools available for monitoring the databases in a cluster.
** xref:clustering/monitoring/endpoints.adoc[Monitor cluster endpoints for status information] -- The endpoints and semantics of endpoints used to monitor the health of the cluster.
** xref:clustering/monitoring/status-check.adoc[Monitor replication status] -- The procedure to monitor which members of a clustered database are up-to-date and can participate in a successful replication.
-* xref:clustering/disaster-recovery.adoc[Disaster recovery] -- How to recover a cluster in the event of a disaster.
+* Resilient cluster deployment -- Recommendations and guidance on how to set up a resilient cluster which ensures your database stays available, fast, and recoverable even under failures.
+** xref:clustering/multi-region-deployment/multi-data-center-routing.adoc[Multi-data center routing] -- Routing in multi-data center deployments.
+** xref:clustering/multi-region-deployment/geo-redundant-deployment.adoc[] -- Recommended patterns of cluster deployment across multiple cloud regions / data centers.
+** xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster recovery] -- How to recover a cluster in the event of a disaster.
* xref:clustering/settings.adoc[Settings reference] -- A summary of the most important cluster settings. * xref:clustering/server-syntax.adoc[Server commands reference] -- Reference of Cypher administrative commands to add and manage servers. -* xref:clustering/clustering-advanced/index.adoc[Advanced clustering] -- Some more advanced features of Neo4j clusters. -** xref:clustering/clustering-advanced/default-database.adoc[Default database in a cluster] -- The initial default database created when the DBMS starts for the first time. -** xref:clustering/clustering-advanced/unbind.adoc[Unbind a server] -- How to remove and archive the cluster state of a cluster server so that it can rebind to a cluster. -** xref:clustering/clustering-advanced/multi-data-center-routing.adoc[Multi-data center routing] -- Clusters on mutli-data centers. -** xref:clustering/clustering-advanced/reconciler.adoc[Reconciler] -- An internal component that observes the requested state of a server and makes changes to the server to match that state. * xref:clustering/glossary.adoc[Clustering glossary] -- A glossary of terms related to the Neo4j clustering. diff --git a/modules/ROOT/pages/clustering/disaster-recovery.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/disaster-recovery.adoc similarity index 99% rename from modules/ROOT/pages/clustering/disaster-recovery.adoc rename to modules/ROOT/pages/clustering/multi-region-deployment/disaster-recovery.adoc index c8e522940..a659db22c 100644 --- a/modules/ROOT/pages/clustering/disaster-recovery.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/disaster-recovery.adoc @@ -288,7 +288,7 @@ Recreations might fail for different reasons, but one example is that the checks .Guide [%collapsible] ==== -. Identify all write-unavailable databases by running `CALL dbms.cluster.statusCheck([])` as described in the xref:clustering/disaster-recovery.adoc#example-verification[Example verification] part of this disaster recovery step. +. Identify all write-unavailable databases by running `CALL dbms.cluster.statusCheck([])` as described in the <<#example-verification, Example verification>> part of this disaster recovery step. Filter out all databases desired to be stopped, so that they are not recreated unnecessarily. . Recreate every database that is not write-available and has not been recreated previously. See xref:database-administration/standard-databases/recreate-database.adoc[Recreate a database] for more information. diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc new file mode 100644 index 000000000..3f565014f --- /dev/null +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -0,0 +1,210 @@ +:description: The page describes recommended and non-recommended patterns of deploying Neo4j cluster across multiple cloud regions / data centers. +[role=enterprise-edition] + + += Designing a resilient Neo4j cluster across cloud regions + +[[multi-region-deployment-overview]] +== Overview + +Deploying a resilient multi-region cluster, the goal is to achieve high availability, disaster recovery, and tolerance against the loss of a data center. + +You should take into account cluster architecture and topology and decide where database primaries and secondaries are located, balancing performance and fault tolerance. 
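+
+For example, how many primaries and secondaries a database gets is declared per database with the `TOPOLOGY` clause when you create or alter it (a minimal sketch; the database name `foo` and the counts are illustrative):
+
+[source, cypher]
+----
+// Create a database with three primaries and two secondaries
+CREATE DATABASE foo TOPOLOGY 3 PRIMARIES 2 SECONDARIES;
+
+// Adjust the topology of an existing database
+ALTER DATABASE foo SET TOPOLOGY 3 PRIMARIES 2 SECONDARIES;
+----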
+ +Pay attention to networking and traffic routing: + +* If database primaries are distant from each other, that will increase your write latency. +* To commit a change, xref:clustering/introduction.adoc#clustering-primary-mode[the writer primary] must get confirmation from a quorum of members, including itself. +If primaries are far apart, network latency adds to commit time. + + +[[recommended-cluster-patterns]] +== Recommended Neo4j cluster design patterns + +[[secondaries-for-read-resilience]] +=== Use database secondaries for read resilience + +You can locate all the database primaries in one data center (DC) and database secondaries in another region for better read performance. +This provides fast writes, because they will be performed within the region. + +However, if the data center with primaries goes down, your cluster loses write availability. +Though read availability may remain via the secondaries. + +==== How to recover from loss of a data center? + +You can restore the cluster write availability without the failed region: + +* If you have enough secondary servers in another data center, you can switch their mode to primary and not have to store copy or wait a long time for primary servers to restore. +* Use secondaries to re-seed databases if needed. +Run xref:database-administration/standard-databases/recreate-database.adoc[the `dbms.recreateDatabase()` procedure]. + +Example steps:: + +. Promote secondary servers to primaries to make the `system` database write-available. +This requires restarting processes. +For other scenarios, see xref:clustering/multi-region-deployment/disaster-recovery.adoc#make-the-system-database-write-available[the steps] in the Disaster recovery guide on how to make the `system` database write-available again. + +. Mark missing servers as not available by cordoning them. +For each `Unavailable` server, run `CALL dbms.cluster.cordonServer("unavailable-server-id")` on one of the available servers. + +. Recreate each user database, letting it choose the existing xref:database-administration/standard-databases/recreate-database.adoc#seed-servers[servers as seeders]. +You will need to accept a smaller topology that will fit in the remaining data center/cloud region. + +For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster recovery guide]. + + +[[geo-distributed-dc]] +=== Use geo-distributed data centers (3DC) + +You can place each primary server in a different data center using a minimum of three data centers. + +Therefore, if one data center fails, only one primary member is lost and the cluster can continue without data loss. + +However, you always pay cross-region latency times for every write operation. + +==== How to recover from loss of a data center? + +This setup has no loss of quorum, so the cluster keeps running -- only with reduced fault tolerance (with no room for extra failures). + +To restore fault tolerance, you can either wait until the affected region is back online or start a new primary member somewhere else that will provide resilience and re-establish three-region fault tolerance. + +Example steps:: + +. Start and enable a new server. +See xref:clustering/servers.adoc#cluster-add-server[How to add a server to the cluster] for details. + +. Remove the unavailable server from the cluster: +.. First, xref:clustering/servers.adoc#_deallocating_databases_from_a_server[deallocate databases] from it. +.. Then xref:clustering/servers.adoc#_dropping_a_server[drop the server]. 
+For more information, visit the xref:clustering/servers.adoc[]. + +For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster recovery guide]. + + +[[geo-distribution-system-database]] +=== Use full geo-distribution for the `system` database only (3DC) + +You can place all primaries for user databases in one region, with secondaries in another. + +In a third region, deploy a primary server only for the `system` database (in addition to those in the first two regions). + +* This server can be a small machine, since the `system` database has minimal resource requirements. + +* To prevent user databases from being allocated to it, set the `allowedDatabases` constraint to some name that will never be used. + +Your writes will be fast, because they are within the region. + +If a region goes down, you retain write availability for the `system` database, which makes restoring write availability to the user databases easier. + +However, if the region with primaries goes down, you lose write availability for the user databases. +Though read availability may remain via the secondaries. + +==== How to recover from loss of a data center? + +If you lose the region with primaries in, the user databases will go write-unavailable, though the secondaries should continue to provide read availability. +Because of the third region, the `system` database will remain write available, so you will be able to get the user databases back to write available without process downtime. + +However, if you need to use the `recreateDatabase()` procedure, it will involve downtime for the user database. + +Example steps:: + +. Mark missing servers as not present by cordoning them. +For each `Unavailable` server, run `CALL dbms.cluster.cordonServer("unavailable-server-id")` on one of the available servers. + +. Recreate each user database, letting it select the existing xref:database-administration/standard-databases/recreate-database.adoc#seed-servers[servers as seeders]. +You need to accept a smaller topology that fits in the remaining data center. + + +[[cluster-anti-patterns]] +== Neo4j cluster design patterns to avoid + + +[[two-dc-unbalanced-membership]] +=== Two data centers with unbalanced membership + +Suppose you decide to set up just two data centers, placing two primaries in data center 1 (DC1) and one primary in the data center 2 (DC2). + +If the writer primary is located in DC1, then writes can be fast because a local quorum can be reached. +This setup can tolerate the loss of one data center — but only if the failure is in DC2. +If DC1 fails, you lose two primary members, which means the quorum is lost and the cluster becomes unavailable for writes. + +Keep in mind that any issue could push the system back to cross–data center write latencies. +Worse, because of the latency, the member in DC2 may fall behind. +In that case a failure of a member in DC1 means the database is write-unavailable until the DC2 member has caught up. + +If leadership shifts to DC2, this makes all writes slow. + +Finally, there is no guarantee against data lost if DC1 goes down. +Beacuse the primary member in DC2 may not be up to date with writes, even in append. + + + +[[two-dc-balanced-membership]] +=== Two data centers with balanced membership + +The worst scenario is to operate with just two data centers and place two or three primaries in each of them. + +This means the failure of either data center leads to loss of quorum and, therefore, to loss of the cluster write-availability. 
+ +Besides, all writes have to pay the cross-region latency cost. + +This design pattern is strongly recommended to avoid. + +== Summary + +.Comparison of cluster designs +[cols="1,2,2a,2a,2", options="header"] +|=== +| Setup +| Design +| Pros +| Cons +| Best use case + +5+^| Recommended patterns + +| Secondaries for read resilience +| Primaries in one region, secondaries in other regions +| * Fast writes (local quorum). + +* Local reads in remote regions. +| * Loss of write availability if primary region fails. + +* Recovery requires reseeding +| Applications needing fast writes. +The cluster can tolerate downtime during recovery. + +| Geo-distributed data centers (3DC) +| Each primary in a different region (≥3). +| * Survives loss of one DC without data loss. + +* Quorum remains intact. +| * Higher write latency (cross-region). + +* Requires more complex networking. +| Critical systems needing continuous availability even if a full region fails. + +| Full geo-distribution for the `system` database only (3DC) +| User database primaries in one region, secondaries in another, `system` primaries across three regions +| * Fast user database writes (local). + +* The `system` database is always available, which means smoother recovery. + +* Reads available if primaries fail. +| * Loss of user database writes if primary region fails. + +* Recovery requires reseeding. +| Balanced approach: fast normal operations, easier recovery, some downtime acceptable. + +5+^| Non-recommended patterns + +| Two DCs – Unbalanced membership +| Two primaries are in DC1, one primary is in DC2. +| Fast writes if a leader is in DC1. +| * Quorum lost if DC1 fails. + +* Risk of data loss. + +* Cross-region latency if leader is in DC2. +| Should be avoided. + +| Two DCs – Balanced membership +| Equal primaries in two DCs. +| (none significant) +| * Quorum lost if either DC fails. + +* All writes pay cross-region latency. +| Should be avoided. +|=== + + diff --git a/modules/ROOT/pages/clustering/clustering-advanced/multi-data-center-routing.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/multi-data-center-routing.adoc similarity index 100% rename from modules/ROOT/pages/clustering/clustering-advanced/multi-data-center-routing.adoc rename to modules/ROOT/pages/clustering/multi-region-deployment/multi-data-center-routing.adoc diff --git a/modules/ROOT/pages/clustering/servers.adoc b/modules/ROOT/pages/clustering/servers.adoc index 9081a3d3b..d9677252b 100644 --- a/modules/ROOT/pages/clustering/servers.adoc +++ b/modules/ROOT/pages/clustering/servers.adoc @@ -539,5 +539,5 @@ neo4j@neo4j> ENABLE SERVER '25a7efc7-d063-44b8-bdee-f23357f89f01'; An unavailable server which has not been cordoned may still be allocated to host new databases. When the server recovers it observes that it is due to host these databases and begin catching up from some other available server (if one exists). However, in the meantime those databases have reduced fault tolerance or, worse, reduced availability. -See xref:clustering/disaster-recovery.adoc[Disaster Recovery] for more details. +See xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster Recovery] for more details. 
==== diff --git a/modules/ROOT/pages/clustering/setup/analytics-cluster.adoc b/modules/ROOT/pages/clustering/setup/analytics-cluster.adoc index d211c756e..5ca1ba35d 100644 --- a/modules/ROOT/pages/clustering/setup/analytics-cluster.adoc +++ b/modules/ROOT/pages/clustering/setup/analytics-cluster.adoc @@ -268,6 +268,6 @@ If you want to follow along with the startup, you can see the messages in xref:c == Running analytic queries If running large normal Cypher queries, it is possible to use server tags to identify the large servers, and a routing policy to direct the read queries towards those servers. -See xref:clustering/clustering-advanced/multi-data-center-routing.adoc[Multi-data center routing] for more details. +See xref:clustering/multi-region-deployment/multi-data-center-routing.adoc[Multi-data center routing] for more details. If using GDS, follow the guidance in link:https://neo4j.com/docs/graph-data-science/current/production-deployment/neo4j-cluster/[Neo4j Graph Data Science Library Manual -> GDS with Neo4j cluster]. \ No newline at end of file diff --git a/modules/ROOT/pages/clustering/clustering-advanced/reconciler.adoc b/modules/ROOT/pages/clustering/setup/reconciler.adoc similarity index 100% rename from modules/ROOT/pages/clustering/clustering-advanced/reconciler.adoc rename to modules/ROOT/pages/clustering/setup/reconciler.adoc diff --git a/modules/ROOT/pages/clustering/setup/routing.adoc b/modules/ROOT/pages/clustering/setup/routing.adoc index 36e5bda7f..93de6bfcd 100644 --- a/modules/ROOT/pages/clustering/setup/routing.adoc +++ b/modules/ROOT/pages/clustering/setup/routing.adoc @@ -56,7 +56,7 @@ For lower level details about getting routing tables, refer to the link:{neo4j-d [[clustering-client-side-routing-policies]] === Routing policies -You can control the routing table that servers provide by using xref:clustering/clustering-advanced/multi-data-center-routing.adoc[routing policies]. +You can control the routing table that servers provide by using xref:clustering/multi-region-deployment/multi-data-center-routing.adoc[routing policies]. Policies filter the full set of possible servers for each category according to the rules you define. For example, this can be used to preferentially route to a local data centre, or to specific large machines, depending on your policies. diff --git a/modules/ROOT/pages/clustering/clustering-advanced/unbind.adoc b/modules/ROOT/pages/clustering/unbind.adoc similarity index 100% rename from modules/ROOT/pages/clustering/clustering-advanced/unbind.adoc rename to modules/ROOT/pages/clustering/unbind.adoc diff --git a/modules/ROOT/pages/configuration/configuration-settings.adoc b/modules/ROOT/pages/configuration/configuration-settings.adoc index edecb288b..665cb4bea 100644 --- a/modules/ROOT/pages/configuration/configuration-settings.adoc +++ b/modules/ROOT/pages/configuration/configuration-settings.adoc @@ -65,7 +65,7 @@ Its default value is `all()`: dbms.routing.load_balancing.config.server_policies.default=all() ---- -See xref:clustering/clustering-advanced/multi-data-center-routing.adoc#mdc-load-balancing-framework[Clustering -> Multi-data center routing] for more details. +See xref:clustering/multi-region-deployment/multi-data-center-routing.adoc#mdc-load-balancing-framework[Clustering -> Multi-data center routing] for more details. 
== Checkpoint settings diff --git a/modules/ROOT/pages/database-administration/index.adoc b/modules/ROOT/pages/database-administration/index.adoc index bea952374..5bc75d7c0 100644 --- a/modules/ROOT/pages/database-administration/index.adoc +++ b/modules/ROOT/pages/database-administration/index.adoc @@ -50,7 +50,7 @@ image::manage-dbs-default.svg[title="A multiple database Neo4j installation, wit [NOTE] ==== Be aware that the automatically created _initial_ default database may have a different topology to the default configuration values. -See xref:clustering/clustering-advanced/default-database.adoc[Default database in a cluster] for more information. +See xref:clustering/databases.adoc#cluster-default-database[Default database in a cluster] for more information. ==== [role=enterprise-edition] diff --git a/modules/ROOT/pages/database-administration/queries.adoc b/modules/ROOT/pages/database-administration/queries.adoc index 8339bd12d..91d904d0e 100644 --- a/modules/ROOT/pages/database-administration/queries.adoc +++ b/modules/ROOT/pages/database-administration/queries.adoc @@ -136,7 +136,7 @@ Or in a cluster: [NOTE] ==== Be aware that the automatically created _initial_ default database may have a different topology to the default configuration values. -See xref:clustering/clustering-advanced/default-database.adoc[Default database in a cluster] for more information. +See xref:clustering/databases.adoc#cluster-default-database[Default database in a cluster] for more information. ==== To change the default database, see xref:clustering/databases.adoc#cluster-default-database[Change the default database]. diff --git a/modules/ROOT/pages/database-administration/standard-databases/configuration-parameters.adoc b/modules/ROOT/pages/database-administration/standard-databases/configuration-parameters.adoc index 1212cb162..187cbebd1 100644 --- a/modules/ROOT/pages/database-administration/standard-databases/configuration-parameters.adoc +++ b/modules/ROOT/pages/database-administration/standard-databases/configuration-parameters.adoc @@ -7,7 +7,7 @@ Configuration parameters are defined in the xref:configuration/neo4j-conf.adoc[n The following configuration parameters are applicable for managing databases: -[options="header", cols="40,60"] +[options="header", cols="40,60a"] |=== | Parameter name | Description @@ -22,13 +22,13 @@ The database is created if it does not exist when the instance starts. [NOTE] ==== In a clustered setup, the value of `initial.dbms.default_database` is only used to set the initial default database. -To change the default database at a later point, see xref:clustering/databases#cluster-default-database[Change the default database]. +To change the default database at a later point, see xref:clustering/databases.adoc#cluster-default-database[Change the default database]. ==== [NOTE] ==== Be aware that the automatically created _initial_ default database may have a different topology to the default configuration values. -See xref:clustering/clustering-advanced/default-database.adoc[Default database in a cluster] for more information. +See xref:clustering/databases.adoc#cluster-default-database[Default database in a cluster] for more information. 
==== | xref:configuration/configuration-settings.adoc#config_dbms.max_databases[`dbms.max_databases`] diff --git a/modules/ROOT/pages/database-administration/standard-databases/recreate-database.adoc b/modules/ROOT/pages/database-administration/standard-databases/recreate-database.adoc index d2f79da1c..a11476627 100644 --- a/modules/ROOT/pages/database-administration/standard-databases/recreate-database.adoc +++ b/modules/ROOT/pages/database-administration/standard-databases/recreate-database.adoc @@ -10,7 +10,7 @@ The recreate procedure allows you to: * Change the database store to a specified backup, while keeping all the associated privileges for the database. * Make your database write-available again after it has been lost (for example, due to a disaster). -See xref:clustering/disaster-recovery.adoc[Disaster recovery] for more information. +See xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster recovery] for more information. * label:new[Introduced in 2025.02] Delete the data and schema for a database, while keeping the database privileges assigned to each role. diff --git a/modules/ROOT/pages/neo4j-admin-neo4j-cli.adoc b/modules/ROOT/pages/neo4j-admin-neo4j-cli.adoc index adc92035b..66ac6a018 100644 --- a/modules/ROOT/pages/neo4j-admin-neo4j-cli.adoc +++ b/modules/ROOT/pages/neo4j-admin-neo4j-cli.adoc @@ -105,7 +105,7 @@ For details, see xref:monitoring/neo4j-admin-report.adoc[]. | `unbind` | Removes cluster state data from a stopped Neo4j server. -For details, see xref:clustering/clustering-advanced/unbind.adoc[]. +For details, see xref:clustering/unbind.adoc[]. | `validate-config` | Performs configuration validation without starting the server. From 9cac493a8ae6457e3f619fe7be75f39c351952d0 Mon Sep 17 00:00:00 2001 From: Natalia Ivakina Date: Tue, 2 Sep 2025 11:23:56 +0200 Subject: [PATCH 02/10] Add image for the `syst` db --- .../images/geo-distribution-system-db.svg | 112 ++++++++++++++++++ .../geo-redundant-deployment.adoc | 54 +++++---- 2 files changed, 140 insertions(+), 26 deletions(-) create mode 100644 modules/ROOT/images/geo-distribution-system-db.svg diff --git a/modules/ROOT/images/geo-distribution-system-db.svg b/modules/ROOT/images/geo-distribution-system-db.svg new file mode 100644 index 000000000..75b1639ae --- /dev/null +++ b/modules/ROOT/images/geo-distribution-system-db.svg @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc index 3f565014f..8f30f5074 100644 --- a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -2,12 +2,12 @@ [role=enterprise-edition] -= Designing a resilient Neo4j cluster across cloud regions += Designing a resilient multi-data center Neo4j cluster [[multi-region-deployment-overview]] == Overview -Deploying a resilient multi-region cluster, the goal is to achieve high availability, disaster recovery, and tolerance against the loss of a data center. +Deploying a resilient multi-data center cluster, the goal is to achieve high availability, disaster recovery, and tolerance against the loss of a data center. 
You should take into account cluster architecture and topology and decide where database primaries and secondaries are located, balancing performance and fault tolerance. @@ -24,15 +24,15 @@ If primaries are far apart, network latency adds to commit time. [[secondaries-for-read-resilience]] === Use database secondaries for read resilience -You can locate all the database primaries in one data center (DC) and database secondaries in another region for better read performance. -This provides fast writes, because they will be performed within the region. +You can locate all the database primaries in one data center (DC) and database secondaries in another DC for better read performance. +This provides fast writes, because they will be performed within the DC. -However, if the data center with primaries goes down, your cluster loses write availability. +However, if the DC with primaries goes down, your cluster loses write availability. Though read availability may remain via the secondaries. ==== How to recover from loss of a data center? -You can restore the cluster write availability without the failed region: +You can restore the cluster write availability without the failed DC: * If you have enough secondary servers in another data center, you can switch their mode to primary and not have to store copy or wait a long time for primary servers to restore. * Use secondaries to re-seed databases if needed. @@ -60,13 +60,13 @@ You can place each primary server in a different data center using a minimum of Therefore, if one data center fails, only one primary member is lost and the cluster can continue without data loss. -However, you always pay cross-region latency times for every write operation. +However, you always pay cross-data center latency times for every write operation. ==== How to recover from loss of a data center? This setup has no loss of quorum, so the cluster keeps running -- only with reduced fault tolerance (with no room for extra failures). -To restore fault tolerance, you can either wait until the affected region is back online or start a new primary member somewhere else that will provide resilience and re-establish three-region fault tolerance. +To restore fault tolerance, you can either wait until the affected DC is back online or start a new primary member somewhere else that will provide resilience and re-establish three-DC fault tolerance. Example steps:: @@ -84,25 +84,27 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster [[geo-distribution-system-database]] === Use full geo-distribution for the `system` database only (3DC) -You can place all primaries for user databases in one region, with secondaries in another. +image::geo-distribution-system-db.svg[width="400", title="Primaries for the `system` database distributed across three data centers", role=popup] -In a third region, deploy a primary server only for the `system` database (in addition to those in the first two regions). +You can place all primaries for user databases in one data center, with secondaries in another. + +In a third DC, deploy a primary server only for the `system` database (in addition to those in the first two data centers). * This server can be a small machine, since the `system` database has minimal resource requirements. * To prevent user databases from being allocated to it, set the `allowedDatabases` constraint to some name that will never be used. -Your writes will be fast, because they are within the region. 
+Your writes will be fast, because they are within the DC. -If a region goes down, you retain write availability for the `system` database, which makes restoring write availability to the user databases easier. +If a DC goes down, you retain write availability for the `system` database, which makes restoring write availability to the user databases easier. -However, if the region with primaries goes down, you lose write availability for the user databases. +However, if the DC with primaries goes down, you lose write availability for the user databases. Though read availability may remain via the secondaries. ==== How to recover from loss of a data center? -If you lose the region with primaries in, the user databases will go write-unavailable, though the secondaries should continue to provide read availability. -Because of the third region, the `system` database will remain write available, so you will be able to get the user databases back to write available without process downtime. +If you lose the DC with primaries in, the user databases will go write-unavailable, though the secondaries should continue to provide read availability. +Because of the third DC, the `system` database will remain write available, so you will be able to get the user databases back to write available without process downtime. However, if you need to use the `recreateDatabase()` procedure, it will involve downtime for the user database. @@ -146,7 +148,7 @@ The worst scenario is to operate with just two data centers and place two or thr This means the failure of either data center leads to loss of quorum and, therefore, to loss of the cluster write-availability. -Besides, all writes have to pay the cross-region latency cost. +Besides, all writes have to pay the cross-data center latency cost. This design pattern is strongly recommended to avoid. @@ -164,28 +166,28 @@ This design pattern is strongly recommended to avoid. 5+^| Recommended patterns | Secondaries for read resilience -| Primaries in one region, secondaries in other regions +| Primaries in one data center, secondaries in other data centers | * Fast writes (local quorum). + -* Local reads in remote regions. -| * Loss of write availability if primary region fails. + +* Local reads in remote data centers. +| * Loss of write availability if DC with primaries fails. + * Recovery requires reseeding | Applications needing fast writes. The cluster can tolerate downtime during recovery. | Geo-distributed data centers (3DC) -| Each primary in a different region (≥3). +| Each primary in a different data center (≥3). | * Survives loss of one DC without data loss. + * Quorum remains intact. -| * Higher write latency (cross-region). + +| * Higher write latency (cross-data center). + * Requires more complex networking. -| Critical systems needing continuous availability even if a full region fails. +| Critical systems needing continuous availability even if a full data center fails. | Full geo-distribution for the `system` database only (3DC) -| User database primaries in one region, secondaries in another, `system` primaries across three regions +| User database primaries in one DC, secondaries in another, `system` primaries across three data centers | * Fast user database writes (local). + * The `system` database is always available, which means smoother recovery. + * Reads available if primaries fail. -| * Loss of user database writes if primary region fails. + +| * Loss of user database writes if DC with primaries fails. + * Recovery requires reseeding. 
| Balanced approach: fast normal operations, easier recovery, some downtime acceptable. @@ -196,14 +198,14 @@ The cluster can tolerate downtime during recovery. | Fast writes if a leader is in DC1. | * Quorum lost if DC1 fails. + * Risk of data loss. + -* Cross-region latency if leader is in DC2. +* Cross-DC latency if leader is in DC2. | Should be avoided. | Two DCs – Balanced membership | Equal primaries in two DCs. | (none significant) | * Quorum lost if either DC fails. + -* All writes pay cross-region latency. +* All writes pay cross-DC latency. | Should be avoided. |=== From 06703a3a68387a95db64e3b9c7dcd5a5ec1330f4 Mon Sep 17 00:00:00 2001 From: Natalia Ivakina Date: Wed, 3 Sep 2025 11:20:50 +0200 Subject: [PATCH 03/10] Add more diagrams --- .../ROOT/images/2dc-balanced-membership.svg | 46 +++++++++ .../ROOT/images/2dc-unbalanced-membership.svg | 36 +++++++ .../ROOT/images/geo-distributed-primaries.svg | 38 ++++++++ .../secondaries-for-read-resilience.svg | 95 +++++++++++++++++++ .../geo-redundant-deployment.adoc | 14 ++- 5 files changed, 226 insertions(+), 3 deletions(-) create mode 100644 modules/ROOT/images/2dc-balanced-membership.svg create mode 100644 modules/ROOT/images/2dc-unbalanced-membership.svg create mode 100644 modules/ROOT/images/geo-distributed-primaries.svg create mode 100644 modules/ROOT/images/secondaries-for-read-resilience.svg diff --git a/modules/ROOT/images/2dc-balanced-membership.svg b/modules/ROOT/images/2dc-balanced-membership.svg new file mode 100644 index 000000000..02acf0e33 --- /dev/null +++ b/modules/ROOT/images/2dc-balanced-membership.svg @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/ROOT/images/2dc-unbalanced-membership.svg b/modules/ROOT/images/2dc-unbalanced-membership.svg new file mode 100644 index 000000000..aab3ffa93 --- /dev/null +++ b/modules/ROOT/images/2dc-unbalanced-membership.svg @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/ROOT/images/geo-distributed-primaries.svg b/modules/ROOT/images/geo-distributed-primaries.svg new file mode 100644 index 000000000..13b9e6d68 --- /dev/null +++ b/modules/ROOT/images/geo-distributed-primaries.svg @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/ROOT/images/secondaries-for-read-resilience.svg b/modules/ROOT/images/secondaries-for-read-resilience.svg new file mode 100644 index 000000000..12c729138 --- /dev/null +++ b/modules/ROOT/images/secondaries-for-read-resilience.svg @@ -0,0 +1,95 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc index 8f30f5074..28413f250 100644 --- a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -24,6 +24,8 @@ If primaries are far apart, network latency adds to commit time. 
[[secondaries-for-read-resilience]] === Use database secondaries for read resilience +image::secondaries-for-read-resilience.svg[width="400", title="Cluster design with database secondaries for better read performance", role=popup] + You can locate all the database primaries in one data center (DC) and database secondaries in another DC for better read performance. This provides fast writes, because they will be performed within the DC. @@ -54,7 +56,9 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster [[geo-distributed-dc]] -=== Use geo-distributed data centers (3DC) +=== Use geo-distributed data centers + +image::geo-distributed-primaries.svg[width="400", title="Cluster design with primaries distributed across three data centers", role=popup] You can place each primary server in a different data center using a minimum of three data centers. @@ -82,7 +86,7 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster [[geo-distribution-system-database]] -=== Use full geo-distribution for the `system` database only (3DC) +=== Use full geo-distribution for the `system` database only image::geo-distribution-system-db.svg[width="400", title="Primaries for the `system` database distributed across three data centers", role=popup] @@ -124,6 +128,8 @@ You need to accept a smaller topology that fits in the remaining data center. [[two-dc-unbalanced-membership]] === Two data centers with unbalanced membership +image::2dc-unbalanced-membership.svg[width="400", title="Unbalanced data center primary distribution", role=popup] + Suppose you decide to set up just two data centers, placing two primaries in data center 1 (DC1) and one primary in the data center 2 (DC2). If the writer primary is located in DC1, then writes can be fast because a local quorum can be reached. @@ -136,7 +142,7 @@ In that case a failure of a member in DC1 means the database is write-unavailabl If leadership shifts to DC2, this makes all writes slow. -Finally, there is no guarantee against data lost if DC1 goes down. +Finally, there is no guarantee against data loss if DC1 goes down. Beacuse the primary member in DC2 may not be up to date with writes, even in append. @@ -144,6 +150,8 @@ Beacuse the primary member in DC2 may not be up to date with writes, even in app [[two-dc-balanced-membership]] === Two data centers with balanced membership +image::2dc-balanced-membership.svg[width="400", title="Symmetric primaries across two data centers", role=popup] + The worst scenario is to operate with just two data centers and place two or three primaries in each of them. This means the failure of either data center leads to loss of quorum and, therefore, to loss of the cluster write-availability. 
From d77cf018596fb809a32e853407492930824f9abd Mon Sep 17 00:00:00 2001 From: Natalia Ivakina <82437520+NataliaIvakina@users.noreply.github.com> Date: Wed, 3 Sep 2025 15:44:42 +0200 Subject: [PATCH 04/10] Apply suggestions from code review Co-authored-by: Nick Giles <100630647+nick-giles-neo@users.noreply.github.com> --- .../geo-redundant-deployment.adoc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc index 28413f250..164c370ad 100644 --- a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -36,18 +36,18 @@ Though read availability may remain via the secondaries. You can restore the cluster write availability without the failed DC: -* If you have enough secondary servers in another data center, you can switch their mode to primary and not have to store copy or wait a long time for primary servers to restore. +* If you have enough secondary members of the database in another data center, you can switch their mode to primary and not have to store copy or wait a long time for primary copies to restore. * Use secondaries to re-seed databases if needed. Run xref:database-administration/standard-databases/recreate-database.adoc[the `dbms.recreateDatabase()` procedure]. Example steps:: -. Promote secondary servers to primaries to make the `system` database write-available. +. Promote secondary copies of the `system` database to primaries to make the `system` database write-available. This requires restarting processes. For other scenarios, see xref:clustering/multi-region-deployment/disaster-recovery.adoc#make-the-system-database-write-available[the steps] in the Disaster recovery guide on how to make the `system` database write-available again. . Mark missing servers as not available by cordoning them. -For each `Unavailable` server, run `CALL dbms.cluster.cordonServer("unavailable-server-id")` on one of the available servers. +For each `Unavailable` server, run `CALL dbms.cluster.cordonServer("unavailable-server-id")` on the remaining cluster. . Recreate each user database, letting it choose the existing xref:database-administration/standard-databases/recreate-database.adoc#seed-servers[servers as seeders]. You will need to accept a smaller topology that will fit in the remaining data center/cloud region. @@ -60,7 +60,7 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster image::geo-distributed-primaries.svg[width="400", title="Cluster design with primaries distributed across three data centers", role=popup] -You can place each primary server in a different data center using a minimum of three data centers. +You can place each primary copy in a different data center using a minimum of three data centers. Therefore, if one data center fails, only one primary member is lost and the cluster can continue without data loss. @@ -92,7 +92,7 @@ image::geo-distribution-system-db.svg[width="400", title="Primaries for the `sys You can place all primaries for user databases in one data center, with secondaries in another. -In a third DC, deploy a primary server only for the `system` database (in addition to those in the first two data centers). 
+In a third DC, deploy a server that only hosts a primary member of the `system` database (in addition to those in the first two data centers). * This server can be a small machine, since the `system` database has minimal resource requirements. @@ -178,7 +178,8 @@ This design pattern is strongly recommended to avoid. | * Fast writes (local quorum). + * Local reads in remote data centers. | * Loss of write availability if DC with primaries fails. + -* Recovery requires reseeding +* Recovery requires reseeding. +* Process restarts required if DC with primaries fails. | Applications needing fast writes. The cluster can tolerate downtime during recovery. @@ -187,7 +188,6 @@ The cluster can tolerate downtime during recovery. | * Survives loss of one DC without data loss. + * Quorum remains intact. | * Higher write latency (cross-data center). + -* Requires more complex networking. | Critical systems needing continuous availability even if a full data center fails. | Full geo-distribution for the `system` database only (3DC) From d99ac13ab8ddd6defc8f3ca19bf5ffe44f089979 Mon Sep 17 00:00:00 2001 From: Natalia Ivakina <82437520+NataliaIvakina@users.noreply.github.com> Date: Mon, 8 Sep 2025 16:00:25 +0200 Subject: [PATCH 05/10] Polishing headings --- .../geo-redundant-deployment.adoc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc index 164c370ad..f5154fcbc 100644 --- a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -2,12 +2,12 @@ [role=enterprise-edition] -= Designing a resilient multi-data center Neo4j cluster += Design a resilient multi-data center cluster [[multi-region-deployment-overview]] == Overview -Deploying a resilient multi-data center cluster, the goal is to achieve high availability, disaster recovery, and tolerance against the loss of a data center. +The goal of deploying a resilient multi-data center cluster is to achieve high availability, disaster recovery, and tolerance against the loss of a data center. You should take into account cluster architecture and topology and decide where database primaries and secondaries are located, balancing performance and fault tolerance. @@ -19,10 +19,10 @@ If primaries are far apart, network latency adds to commit time. 
[[recommended-cluster-patterns]] -== Recommended Neo4j cluster design patterns +== Recommended cluster design patterns [[secondaries-for-read-resilience]] -=== Use database secondaries for read resilience +=== Read resilience with user database secondaries image::secondaries-for-read-resilience.svg[width="400", title="Cluster design with database secondaries for better read performance", role=popup] @@ -56,7 +56,7 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster [[geo-distributed-dc]] -=== Use geo-distributed data centers +=== Geo-distribution of user database primaries image::geo-distributed-primaries.svg[width="400", title="Cluster design with primaries distributed across three data centers", role=popup] @@ -86,7 +86,7 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster [[geo-distribution-system-database]] -=== Use full geo-distribution for the `system` database only +=== Exclusive geo-distribution for the `system` database image::geo-distribution-system-db.svg[width="400", title="Primaries for the `system` database distributed across three data centers", role=popup] @@ -122,7 +122,7 @@ You need to accept a smaller topology that fits in the remaining data center. [[cluster-anti-patterns]] -== Neo4j cluster design patterns to avoid +== Cluster design patterns to avoid [[two-dc-unbalanced-membership]] From 6fae5d60bc41799b695e621563f88e6bd2795107 Mon Sep 17 00:00:00 2001 From: Natalia Ivakina <82437520+NataliaIvakina@users.noreply.github.com> Date: Mon, 8 Sep 2025 16:42:59 +0200 Subject: [PATCH 06/10] Update geo-redundant-deployment.adoc --- .../geo-redundant-deployment.adoc | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc index f5154fcbc..70fb841fc 100644 --- a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -26,21 +26,21 @@ If primaries are far apart, network latency adds to commit time. image::secondaries-for-read-resilience.svg[width="400", title="Cluster design with database secondaries for better read performance", role=popup] -You can locate all the database primaries in one data center (DC) and database secondaries in another DC for better read performance. -This provides fast writes, because they will be performed within the DC. +For better read performance, you can locate all database primaries in one data center (DC) and database secondaries in another DC. +This also provides fast writes, because they will be performed within the DC. However, if the DC with primaries goes down, your cluster loses write availability. Though read availability may remain via the secondaries. -==== How to recover from loss of a data center? +==== Recovering from the loss of a data center You can restore the cluster write availability without the failed DC: -* If you have enough secondary members of the database in another data center, you can switch their mode to primary and not have to store copy or wait a long time for primary copies to restore. -* Use secondaries to re-seed databases if needed. -Run xref:database-administration/standard-databases/recreate-database.adoc[the `dbms.recreateDatabase()` procedure]. 
+* If you have enough secondary members of the database in another data center, you can switch their mode to primary and not have to store a copy or wait a long time for primary copies to restore. +* You can use secondaries to re-seed databases if needed. +See xref:database-administration/standard-databases/recreate-database.adoc[the `dbms.recreateDatabase()` procedure] for more details. -Example steps:: +Example recovery steps:: . Promote secondary copies of the `system` database to primaries to make the `system` database write-available. This requires restarting processes. @@ -50,7 +50,7 @@ For other scenarios, see xref:clustering/multi-region-deployment/disaster-recove For each `Unavailable` server, run `CALL dbms.cluster.cordonServer("unavailable-server-id")` on the remaining cluster. . Recreate each user database, letting it choose the existing xref:database-administration/standard-databases/recreate-database.adoc#seed-servers[servers as seeders]. -You will need to accept a smaller topology that will fit in the remaining data center/cloud region. +You need to accept a smaller topology that will fit in the remaining DC. For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster recovery guide]. @@ -58,28 +58,29 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster [[geo-distributed-dc]] === Geo-distribution of user database primaries -image::geo-distributed-primaries.svg[width="400", title="Cluster design with primaries distributed across three data centers", role=popup] +image::geo-distributed-primaries.svg[width="400", title="Cluster design with database primaries distributed across three data centers", role=popup] -You can place each primary copy in a different data center using a minimum of three data centers. +You can place each primary copy in a different data center (DC) using a minimum of three data centers. -Therefore, if one data center fails, only one primary member is lost and the cluster can continue without data loss. +Therefore, if one DC fails, only one primary member is lost and the cluster can continue without data loss. However, you always pay cross-data center latency times for every write operation. -==== How to recover from loss of a data center? +==== Recovering from the loss of a data center This setup has no loss of quorum, so the cluster keeps running -- only with reduced fault tolerance (with no room for extra failures). To restore fault tolerance, you can either wait until the affected DC is back online or start a new primary member somewhere else that will provide resilience and re-establish three-DC fault tolerance. -Example steps:: +Example recovery steps:: . Start and enable a new server. See xref:clustering/servers.adoc#cluster-add-server[How to add a server to the cluster] for details. . Remove the unavailable server from the cluster: .. First, xref:clustering/servers.adoc#_deallocating_databases_from_a_server[deallocate databases] from it. -.. Then xref:clustering/servers.adoc#_dropping_a_server[drop the server]. +.. Then xref:clustering/servers.adoc#_dropping_a_server[drop the server]. ++ For more information, visit the xref:clustering/servers.adoc[]. For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster recovery guide]. 
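Assuming a replacement server has already been started, has discovered the cluster, and appears in `SHOW SERVERS`, the replace-and-drop flow above corresponds to the following sketch; both server IDs are placeholders.

[source,cypher]
----
// Run against the `system` database on any available server.
SHOW SERVERS;                                        // the replacement appears as Free, the lost server as Unavailable
ENABLE SERVER 'new-server-id';                       // adopt the replacement server into the cluster
DEALLOCATE DATABASES FROM SERVER 'lost-server-id';   // move allocations away from the unavailable server
DROP SERVER 'lost-server-id';                        // remove it once it no longer hosts any databases
----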
@@ -90,7 +91,7 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster image::geo-distribution-system-db.svg[width="400", title="Primaries for the `system` database distributed across three data centers", role=popup] -You can place all primaries for user databases in one data center, with secondaries in another. +You can place all primaries for user databases in one data center (DC), with secondaries in another. In a third DC, deploy a server that only hosts a primary member of the `system` database (in addition to those in the first two data centers). @@ -105,14 +106,14 @@ If a DC goes down, you retain write availability for the `system` database, whic However, if the DC with primaries goes down, you lose write availability for the user databases. Though read availability may remain via the secondaries. -==== How to recover from loss of a data center? +==== Recovering from the loss of a data center If you lose the DC with primaries in, the user databases will go write-unavailable, though the secondaries should continue to provide read availability. -Because of the third DC, the `system` database will remain write available, so you will be able to get the user databases back to write available without process downtime. +Because of the third DC, the `system` database remains write-available, so you will be able to get the user databases back to write-available without process downtime. However, if you need to use the `recreateDatabase()` procedure, it will involve downtime for the user database. -Example steps:: +Example recovery steps:: . Mark missing servers as not present by cordoning them. For each `Unavailable` server, run `CALL dbms.cluster.cordonServer("unavailable-server-id")` on one of the available servers. @@ -130,9 +131,10 @@ You need to accept a smaller topology that fits in the remaining data center. image::2dc-unbalanced-membership.svg[width="400", title="Unbalanced data center primary distribution", role=popup] -Suppose you decide to set up just two data centers, placing two primaries in data center 1 (DC1) and one primary in the data center 2 (DC2). +Suppose, you decide to set up just two data centers, placing two primaries in data center 1 (DC1) and one primary in the data center 2 (DC2). If the writer primary is located in DC1, then writes can be fast because a local quorum can be reached. + This setup can tolerate the loss of one data center — but only if the failure is in DC2. If DC1 fails, you lose two primary members, which means the quorum is lost and the cluster becomes unavailable for writes. From 2c4d2c6a8bac53687bb9c2abda872ab2aa7ebadd Mon Sep 17 00:00:00 2001 From: Natalia Ivakina <82437520+NataliaIvakina@users.noreply.github.com> Date: Mon, 8 Sep 2025 16:59:38 +0200 Subject: [PATCH 07/10] Apply suggestions from code review --- .../multi-region-deployment/geo-redundant-deployment.adoc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc index 70fb841fc..d63604f18 100644 --- a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -103,7 +103,7 @@ Your writes will be fast, because they are within the DC. 
If a DC goes down, you retain write availability for the `system` database, which makes restoring write availability to the user databases easier. -However, if the DC with primaries goes down, you lose write availability for the user databases. +However, if the DC with primaries goes down, the user databases will become write-unavailable. Though read availability may remain via the secondaries. ==== Recovering from the loss of a data center @@ -111,7 +111,7 @@ Though read availability may remain via the secondaries. If you lose the DC with primaries in, the user databases will go write-unavailable, though the secondaries should continue to provide read availability. Because of the third DC, the `system` database remains write-available, so you will be able to get the user databases back to write-available without process downtime. -However, if you need to use the `recreateDatabase()` procedure, it will involve downtime for the user database. +However, if you need to use the xref:database-administration/standard-databases/recreate-database.adoc[`dbms.recreateDatabase()` procedure], it will involve downtime for the user database. Example recovery steps:: @@ -119,7 +119,9 @@ Example recovery steps:: For each `Unavailable` server, run `CALL dbms.cluster.cordonServer("unavailable-server-id")` on one of the available servers. . Recreate each user database, letting it select the existing xref:database-administration/standard-databases/recreate-database.adoc#seed-servers[servers as seeders]. -You need to accept a smaller topology that fits in the remaining data center. +You need to accept a smaller topology that will fit in the remaining data center. + +For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster recovery guide]. [[cluster-anti-patterns]] From b9b54e8f0ff6dadeb062b0ac4c1aaf4ee6357f60 Mon Sep 17 00:00:00 2001 From: Natalia Ivakina <82437520+NataliaIvakina@users.noreply.github.com> Date: Tue, 9 Sep 2025 09:18:59 +0200 Subject: [PATCH 08/10] more edits for better readability --- .../geo-redundant-deployment.adoc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc index d63604f18..c5e044b9e 100644 --- a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -2,7 +2,7 @@ [role=enterprise-edition] -= Design a resilient multi-data center cluster += Designing a resilient multi-data center cluster [[multi-region-deployment-overview]] == Overview @@ -26,8 +26,8 @@ If primaries are far apart, network latency adds to commit time. image::secondaries-for-read-resilience.svg[width="400", title="Cluster design with database secondaries for better read performance", role=popup] -For better read performance, you can locate all database primaries in one data center (DC) and database secondaries in another DC. -This also provides fast writes, because they will be performed within the DC. +For better read performance, you can locate all database primaries in one data center (DC) and the secondaries in another DC. +This setup also provides fast writes, because they will be performed within the single DC. However, if the DC with primaries goes down, your cluster loses write availability. 
Though read availability may remain via the secondaries. @@ -36,7 +36,7 @@ Though read availability may remain via the secondaries. You can restore the cluster write availability without the failed DC: -* If you have enough secondary members of the database in another data center, you can switch their mode to primary and not have to store a copy or wait a long time for primary copies to restore. +* If you have enough secondary members of the database in another DC, you can switch their mode to primary and not have to store a copy or wait a long time for primary copies to restore. * You can use secondaries to re-seed databases if needed. See xref:database-administration/standard-databases/recreate-database.adoc[the `dbms.recreateDatabase()` procedure] for more details. @@ -60,9 +60,9 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster image::geo-distributed-primaries.svg[width="400", title="Cluster design with database primaries distributed across three data centers", role=popup] -You can place each primary copy in a different data center (DC) using a minimum of three data centers. +You can place each primary copy in a different data center (DC), using at least three data centers. -Therefore, if one DC fails, only one primary member is lost and the cluster can continue without data loss. +Therefore, if one DC fails, only a single primary member is lost, and the cluster can continue operating without data loss. However, you always pay cross-data center latency times for every write operation. @@ -91,7 +91,7 @@ For detailed scenarios, see the xref:clustering/multi-region-deployment/disaster image::geo-distribution-system-db.svg[width="400", title="Primaries for the `system` database distributed across three data centers", role=popup] -You can place all primaries for user databases in one data center (DC), with secondaries in another. +You can place all primaries for user databases in one data center (DC) and all secondaries in another. In a third DC, deploy a server that only hosts a primary member of the `system` database (in addition to those in the first two data centers). @@ -99,12 +99,12 @@ In a third DC, deploy a server that only hosts a primary member of the `system` * To prevent user databases from being allocated to it, set the `allowedDatabases` constraint to some name that will never be used. -Your writes will be fast, because they are within the DC. +Your writes will be fast, because they occur within the single DC. If a DC goes down, you retain write availability for the `system` database, which makes restoring write availability to the user databases easier. However, if the DC with primaries goes down, the user databases will become write-unavailable. -Though read availability may remain via the secondaries. +Though read availability may still be maintained via the secondaries. 
==== Recovering from the loss of a data center From c734efe77705c5f28c9b2ceb8a3ddb3a3e6d440d Mon Sep 17 00:00:00 2001 From: Natalia Ivakina <82437520+NataliaIvakina@users.noreply.github.com> Date: Tue, 9 Sep 2025 16:39:41 +0200 Subject: [PATCH 09/10] Update unbind.adoc --- modules/ROOT/pages/clustering/unbind.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ROOT/pages/clustering/unbind.adoc b/modules/ROOT/pages/clustering/unbind.adoc index 0a72ee3b3..b6fa105ce 100644 --- a/modules/ROOT/pages/clustering/unbind.adoc +++ b/modules/ROOT/pages/clustering/unbind.adoc @@ -76,7 +76,7 @@ The Neo4j server must be shut down before running the `neo4j-admin server unbind [[unbind-command-usage]] == Usage -You can use the `neo4j-admin server unbind` command to remove the cluster state of a cluster server, turn a cluster server into a standalone server, or remove and archive the cluster state of a cluster server. +You can use the `neo4j-admin server unbind` command to remove or archive the cluster state of a cluster server. === Remove the cluster state of a server From a3a134719386c130242e1f89d4e63432f9e44675 Mon Sep 17 00:00:00 2001 From: Natalia Ivakina <82437520+NataliaIvakina@users.noreply.github.com> Date: Wed, 10 Sep 2025 15:47:19 +0200 Subject: [PATCH 10/10] Apply suggestions from code review --- modules/ROOT/content-nav.adoc | 2 +- modules/ROOT/pages/clustering/index.adoc | 4 ++-- .../multi-region-deployment/geo-redundant-deployment.adoc | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/ROOT/content-nav.adoc b/modules/ROOT/content-nav.adoc index 59c19f32b..435c400ef 100644 --- a/modules/ROOT/content-nav.adoc +++ b/modules/ROOT/content-nav.adoc @@ -168,8 +168,8 @@ *** xref:clustering/monitoring/endpoints.adoc[] *** xref:clustering/monitoring/status-check.adoc[] ** Resilient cluster deployment -*** xref:clustering/multi-region-deployment/multi-data-center-routing.adoc[] *** xref:clustering/multi-region-deployment/geo-redundant-deployment.adoc[] +*** xref:clustering/multi-region-deployment/multi-data-center-routing.adoc[] *** xref:clustering/multi-region-deployment/disaster-recovery.adoc[] //** xref:clustering/internals.adoc[] ** xref:clustering/settings.adoc[] diff --git a/modules/ROOT/pages/clustering/index.adoc b/modules/ROOT/pages/clustering/index.adoc index e902590a0..e0ee411d2 100644 --- a/modules/ROOT/pages/clustering/index.adoc +++ b/modules/ROOT/pages/clustering/index.adoc @@ -14,7 +14,7 @@ This chapter describes the following: ** xref:clustering/setup/discovery.adoc[Cluster server discovery] -- How servers in a cluster discover each other and form a cluster. ** xref:clustering/setup/routing.adoc[Leadership, routing and load balancing] -- Election of leaders, routing and load balancing. ** xref:clustering/setup/encryption.adoc[Intra-cluster encryption] -- How to secure the cluster communication. -* xref:clustering/servers.adoc[Managing servers in a cluster] -- How to manage manage the servers in a cluster. +* xref:clustering/servers.adoc[Managing servers in a cluster] -- How to manage the servers in a cluster. * xref:clustering/unbind.adoc[Unbind a server] -- How to remove and archive the cluster state of a cluster server so that it can rebind to a cluster. * xref:clustering/databases.adoc[Managing databases in a cluster] -- How to manage the databases in a cluster. * Monitoring -- Monitoring of a cluster. 
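Returning to the `system`-only server in the third data center: the `allowedDatabases` constraint mentioned in that pattern can be set when the server is enabled, or adjusted afterwards. The sketch below assumes a placeholder server ID and a database name that is never expected to exist; the `system` database is hosted regardless of this constraint.

[source,cypher]
----
// Constrain the third-DC server so that user databases are never allocated to it.
ENABLE SERVER 'system-only-server-id'
  OPTIONS {allowedDatabases: ['reserved-unused-name']};

// For a server that is already enabled, the same constraint can be applied later:
ALTER SERVER 'system-only-server-id'
  SET OPTIONS {allowedDatabases: ['reserved-unused-name']};
----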
@@ -23,8 +23,8 @@ This chapter describes the following: ** xref:clustering/monitoring/endpoints.adoc[Monitor cluster endpoints for status information] -- The endpoints and semantics of endpoints used to monitor the health of the cluster. ** xref:clustering/monitoring/status-check.adoc[Monitor replication status] -- The procedure to monitor which members of a clustered database are up-to-date and can participate in a successful replication. * Resilient cluster deployment -- Recommendations and guidance on how to set up a resilient cluster which ensures your database stays available, fast, and recoverable even under failures. -** xref:clustering/multi-region-deployment/multi-data-center-routing.adoc[Multi-data center routing] -- Clusters on multi-data centers. ** xref:clustering/multi-region-deployment/geo-redundant-deployment.adoc[] -- Recommended patterns of cluster deployment across multiple cloud regions / data centers. +** xref:clustering/multi-region-deployment/multi-data-center-routing.adoc[Multi-data center routing] -- Clusters on multi-data centers. ** xref:clustering/multi-region-deployment/disaster-recovery.adoc[Disaster recovery] -- How to recover a cluster in the event of a disaster. * xref:clustering/settings.adoc[Settings reference] -- A summary of the most important cluster settings. * xref:clustering/server-syntax.adoc[Server commands reference] -- Reference of Cypher administrative commands to add and manage servers. diff --git a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc index c5e044b9e..894bd46c5 100644 --- a/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc +++ b/modules/ROOT/pages/clustering/multi-region-deployment/geo-redundant-deployment.adoc @@ -147,7 +147,7 @@ In that case a failure of a member in DC1 means the database is write-unavailabl If leadership shifts to DC2, this makes all writes slow. Finally, there is no guarantee against data loss if DC1 goes down. -Beacuse the primary member in DC2 may not be up to date with writes, even in append. +Because the primary member in DC2 may not be up to date with writes, even in append.
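Across all of these patterns, the primary and secondary counts placed across data centers are declared per database with the `TOPOLOGY` clause. The brief sketch below uses a placeholder database name; the clause controls how many copies of each type exist, while which servers (and therefore which data centers) host them is governed by server constraints such as `allowedDatabases`.

[source,cypher]
----
// Three primaries (for example, one per data center) and two secondaries for read scaling.
CREATE DATABASE example TOPOLOGY 3 PRIMARIES 2 SECONDARIES;

// Adding read capacity later by growing the number of secondaries.
ALTER DATABASE example SET TOPOLOGY 3 PRIMARIES 4 SECONDARIES;
----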