
Commit 7a3fb8a

Merge pull request #153 from datastax/issue/CDM-54
Issue/cdm 54
2 parents b3d4c45 + dc8b565 commit 7a3fb8a

94 files changed, +2643 -696 lines


PERF/cdm-v3.properties

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
# Origin cluster credentials (use "host + port" OR "secure-connect-bundle", but not both)
spark.origin.host cass-origin
spark.origin.port 9042
#spark.origin.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.origin.username cassandra
spark.origin.password cassandra

# Target cluster credentials (use "host + port" OR "secure-connect-bundle", but not both)
spark.target.host cass-target
#spark.target.port 9042
#spark.target.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.target.username cassandra
spark.target.password cassandra

# Add rows that are 'missing' on 'Target' (found during 'Validation') from 'Origin'. N/A for 'Migration'
spark.target.autocorrect.missing false
# Update rows that are 'mismatched' on 'Target' (found during 'Validation') to match 'Origin'. N/A for 'Migration'
spark.target.autocorrect.mismatch false

# Read & write rate limits (rows/second). Higher values improve performance but put more load on the cluster
spark.readRateLimit 20000
spark.writeRateLimit 20000

# Used to split the Cassandra token-range into slices and migrate random slices one at a time
# 10K splits usually work for tables up to 100GB (uncompressed) with a balanced token distribution
# For larger tables, test on 1% of the volume (using param coveragePercent) and increase the number of splits as needed
spark.numSplits 10000
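# For illustration, using the numbers above: 100GB / 10,000 splits works out to roughly
# 10MB of (uncompressed) data per slice; to keep slices near that size, a 1TB table would
# need on the order of 100,000 splits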

# Use a value of 1 (disables batching) when the primary key and the partition key are the same
# For tables with a high average number of rows per partition, use a higher value to improve performance
spark.batchSize 10
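# For illustration (an assumption about sizing, not a measured recommendation): batchSize
# is the number of rows grouped into each write batch, so a table averaging ~100 rows per
# partition might try a value in the 20-40 range, while a table whose full primary key
# equals its partition key (one row per partition) should keep this at 1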

# ENABLE ONLY IF YOU WANT TO MIGRATE ONLY SOME COLUMNS FROM ORIGIN (default auto-detects schema & migrates all columns)
# COMMA-SEPARATED LIST OF COLUMN NAMES (MUST INCLUDE ALL PRIMARY-KEY FIELDS)
#spark.query.origin comma-separated-partition-key,comma-separated-clustering-key,comma-separated-other-columns

# ENABLE ONLY IF COLUMN NAMES ON TARGET DIFFER FROM ORIGIN (default assumes the target schema is the same as origin)
#spark.query.target comma-separated-partition-key,comma-separated-clustering-key,comma-separated-other-columns

############################### EXAMPLE MAPPING USING A DEMO TABLE ##########################################
# If the origin table schema is as below
# CREATE TABLE cycling.cyclist_name (
#     pk1 uuid,
#     pk2 date,
#     cc1 boolean,
#     firstname text,
#     middlename text,   // You do not want to migrate this column
#     lastname text,
#     phones list<text>,
#     PRIMARY KEY((pk1,pk2),cc1)
# );
# then the origin mapping would look like below
# spark.query.origin pk1,pk2,cc1,firstname,lastname,phones
#
# And if the target table schema is as below
# CREATE TABLE cycling.cyclist_name (
#     pk1 uuid,
#     pk2 date,
#     cc1 boolean,
#     fn text,           // Column has a different name than origin
#     ln text,           // Column has a different name than origin
#     phones list<text>,
#     PRIMARY KEY((pk1,pk2),cc1)
# );
# then the target mapping would look like below
# spark.query.target pk1,pk2,cc1,fn,ln,phones
#############################################################################################################

# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE ROWS BASED ON A VALID CQL FILTER
#spark.query.condition

# ENABLE ONLY IF YOU WANT TO FILTER BASED ON WRITE-TIME (values must be in microseconds)
#spark.origin.writeTimeStampFilter false
#spark.origin.minWriteTimeStampFilter 0
#spark.origin.maxWriteTimeStampFilter 4102444800000000
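# For illustration (values are epoch microseconds, UTC): to keep only writes made during
# calendar year 2022, you could set:
#   spark.origin.writeTimeStampFilter true
#   spark.origin.minWriteTimeStampFilter 1640995200000000   (2022-01-01 00:00:00)
#   spark.origin.maxWriteTimeStampFilter 1672531199999999   (2022-12-31 23:59:59.999999)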

# ENABLE ONLY IF retries are needed (retries a token-range slice if an exception occurs)
#spark.maxRetries 0

# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE SOME % OF ROWS (NOT 100%)
#spark.coveragePercent 100

# ENABLE ONLY IF YOU WANT TO LOG STATS MORE OR LESS FREQUENTLY THAN THE DEFAULT
#spark.printStatsAfter 100000

# ENABLE ONLY IF YOU WANT TO USE A READ AND/OR WRITE CONSISTENCY OTHER THAN LOCAL_QUORUM
#spark.consistency.read LOCAL_QUORUM
#spark.consistency.write LOCAL_QUORUM

# ENABLE ONLY IF YOU WANT TO REDUCE FETCH-SIZE TO AVOID FrameTooLongException
#spark.read.fetch.sizeInRows 1000

# ENABLE ONLY IF YOU WANT TO USE A CUSTOM, FIXED WRITETIME VALUE ON TARGET
#spark.target.writeTime.fixedValue 0

# ENABLE ONLY IF YOU WANT TO INCREMENT THE SOURCE WRITETIME VALUE
# DUPLICATES IN LIST FIELDS: USE THIS AS A WORKAROUND FOR CASSANDRA BUG https://issues.apache.org/jira/browse/CASSANDRA-11368
#spark.target.writeTime.incrementBy 0

# ONLY USE when running in Guardrail mode to identify large fields
#spark.guardrail.colSizeInKB 1024

# ENABLE ONLY IF YOU WANT TO filter data from Origin
#spark.origin.FilterData false
#spark.origin.FilterColumn test
#spark.origin.FilterColumnIndex 2
#spark.origin.FilterColumnType 6%16
#spark.origin.FilterColumnValue test

# ONLY USE if SSL is enabled on the origin Cassandra/DSE cluster (e.g. Azure Cosmos DB Cassandra API)
#spark.origin.ssl.enabled true

# ONLY USE if SSL clientAuth is enabled on the origin Cassandra/DSE cluster
#spark.origin.trustStore.path
#spark.origin.trustStore.password
#spark.origin.trustStore.type JKS
#spark.origin.keyStore.path
#spark.origin.keyStore.password
#spark.origin.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA

# ONLY USE if SSL is enabled on the target Cassandra/DSE cluster
#spark.target.ssl.enabled true

# ONLY USE if SSL clientAuth is enabled on the target Cassandra/DSE cluster
#spark.target.trustStore.path
#spark.target.trustStore.password
#spark.target.trustStore.type JKS
#spark.target.keyStore.path
#spark.target.keyStore.password
#spark.target.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
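
To run a migration with a file like this, pass it to spark-submit as the job properties file. A minimal sketch, assuming CDM 3.x conventions (the job class datastax.astra.migrate.Migrate, the jar name, and the spark.origin.keyspaceTable setting naming the table to migrate are taken from the CDM 3.x README and may differ for your build; the keyspace/table reuses the demo table from the example mapping above):

spark-submit --properties-file cdm-v3.properties \
  --conf spark.origin.keyspaceTable="cycling.cyclist_name" \
  --master "local[*]" \
  --class datastax.astra.migrate.Migrate cassandra-data-migrator-3.x.x.jar

The Validation run that the autocorrect settings refer to uses the same invocation with --class pointed at the validation job (DiffData in the same README, likewise an assumption here).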
