# Origin cluster credentials (use "host + port" OR "secure-connect-bundle" but not both)
spark.origin.host cass-origin
spark.origin.port 9042
#spark.origin.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.origin.username cassandra
spark.origin.password cassandra

# Target cluster credentials (use "host + port" OR "secure-connect-bundle" but not both)
spark.target.host cass-target
#spark.target.port 9042
#spark.target.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.target.username cassandra
spark.target.password cassandra

# Add 'missing' rows (during 'Validation') in 'Target' from 'Origin'. N/A for 'Migration'
spark.target.autocorrect.missing false
# Update 'mismatched' rows (during 'Validation') in 'Target' to match 'Origin'. N/A for 'Migration'
spark.target.autocorrect.mismatch false

# Read & write rate limits (rows/second). Higher values improve performance but put more load on the clusters
spark.readRateLimit 20000
spark.writeRateLimit 20000

# Used to split the Cassandra token range into slices; random slices are migrated one at a time
# 10K splits usually works for tables up to 100GB (uncompressed) with balanced token distribution
# For larger tables, test on 1% of the volume (using param coveragePercent) and increase the number of splits as needed
spark.numSplits 10000
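# Illustrative sizing only (assumed numbers, not a measured benchmark): for a hypothetical
# ~1TB (uncompressed) table, one might test on a 1% sample and scale splits up, e.g.
#spark.coveragePercent 1
#spark.numSplits 100000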

# Use a value of 1 (disables batching) when the primary key and partition key are the same
# For tables with a high average count of rows per partition, use a higher value to improve performance
spark.batchSize 10
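# Illustrative only (the best value depends on actual partition sizes): for a hypothetical
# table averaging ~100 small rows per partition, a larger batch might help, e.g.
#spark.batchSize 25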

# ENABLE ONLY IF YOU WANT TO MIGRATE ONLY SOME COLUMNS FROM ORIGIN (default auto-detects the schema & migrates all columns)
# COMMA-SEPARATED LIST OF COLUMN NAMES (MUST INCLUDE ALL PRIMARY-KEY FIELDS)
#spark.query.origin comma-separated-partition-key,comma-separated-clustering-key,comma-separated-other-columns

# ENABLE ONLY IF COLUMN NAMES ON TARGET DIFFER FROM ORIGIN (default assumes the target schema is the same as origin)
#spark.query.target comma-separated-partition-key,comma-separated-clustering-key,comma-separated-other-columns

############################### EXAMPLE MAPPING USING A DEMO TABLE ##########################################
# If the origin table schema is as below
# CREATE TABLE cycling.cyclist_name (
#     pk1 uuid,
#     pk2 date,
#     cc1 boolean,
#     firstname text,
#     middlename text, // You do not want to migrate this column
#     lastname text,
#     phones list<text>,
#     PRIMARY KEY((pk1,pk2),cc1)
# );
# then our origin mapping would look like below
# spark.query.origin pk1,pk2,cc1,firstname,lastname,phones
#
# And if the target table schema is as below
# CREATE TABLE cycling.cyclist_name (
#     pk1 uuid,
#     pk2 date,
#     cc1 boolean,
#     fn text, // Column has a different name than origin
#     ln text, // Column has a different name than origin
#     phones list<text>,
#     PRIMARY KEY((pk1,pk2),cc1)
# );
# then our target mapping would look like below
# spark.query.target pk1,pk2,cc1,fn,ln,phones
#############################################################################################################

# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE ROWS BASED ON A VALID CQL FILTER
#spark.query.condition
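# Illustrative value (hypothetical; assumes the condition is appended verbatim to the generated
# SELECT, so it should be a valid CQL predicate on origin columns, as in the demo table above)
#spark.query.condition AND cc1 = true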

# ENABLE ONLY IF YOU WANT TO FILTER BASED ON WRITE-TIME (values must be in microseconds)
#spark.origin.writeTimeStampFilter false
#spark.origin.minWriteTimeStampFilter 0
#spark.origin.maxWriteTimeStampFilter 4102444800000000
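# Worked example: writetime values are epoch microseconds (epoch seconds * 1,000,000);
# 2023-01-01 00:00:00 UTC is epoch 1672531200, so to keep only rows written on/after that date:
#spark.origin.writeTimeStampFilter true
#spark.origin.minWriteTimeStampFilter 1672531200000000
# (the default max above, 4102444800000000, corresponds to 2100-01-01 00:00:00 UTC)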

# ENABLE ONLY IF retries are needed (retries a token-range slice if an exception occurs)
#spark.maxRetries 0

# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE SOME % OF ROWS (NOT 100%)
#spark.coveragePercent 100
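# Illustrative example: validate a quick 1% sample first (also useful when tuning spark.numSplits above)
#spark.coveragePercent 1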

# ENABLE ONLY IF YOU WANT TO LOG STATS MORE OR LESS FREQUENTLY THAN DEFAULT
#spark.printStatsAfter 100000

# ENABLE ONLY IF YOU WANT TO USE READ AND/OR WRITE CONSISTENCY OTHER THAN LOCAL_QUORUM
#spark.consistency.read LOCAL_QUORUM
#spark.consistency.write LOCAL_QUORUM

# ENABLE ONLY IF YOU WANT TO REDUCE FETCH-SIZE TO AVOID FrameTooLongException
#spark.read.fetch.sizeInRows 1000

# ENABLE ONLY IF YOU WANT TO USE A CUSTOM FIXED WRITETIME VALUE ON TARGET
#spark.target.writeTime.fixedValue 0

# ENABLE ONLY IF YOU WANT TO INCREMENT THE ORIGIN WRITETIME VALUE ON TARGET
# WORKAROUND FOR DUPLICATES IN LIST FIELDS CAUSED BY CASSANDRA BUG https://issues.apache.org/jira/browse/CASSANDRA-11368
#spark.target.writeTime.incrementBy 0
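# Illustrative workaround value (hypothetical; verify against your data): a small positive
# increment, e.g. 1 microsecond, is one possible starting point
#spark.target.writeTime.incrementBy 1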

# ONLY USE when running in Guardrail mode to identify large fields
#spark.guardrail.colSizeInKB 1024

# ENABLE ONLY IF YOU WANT TO filter data from Origin
#spark.origin.FilterData false
#spark.origin.FilterColumn test
#spark.origin.FilterColumnIndex 2
#spark.origin.FilterColumnType 6%16
#spark.origin.FilterColumnValue test

# ONLY USE if SSL is enabled on origin Cassandra/DSE (e.g. Azure Cosmos Cassandra DB)
#spark.origin.ssl.enabled true

# ONLY USE if SSL clientAuth is enabled on origin Cassandra/DSE
#spark.origin.trustStore.path
#spark.origin.trustStore.password
#spark.origin.trustStore.type JKS
#spark.origin.keyStore.path
#spark.origin.keyStore.password
#spark.origin.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
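# Illustrative values (hypothetical paths and passwords; substitute your own stores), e.g.
#spark.origin.trustStore.path /path/to/origin-truststore.jks
#spark.origin.trustStore.password changeit
#spark.origin.keyStore.path /path/to/origin-keystore.jks
#spark.origin.keyStore.password changeit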

# ONLY USE if SSL is enabled on target Cassandra/DSE
#spark.target.ssl.enabled true

# ONLY USE if SSL clientAuth is enabled on target Cassandra/DSE
#spark.target.trustStore.path
#spark.target.trustStore.password
#spark.target.trustStore.type JKS
#spark.target.keyStore.path
#spark.target.keyStore.password
#spark.target.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA