
Commit 7a3fb8a

Merge pull request #153 from datastax/issue/CDM-54
Issue/cdm 54
2 parents b3d4c45 + dc8b565 commit 7a3fb8a

94 files changed, +2643 -696 lines


PERF/cdm-v3.properties

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
# Origin cluster credentials (use "host + port" OR "secure-connect-bundle", but not both)
spark.origin.host cass-origin
spark.origin.port 9042
#spark.origin.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.origin.username cassandra
spark.origin.password cassandra

# Target cluster credentials (use "host + port" OR "secure-connect-bundle", but not both)
spark.target.host cass-target
#spark.target.port 9042
#spark.target.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.target.username cassandra
spark.target.password cassandra

# Add rows that are 'missing' on 'Target' (found during 'Validation') from 'Origin'. N/A for 'Migration'
spark.target.autocorrect.missing false
# Update rows that are 'mismatched' on 'Target' (found during 'Validation') to match 'Origin'. N/A for 'Migration'
spark.target.autocorrect.mismatch false

# Read & write rate limits (rows/second). Higher values improve performance but put more load on the cluster
spark.readRateLimit 20000
spark.writeRateLimit 20000

# Used to split the Cassandra token-range into slices and migrate random slices one at a time
# 10K splits usually work for tables up to 100GB (uncompressed) with a balanced token distribution
# For larger tables, test on 1% of the volume (using param coveragePercent) and increase the number of splits as needed
spark.numSplits 10000
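# For illustration, using the numbers above: 100GB / 10,000 splits works out to roughly
# 10MB of (uncompressed) data per slice; to keep slices near that size, a 1TB table would
# need on the order of 100,000 splits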

# Use a value of 1 (disables batching) when the primary key and the partition key are the same
# For tables with a high average number of rows per partition, use a higher value to improve performance
spark.batchSize 10
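# For illustration (an assumption about sizing, not a measured recommendation): batchSize
# is the number of rows grouped into each write batch, so a table averaging ~100 rows per
# partition might try a value in the 20-40 range, while a table whose full primary key
# equals its partition key (one row per partition) should keep this at 1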

# ENABLE ONLY IF YOU WANT TO MIGRATE ONLY SOME COLUMNS FROM ORIGIN (default auto-detects schema & migrates all columns)
# COMMA-SEPARATED LIST OF COLUMN NAMES (MUST INCLUDE ALL PRIMARY-KEY FIELDS)
#spark.query.origin comma-separated-partition-key,comma-separated-clustering-key,comma-separated-other-columns

# ENABLE ONLY IF COLUMN NAMES ON TARGET DIFFER FROM ORIGIN (default assumes the target schema is the same as origin)
#spark.query.target comma-separated-partition-key,comma-separated-clustering-key,comma-separated-other-columns

############################### EXAMPLE MAPPING USING A DEMO TABLE ##########################################
# If the origin table schema is as below
# CREATE TABLE cycling.cyclist_name (
#     pk1 uuid,
#     pk2 date,
#     cc1 boolean,
#     firstname text,
#     middlename text,   // You do not want to migrate this column
#     lastname text,
#     phones list<text>,
#     PRIMARY KEY((pk1,pk2),cc1)
# );
# then the origin mapping would look like below
# spark.query.origin pk1,pk2,cc1,firstname,lastname,phones
#
# And if the target table schema is as below
# CREATE TABLE cycling.cyclist_name (
#     pk1 uuid,
#     pk2 date,
#     cc1 boolean,
#     fn text,           // Column has a different name than origin
#     ln text,           // Column has a different name than origin
#     phones list<text>,
#     PRIMARY KEY((pk1,pk2),cc1)
# );
# then the target mapping would look like below
# spark.query.target pk1,pk2,cc1,fn,ln,phones
#############################################################################################################

# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE ROWS BASED ON A VALID CQL FILTER
#spark.query.condition

# ENABLE ONLY IF YOU WANT TO FILTER BASED ON WRITE-TIME (values must be in microseconds)
#spark.origin.writeTimeStampFilter false
#spark.origin.minWriteTimeStampFilter 0
#spark.origin.maxWriteTimeStampFilter 4102444800000000
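# For illustration (values are epoch microseconds, UTC): to keep only writes made during
# calendar year 2022, you could set:
#   spark.origin.writeTimeStampFilter true
#   spark.origin.minWriteTimeStampFilter 1640995200000000   (2022-01-01 00:00:00)
#   spark.origin.maxWriteTimeStampFilter 1672531199999999   (2022-12-31 23:59:59.999999)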

# ENABLE ONLY IF retries are needed (retries a token-range slice if an exception occurs)
#spark.maxRetries 0

# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE SOME % OF ROWS (NOT 100%)
#spark.coveragePercent 100

# ENABLE ONLY IF YOU WANT TO LOG STATS MORE OR LESS FREQUENTLY THAN THE DEFAULT
#spark.printStatsAfter 100000

# ENABLE ONLY IF YOU WANT TO USE A READ AND/OR WRITE CONSISTENCY OTHER THAN LOCAL_QUORUM
#spark.consistency.read LOCAL_QUORUM
#spark.consistency.write LOCAL_QUORUM

# ENABLE ONLY IF YOU WANT TO REDUCE FETCH-SIZE TO AVOID FrameTooLongException
#spark.read.fetch.sizeInRows 1000

# ENABLE ONLY IF YOU WANT TO USE A CUSTOM, FIXED WRITETIME VALUE ON TARGET
#spark.target.writeTime.fixedValue 0

# ENABLE ONLY IF YOU WANT TO INCREMENT THE SOURCE WRITETIME VALUE
# DUPLICATES IN LIST FIELDS: USE THIS AS A WORKAROUND FOR CASSANDRA BUG https://issues.apache.org/jira/browse/CASSANDRA-11368
#spark.target.writeTime.incrementBy 0

# ONLY USE when running in Guardrail mode to identify large fields
#spark.guardrail.colSizeInKB 1024

# ENABLE ONLY IF YOU WANT TO filter data from Origin
#spark.origin.FilterData false
#spark.origin.FilterColumn test
#spark.origin.FilterColumnIndex 2
#spark.origin.FilterColumnType 6%16
#spark.origin.FilterColumnValue test

# ONLY USE if SSL is enabled on the origin Cassandra/DSE cluster (e.g. Azure Cosmos DB Cassandra API)
#spark.origin.ssl.enabled true

# ONLY USE if SSL clientAuth is enabled on the origin Cassandra/DSE cluster
#spark.origin.trustStore.path
#spark.origin.trustStore.password
#spark.origin.trustStore.type JKS
#spark.origin.keyStore.path
#spark.origin.keyStore.password
#spark.origin.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA

# ONLY USE if SSL is enabled on the target Cassandra/DSE cluster
#spark.target.ssl.enabled true

# ONLY USE if SSL clientAuth is enabled on the target Cassandra/DSE cluster
#spark.target.trustStore.path
#spark.target.trustStore.password
#spark.target.trustStore.type JKS
#spark.target.keyStore.path
#spark.target.keyStore.password
#spark.target.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
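
To run a migration with a file like this, pass it to spark-submit as the job properties file. A minimal sketch, assuming CDM 3.x conventions (the job class datastax.astra.migrate.Migrate, the jar name, and the spark.origin.keyspaceTable setting naming the table to migrate are taken from the CDM 3.x README and may differ for your build; the keyspace/table reuses the demo table from the example mapping above):

spark-submit --properties-file cdm-v3.properties \
  --conf spark.origin.keyspaceTable="cycling.cyclist_name" \
  --master "local[*]" \
  --class datastax.astra.migrate.Migrate cassandra-data-migrator-3.x.x.jar

The Validation run that the autocorrect settings refer to uses the same invocation with --class pointed at the validation job (DiffData in the same README, likewise an assumption here).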
