# Origin cluster credentials
spark.origin.host localhost
spark.origin.username some-username
spark.origin.password some-secret-password
spark.origin.keyspaceTable test.a1

# Target cluster credentials
spark.target.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.target.username client-id
spark.target.password client-secret
spark.target.keyspaceTable test.a2
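# Note: 'spark.target.scb' points to an Astra DB secure connect bundle; the target username/password are
# the clientId/secret of an Astra DB application token (the values above are placeholders).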

# Add 'missing' rows (during 'Validation') in 'Target' from 'Origin'. N/A for 'Migration'
spark.target.autocorrect.missing false
# Update 'mismatched' rows (during 'Validation') in 'Target' to match 'Origin'. N/A for 'Migration'
spark.target.autocorrect.mismatch false

# Read & Write rate-limits (rows/second). A higher value improves performance but puts more load on the cluster
spark.readRateLimit 20000
spark.writeRateLimit 20000

# Used to split the Cassandra token-range into slices and migrate random slices one at a time
# 10K splits usually work for tables up to 100GB (uncompressed) with a balanced token distribution
# For larger tables, increase the splits proportionally, e.g. use 100K for a 1TB table
spark.splitSize 10000

# Use a value of 1 (disables batching) when the primary-key and partition-key are the same
# For tables with a high average count of rows per partition, use a higher value to improve performance
spark.batchSize 10

# Below 'query' properties are set based on table schema
spark.query.origin partition-key,clustering-key,order-date,amount
spark.query.origin.partitionKey partition-key
spark.query.target.id partition-key,clustering-key
spark.query.types 9,1,4,3
# ############################################################################################################
# Following are the supported data types and their corresponding [Cassandra data-types]
# 0: ascii, text, varchar
# 1: int
# 2: bigint, counter
# 3: double
# 4: timestamp
# 5: map (separate type by %) - Example: 5%1%0 for map<int, text>
# 6: list (separate type by %) - Example: 6%0 for list<text>
# 7: blob
# 8: set (separate type by %) - Example: 8%0 for set<text>
# 9: uuid, timeuuid
# 10: boolean
# 11: tuple
# 12: float
# 13: tinyint
# 14: decimal
# 15: date
# 16: UDT [any user-defined-type created using 'CREATE TYPE']
# 17: varint
# 18: time
# 19: smallint
# Note: Ignore "Frozen" while mapping Collections (Map/List/Set) - Example: 5%1%0 for frozen<map<int, text>>
# ############################################################################################################
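# Illustrative example (assumed schema, consistent with the sample values above): if the origin columns are
#   partition-key (uuid), clustering-key (int), order-date (timestamp), amount (double)
# then, per the legend, spark.query.types is 9,1,4,3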

# ENABLE ONLY IF COLUMN NAMES ON TARGET ARE DIFFERENT FROM ORIGIN (SCHEMA & DATA-TYPES MUST BE THE SAME)
# spark.query.target partition-key,clustering-key,order-date,amount

# The tool adds TTL & Writetime at row-level (not field-level).
# If multiple column indexes are listed (comma separated), the largest TTL & Writetime values per row are used.
# Comma separated column indexes from "spark.query.origin" used to find the largest TTL or Writetime
spark.query.ttl.cols 2,3
spark.query.writetime.cols 2,3
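# Illustrative note (assuming 0-based indexes into "spark.query.origin"): with the sample value above,
# indexes 2,3 refer to order-date and amount, i.e. the non-key columns whose TTL/Writetime are read.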

# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE ROWS BASED ON CQL FILTER
# spark.query.condition
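# Illustrative example (hypothetical filter; exact syntax depends on how the tool appends it to its SELECT):
#   spark.query.condition AND order-date > '2023-01-01'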

# ENABLE ONLY IF IT IS A COUNTER TABLE
# spark.counterTable false
# spark.counterTable.cql
# spark.counterTable.cql.index 0

# ENABLE ONLY IF YOU WANT TO FILTER BASED ON WRITE-TIME (values must be in microseconds)
# spark.origin.writeTimeStampFilter false
# spark.origin.minWriteTimeStampFilter 0
# spark.origin.maxWriteTimeStampFilter 4102444800000000
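# Illustrative conversion: 2023-01-01 00:00:00 UTC is 1672531200 seconds since epoch, i.e. 1672531200000000
# microseconds; the sample max above (4102444800000000) corresponds to 2100-01-01 00:00:00 UTC.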

# ENABLE ONLY IF retries are needed (retries a slice of the token-range if an exception occurs)
# spark.maxRetries 0

# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE SOME % OF ROWS (NOT 100%)
# spark.coveragePercent 100

# ENABLE ONLY IF YOU WANT TO LOG STATS MORE OR LESS FREQUENTLY THAN DEFAULT
# spark.printStatsAfter 100000

# ENABLE ONLY IF YOU WANT TO USE READ AND/OR WRITE CONSISTENCY OTHER THAN LOCAL_QUORUM
# spark.consistency.read LOCAL_QUORUM
# spark.consistency.write LOCAL_QUORUM

# ENABLE ONLY IF YOU WANT TO REDUCE FETCH-SIZE TO AVOID FrameTooLongException
# spark.read.fetch.sizeInRows 1000

# ENABLE ONLY IF YOU WANT TO USE CUSTOM FIXED WRITETIME VALUE ON TARGET
# spark.target.custom.writeTime 0

# ENABLE ONLY TO SKIP recs greater than 10MB from Origin (to avoid Astra Guardrail error)
# spark.fieldGuardraillimitMB 10

# ENABLE ONLY TO COUNT recs greater than 10MB in Origin
# spark.origin.checkTableforColSize false
# spark.origin.checkTableforColSize.cols partition-key,clustering-key
# spark.origin.checkTableforColSize.cols.types 9,1

# ENABLE ONLY TO filter data from Origin
# spark.origin.FilterData false
# spark.origin.FilterColumn test
# spark.origin.FilterColumnIndex 2
# spark.origin.FilterColumnType 6%16
# spark.origin.FilterColumnValue test
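# Illustrative note: per the data-type legend above, FilterColumnType 6%16 denotes a list of a UDT;
# the 'test' column name and value are placeholders to replace with your own filter column and value.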

# ONLY USE if SSL clientAuth is enabled on origin Cassandra/DSE
# spark.origin.trustStore.path
# spark.origin.trustStore.password
# spark.origin.trustStore.type JKS
# spark.origin.keyStore.path
# spark.origin.keyStore.password
# spark.origin.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
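# Illustrative example (hypothetical values): spark.origin.trustStore.path /path/to/origin-truststore.jks
# with spark.origin.trustStore.password set to that truststore's password.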

# ONLY USE if SSL clientAuth is enabled on target Cassandra/DSE
# spark.target.trustStore.path
# spark.target.trustStore.password
# spark.target.trustStore.type JKS
# spark.target.keyStore.path
# spark.target.keyStore.password
# spark.target.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA