@@ -1,21 +1,12 @@
 package datastax.astra.migrate;
 
 import com.datastax.oss.driver.api.core.CqlSession;
-import com.datastax.oss.driver.api.core.cql.BoundStatement;
-import com.datastax.oss.driver.api.core.cql.PreparedStatement;
-import com.datastax.oss.driver.api.core.cql.Row;
 import com.datastax.oss.driver.shaded.guava.common.util.concurrent.RateLimiter;
 import datastax.astra.migrate.properties.KnownProperties;
 import org.apache.spark.SparkConf;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.time.Duration;
-import java.time.Instant;
-import java.util.List;
-import java.util.Optional;
-import java.util.stream.IntStream;
-
 public class AbstractJobSession extends BaseJobSession {
 
     public Logger logger = LoggerFactory.getLogger(this.getClass().getName());
@@ -31,11 +22,10 @@ protected AbstractJobSession(CqlSession originSession, CqlSession targetSession,
             return;
         }
 
-        this.originSessionSession = originSession;
-        this.targetSession = targetSession;
+        cqlHelper.setOriginSession(originSession);
+        cqlHelper.setTargetSession(targetSession);
+        cqlHelper.setJobMigrateRowsFromFile(isJobMigrateRowsFromFile);
 
-        batchSize = propertyHelper.getInteger(KnownProperties.SPARK_BATCH_SIZE);
-        fetchSizeInRows = propertyHelper.getInteger(KnownProperties.READ_FETCH_SIZE);
         printStatsAfter = propertyHelper.getInteger(KnownProperties.SPARK_STATS_AFTER);
         if (!propertyHelper.meetsMinimum(KnownProperties.SPARK_STATS_AFTER, printStatsAfter, 1)) {
             logger.warn(KnownProperties.SPARK_STATS_AFTER + " must be greater than 0. Setting to default value of " + KnownProperties.getDefaultAsString(KnownProperties.SPARK_STATS_AFTER));
@@ -47,242 +37,11 @@ protected AbstractJobSession(CqlSession originSession, CqlSession targetSession,
         writeLimiter = RateLimiter.create(propertyHelper.getInteger(KnownProperties.SPARK_LIMIT_WRITE));
         maxRetries = propertyHelper.getInteger(KnownProperties.SPARK_MAX_RETRIES);
 
-        originKeyspaceTable = propertyHelper.getString(KnownProperties.ORIGIN_KEYSPACE_TABLE);
-        targetKeyspaceTable = propertyHelper.getString(KnownProperties.TARGET_KEYSPACE_TABLE);
-
-        String ttlColsStr = propertyHelper.getAsString(KnownProperties.ORIGIN_TTL_COLS);
-        if (null != ttlColsStr && ttlColsStr.trim().length() > 0) {
-            for (String ttlCol : ttlColsStr.split(",")) {
-                ttlCols.add(Integer.parseInt(ttlCol));
-            }
-        }
-
-        String writeTimestampColsStr = propertyHelper.getAsString(KnownProperties.ORIGIN_WRITETIME_COLS);
-        if (null != writeTimestampColsStr && writeTimestampColsStr.trim().length() > 0) {
-            for (String writeTimeStampCol : writeTimestampColsStr.split(",")) {
-                writeTimeStampCols.add(Integer.parseInt(writeTimeStampCol));
-            }
-        }
-
-        writeTimeStampFilter = propertyHelper.getBoolean(KnownProperties.ORIGIN_FILTER_WRITETS_ENABLED);
-        // batchsize set to 1 if there is a writeFilter
-        if (writeTimeStampFilter) {
-            batchSize = 1;
-        }
-
-        minWriteTimeStampFilter = propertyHelper.getLong(KnownProperties.ORIGIN_FILTER_WRITETS_MIN);
-        maxWriteTimeStampFilter = propertyHelper.getLong(KnownProperties.ORIGIN_FILTER_WRITETS_MAX);
-        customWritetime = propertyHelper.getLong(KnownProperties.TARGET_CUSTOM_WRITETIME);
-
-        logger.info("PARAM -- Read Consistency: {}", readConsistencyLevel);
-        logger.info("PARAM -- Write Consistency: {}", writeConsistencyLevel);
-        logger.info("PARAM -- Write Batch Size: {}", batchSize);
         logger.info("PARAM -- Max Retries: {}", maxRetries);
-        logger.info("PARAM -- Read Fetch Size: {}", fetchSizeInRows);
-        logger.info("PARAM -- Source Keyspace Table: {}", originKeyspaceTable);
-        logger.info("PARAM -- Destination Keyspace Table: {}", targetKeyspaceTable);
         logger.info("PARAM -- ReadRateLimit: {}", readLimiter.getRate());
         logger.info("PARAM -- WriteRateLimit: {}", writeLimiter.getRate());
-        logger.info("PARAM -- TTLCols: {}", ttlCols);
-        logger.info("PARAM -- WriteTimestampFilterCols: {}", writeTimeStampCols);
-        logger.info("PARAM -- WriteTimestampFilter: {}", writeTimeStampFilter);
-        if (writeTimeStampFilter) {
-            logger.info("PARAM -- minWriteTimeStampFilter: {} datetime is {}", minWriteTimeStampFilter,
-                    Instant.ofEpochMilli(minWriteTimeStampFilter / 1000));
-            logger.info("PARAM -- maxWriteTimeStampFilter: {} datetime is {}", maxWriteTimeStampFilter,
-                    Instant.ofEpochMilli(maxWriteTimeStampFilter / 1000));
-        }
-
-        String selectCols = propertyHelper.getAsString(KnownProperties.ORIGIN_COLUMN_NAMES);
-        String partitionKey = propertyHelper.getAsString(KnownProperties.ORIGIN_PARTITION_KEY);
-        String originSelectCondition = propertyHelper.getAsString(KnownProperties.ORIGIN_FILTER_CONDITION);
-        if (!originSelectCondition.isEmpty() && !originSelectCondition.trim().toUpperCase().startsWith("AND")) {
-            originSelectCondition = " AND " + originSelectCondition;
-        }
-
-        final StringBuilder selectTTLWriteTimeCols = new StringBuilder();
-        allCols = selectCols.split(",");
-        ttlCols.forEach(col -> {
-            selectTTLWriteTimeCols.append(",ttl(" + allCols[col] + ")");
-        });
-        writeTimeStampCols.forEach(col -> {
-            selectTTLWriteTimeCols.append(",writetime(" + allCols[col] + ")");
-        });
-        selectColTypes = getTypes(propertyHelper.getAsString(KnownProperties.ORIGIN_COLUMN_TYPES));
-        String idCols = propertyHelper.getAsString(KnownProperties.TARGET_PRIMARY_KEY);
-        idColTypes = selectColTypes.subList(0, idCols.split(",").length);
-
-        String insertCols = propertyHelper.getAsString(KnownProperties.TARGET_COLUMN_NAMES);
-        if (null == insertCols || insertCols.trim().isEmpty()) {
-            insertCols = selectCols;
-        }
-        String insertBinds = "";
-        for (String str : idCols.split(",")) {
-            if (insertBinds.isEmpty()) {
-                insertBinds = str + "= ?";
-            } else {
-                insertBinds += " and " + str + "= ?";
-            }
-        }
-
-        String fullSelectQuery;
-        if (!isJobMigrateRowsFromFile) {
-            fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols + " from " + originKeyspaceTable +
-                    " where token(" + partitionKey.trim() + ") >= ? and token(" + partitionKey.trim() + ") <= ? " +
-                    originSelectCondition + " ALLOW FILTERING";
-        } else {
-            fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols + " from " + originKeyspaceTable + " where " + insertBinds;
-        }
-        originSelectStatement = originSession.prepare(fullSelectQuery);
-        logger.info("PARAM -- Query used: {}", fullSelectQuery);
-
-        targetSelectStatement = targetSession.prepare(
-                "select " + insertCols + " from " + targetKeyspaceTable
-                        + " where " + insertBinds);
-
-        hasRandomPartitioner = propertyHelper.getBoolean(KnownProperties.ORIGIN_HAS_RANDOM_PARTITIONER);
-        isCounterTable = propertyHelper.getBoolean(KnownProperties.ORIGIN_IS_COUNTER);
-        if (isCounterTable) {
-            String updateSelectMappingStr = propertyHelper.getString(KnownProperties.ORIGIN_COUNTER_INDEXES);
-            for (String updateSelectIndex : updateSelectMappingStr.split(",")) {
-                updateSelectMapping.add(Integer.parseInt(updateSelectIndex));
-            }
-
-            String counterTableUpdate = propertyHelper.getString(KnownProperties.ORIGIN_COUNTER_CQL);
-            targetInsertStatement = targetSession.prepare(counterTableUpdate);
-        } else {
-            insertBinds = "";
-            for (String str : insertCols.split(",")) {
-                if (insertBinds.isEmpty()) {
-                    insertBinds += "?";
-                } else {
-                    insertBinds += ", ?";
-                }
-            }
-
-            String fullInsertQuery = "insert into " + targetKeyspaceTable + " (" + insertCols + ") VALUES (" + insertBinds + ")";
-            if (!ttlCols.isEmpty()) {
-                fullInsertQuery += " USING TTL ?";
-                if (!writeTimeStampCols.isEmpty()) {
-                    fullInsertQuery += " AND TIMESTAMP ?";
-                }
-            } else if (!writeTimeStampCols.isEmpty()) {
-                fullInsertQuery += " USING TIMESTAMP ?";
-            }
-            targetInsertStatement = targetSession.prepare(fullInsertQuery);
-        }
-
-        // Handle rows with blank values for 'timestamp' data-type in primary-key fields
-        tsReplaceValStr = propertyHelper.getAsString(KnownProperties.TARGET_REPLACE_MISSING_TS);
-        if (!tsReplaceValStr.isEmpty()) {
-            tsReplaceVal = Long.parseLong(tsReplaceValStr);
-        }
-    }
-
-    public BoundStatement bindInsert(PreparedStatement insertStatement, Row originRow, Row targetRow) {
-        BoundStatement boundInsertStatement = insertStatement.bind().setConsistencyLevel(writeConsistencyLevel);
-
-        if (isCounterTable) {
-            for (int index = 0; index < selectColTypes.size(); index++) {
-                MigrateDataType dataType = selectColTypes.get(updateSelectMapping.get(index));
-                // compute the counter delta if reading from target for the difference
-                if (targetRow != null && index < (selectColTypes.size() - idColTypes.size())) {
-                    boundInsertStatement = boundInsertStatement.set(index, (originRow.getLong(updateSelectMapping.get(index)) - targetRow.getLong(updateSelectMapping.get(index))), Long.class);
-                } else {
-                    boundInsertStatement = boundInsertStatement.set(index, getData(dataType, updateSelectMapping.get(index), originRow), dataType.typeClass);
-                }
-            }
-        } else {
-            int index = 0;
-            for (index = 0; index < selectColTypes.size(); index++) {
-                boundInsertStatement = getBoundStatement(originRow, boundInsertStatement, index, selectColTypes);
-                if (boundInsertStatement == null) return null;
-            }
-
-            if (!ttlCols.isEmpty()) {
-                boundInsertStatement = boundInsertStatement.set(index, getLargestTTL(originRow), Integer.class);
-                index++;
-            }
-            if (!writeTimeStampCols.isEmpty()) {
-                if (customWritetime > 0) {
-                    boundInsertStatement = boundInsertStatement.set(index, customWritetime, Long.class);
-                } else {
-                    boundInsertStatement = boundInsertStatement.set(index, getLargestWriteTimeStamp(originRow), Long.class);
-                }
-            }
-        }
-
-        // Batch insert for large records may take longer, hence 10 secs to avoid timeout errors
-        return boundInsertStatement.setTimeout(Duration.ofSeconds(10));
-    }
-
-    public int getLargestTTL(Row row) {
-        return IntStream.range(0, ttlCols.size())
-                .map(i -> row.getInt(selectColTypes.size() + i)).max().getAsInt();
-    }
-
-    public long getLargestWriteTimeStamp(Row row) {
-        return IntStream.range(0, writeTimeStampCols.size())
-                .mapToLong(i -> row.getLong(selectColTypes.size() + ttlCols.size() + i)).max().getAsLong();
-    }
-
-    public BoundStatement selectFromTarget(PreparedStatement selectStatement, Row originRow) {
-        BoundStatement boundSelectStatement = selectStatement.bind().setConsistencyLevel(readConsistencyLevel);
-        for (int index = 0; index < idColTypes.size(); index++) {
-            boundSelectStatement = getBoundStatement(originRow, boundSelectStatement, index, idColTypes);
-            if (boundSelectStatement == null) return null;
-        }
-
-        return boundSelectStatement;
-    }
-
-    private BoundStatement getBoundStatement(Row row, BoundStatement boundSelectStatement, int index,
-                                             List<MigrateDataType> cols) {
-        MigrateDataType dataTypeObj = cols.get(index);
-        Object colData = getData(dataTypeObj, index, row);
-
-        // Handle rows with blank values in primary-key fields
-        if (index < idColTypes.size()) {
-            Optional<Object> optionalVal = handleBlankInPrimaryKey(index, colData, dataTypeObj.typeClass, row);
-            if (!optionalVal.isPresent()) {
-                return null;
-            }
-            colData = optionalVal.get();
-        }
-        boundSelectStatement = boundSelectStatement.set(index, colData, dataTypeObj.typeClass);
-        return boundSelectStatement;
-    }
-
-    protected Optional<Object> handleBlankInPrimaryKey(int index, Object colData, Class dataType, Row originRow) {
-        return handleBlankInPrimaryKey(index, colData, dataType, originRow, true);
-    }
-
-    protected Optional<Object> handleBlankInPrimaryKey(int index, Object colData, Class dataType, Row originRow, boolean logWarn) {
-        // Handle rows with blank values for 'String' data-type in primary-key fields
-        if (index < idColTypes.size() && colData == null && dataType == String.class) {
-            if (logWarn) {
-                logger.warn("For row with Key: {}, found String primary-key column {} with blank value",
-                        getKey(originRow), allCols[index]);
-            }
-            return Optional.of("");
-        }
-
-        // Handle rows with blank values for 'timestamp' data-type in primary-key fields
-        if (index < idColTypes.size() && colData == null && dataType == Instant.class) {
-            if (tsReplaceValStr.isEmpty()) {
-                logger.error("Skipping row with Key: {} as Timestamp primary-key column {} has invalid blank value. " +
-                        "Alternatively rerun the job with --conf " + KnownProperties.TARGET_REPLACE_MISSING_TS + "\" <fixed-epoch-value>\" " +
-                        "option to replace the blanks with a fixed timestamp value", getKey(originRow), allCols[index]);
-                return Optional.empty();
-            }
-            if (logWarn) {
-                logger.warn("For row with Key: {}, found Timestamp primary-key column {} with invalid blank value. " +
-                        "Using value {} instead", getKey(originRow), allCols[index], Instant.ofEpochSecond(tsReplaceVal));
-            }
-            return Optional.of(Instant.ofEpochSecond(tsReplaceVal));
-        }
 
-        return Optional.of(colData);
+        cqlHelper.initialize();
     }
 
 }
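
Taken together, this diff moves session wiring and all statement-construction logic out of `AbstractJobSession` and behind `cqlHelper`. For orientation, here is a minimal sketch of the shape that delegation implies. Only the three setters and `initialize()` are visible in this diff, so the fields and method body below are illustrative assumptions, not the project's actual `CqlHelper`.

```java
import com.datastax.oss.driver.api.core.CqlSession;

// Hypothetical skeleton inferred from the calls visible in this diff.
public class CqlHelperSketch {
    private CqlSession originSession;
    private CqlSession targetSession;
    private boolean jobMigrateRowsFromFile;

    public void setOriginSession(CqlSession originSession) {
        this.originSession = originSession;
    }

    public void setTargetSession(CqlSession targetSession) {
        this.targetSession = targetSession;
    }

    public void setJobMigrateRowsFromFile(boolean jobMigrateRowsFromFile) {
        this.jobMigrateRowsFromFile = jobMigrateRowsFromFile;
    }

    public void initialize() {
        // Presumably performs the work deleted from the constructor above:
        // read the ORIGIN_*/TARGET_* properties, assemble the SELECT and
        // INSERT/UPDATE CQL strings, and prepare them on the two sessions.
    }
}
```

Whatever the exact implementation, the benefit is visible in the deletions: the job session now only configures rate limiters, retries, and stats reporting, while query building, TTL/writetime handling, and blank-primary-key repair are consolidated in one helper that can be exercised without running a Spark job.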