 import java.time.Duration;
 import java.time.Instant;
-import java.util.Map;
+import java.util.List;
+import java.util.Optional;
 import java.util.stream.IntStream;
 
 public class AbstractJobSession extends BaseJobSession {
@@ -25,11 +26,11 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc, boolean isJobMigrateRowsFromFile) {
 
     protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc, boolean isJobMigrateRowsFromFile) {
         super(sc);
-
+
         if (sourceSession == null) {
             return;
         }
-
+
         this.sourceSession = sourceSession;
         this.astraSession = astraSession;
@@ -105,14 +106,14 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc, boolean isJobMigrateRowsFromFile) {
         }
 
         String selectCols = Util.getSparkProp(sc, "spark.query.origin");
-        String partionKey = Util.getSparkProp(sc, "spark.query.origin.partitionKey");
+        String partitionKey = Util.getSparkProp(sc, "spark.query.origin.partitionKey");
         String sourceSelectCondition = Util.getSparkPropOrEmpty(sc, "spark.query.condition");
         if (!sourceSelectCondition.isEmpty() && !sourceSelectCondition.trim().toUpperCase().startsWith("AND")) {
             sourceSelectCondition = " AND " + sourceSelectCondition;
         }
 
         final StringBuilder selectTTLWriteTimeCols = new StringBuilder();
-        String[] allCols = selectCols.split(",");
+        allCols = selectCols.split(",");
         ttlCols.forEach(col -> {
             selectTTLWriteTimeCols.append(",ttl(" + allCols[col] + ")");
         });
@@ -138,8 +139,9 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc, boolean isJobMigrateRowsFromFile) {
 
         String fullSelectQuery;
         if (!isJobMigrateRowsFromFile) {
-            fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols + " from " + sourceKeyspaceTable + " where token(" + partionKey.trim()
-                    + ") >= ? and token(" + partionKey.trim() + ") <= ? " + sourceSelectCondition + " ALLOW FILTERING";
+            fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols + " from " + sourceKeyspaceTable +
+                    " where token(" + partitionKey.trim() + ") >= ? and token(" + partitionKey.trim() + ") <= ? " +
+                    sourceSelectCondition + " ALLOW FILTERING";
         } else {
            fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols + " from " + sourceKeyspaceTable + " where " + insertBinds;
         }
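
(Reviewer note, for illustration only: with hypothetical settings spark.query.origin="id,ts,data" and spark.query.origin.partitionKey="id", an empty spark.query.condition, and no TTL/writetime columns, the concatenation above would produce a query along the lines of "select id,ts,data from ks.tbl where token(id) >= ? and token(id) <= ? ALLOW FILTERING", where ks.tbl stands in for sourceKeyspaceTable.)
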
@@ -181,6 +183,12 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc, boolean isJobMigrateRowsFromFile) {
             }
             astraInsertStatement = astraSession.prepare(fullInsertQuery);
         }
+
+        // Handle rows with blank values for 'timestamp' data-type in primary-key fields
+        tsReplaceValStr = Util.getSparkPropOr(sc, "spark.target.replace.blankTimestampKeyUsingEpoch", "");
+        if (!tsReplaceValStr.isEmpty()) {
+            tsReplaceVal = Long.parseLong(tsReplaceValStr);
+        }
     }
 
     public BoundStatement bindInsert(PreparedStatement insertStatement, Row sourceRow, Row astraRow) {
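
(Reviewer note: the new property is opt-in. Running the job with, e.g., --conf spark.target.replace.blankTimestampKeyUsingEpoch="0" (where 0 is an arbitrary example value) would replace blank timestamp primary-key values with Instant.ofEpochSecond(0), i.e. 1970-01-01T00:00:00Z; without it, such rows are skipped, as the error path further below shows.)
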
@@ -199,21 +207,8 @@ public BoundStatement bindInsert(PreparedStatement insertStatement, Row sourceRow, Row astraRow) {
         } else {
             int index = 0;
             for (index = 0; index < selectColTypes.size(); index++) {
-                MigrateDataType dataTypeObj = selectColTypes.get(index);
-                Class dataType = dataTypeObj.typeClass;
-
-                try {
-                    Object colData = getData(dataTypeObj, index, sourceRow);
-                    if (index < idColTypes.size() && colData == null && dataType == String.class) {
-                        colData = "";
-                    }
-                    boundInsertStatement = boundInsertStatement.set(index, colData, dataType);
-                } catch (NullPointerException e) {
-                    // ignore the exception for map values being null
-                    if (dataType != Map.class) {
-                        throw e;
-                    }
-                }
+                boundInsertStatement = getBoundStatement(sourceRow, boundInsertStatement, index, selectColTypes);
+                if (boundInsertStatement == null) return null;
             }
 
             if (!ttlCols.isEmpty()) {
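
(Reviewer note: returning null from getBoundStatement, and propagating it here and in selectFromAstra below, is how an unfixable blank primary-key value now causes the whole row to be skipped instead of throwing; the "Skipping row" error log in handleBlankInPrimaryKey records that case.)
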
@@ -246,12 +241,60 @@ public long getLargestWriteTimeStamp(Row sourceRow) {
     public BoundStatement selectFromAstra(PreparedStatement selectStatement, Row sourceRow) {
         BoundStatement boundSelectStatement = selectStatement.bind().setConsistencyLevel(readConsistencyLevel);
         for (int index = 0; index < idColTypes.size(); index++) {
-            MigrateDataType dataType = idColTypes.get(index);
-            boundSelectStatement = boundSelectStatement.set(index, getData(dataType, index, sourceRow),
-                    dataType.typeClass);
+            boundSelectStatement = getBoundStatement(sourceRow, boundSelectStatement, index, idColTypes);
+            if (boundSelectStatement == null) return null;
         }
 
         return boundSelectStatement;
     }
 
+    private BoundStatement getBoundStatement(Row sourceRow, BoundStatement boundSelectStatement, int index,
+                                             List<MigrateDataType> cols) {
+        MigrateDataType dataTypeObj = cols.get(index);
+        Object colData = getData(dataTypeObj, index, sourceRow);
+
+        // Handle rows with blank values in primary-key fields
+        if (index < idColTypes.size()) {
+            Optional<Object> optionalVal = handleBlankInPrimaryKey(index, colData, dataTypeObj.typeClass, sourceRow);
+            if (!optionalVal.isPresent()) {
+                return null;
+            }
+            colData = optionalVal.get();
+        }
+        boundSelectStatement = boundSelectStatement.set(index, colData, dataTypeObj.typeClass);
+        return boundSelectStatement;
+    }
+
+    protected Optional<Object> handleBlankInPrimaryKey(int index, Object colData, Class dataType, Row sourceRow) {
+        return handleBlankInPrimaryKey(index, colData, dataType, sourceRow, true);
+    }
+
+    protected Optional<Object> handleBlankInPrimaryKey(int index, Object colData, Class dataType, Row sourceRow, boolean logWarn) {
+        // Handle rows with blank values for 'String' data-type in primary-key fields
+        if (index < idColTypes.size() && colData == null && dataType == String.class) {
+            if (logWarn) {
+                logger.warn("For row with Key: {}, found String primary-key column {} with blank value",
+                        getKey(sourceRow), allCols[index]);
+            }
+            return Optional.of("");
+        }
+
+        // Handle rows with blank values for 'timestamp' data-type in primary-key fields
+        if (index < idColTypes.size() && colData == null && dataType == Instant.class) {
+            if (tsReplaceValStr.isEmpty()) {
+                logger.error("Skipping row with Key: {} as Timestamp primary-key column {} has invalid blank value. " +
+                        "Alternatively rerun the job with --conf spark.target.replace.blankTimestampKeyUsingEpoch=\"<fixed-epoch-value>\" " +
+                        "option to replace the blanks with a fixed timestamp value", getKey(sourceRow), allCols[index]);
+                return Optional.empty();
+            }
+            if (logWarn) {
+                logger.warn("For row with Key: {}, found Timestamp primary-key column {} with invalid blank value. " +
+                        "Using value {} instead", getKey(sourceRow), allCols[index], Instant.ofEpochSecond(tsReplaceVal));
+            }
+            return Optional.of(Instant.ofEpochSecond(tsReplaceVal));
+        }
+
+        return Optional.of(colData);
+    }
+
 }
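
(Reviewer note: a minimal standalone sketch of the blank-key semantics introduced above, assuming simplified stand-ins for the class fields and helpers; it is not part of the patch.)

import java.time.Instant;
import java.util.Optional;

public class BlankKeySketch {
    // Mirrors handleBlankInPrimaryKey for a single column; tsReplaceValStr
    // stands in for the new spark.target.replace.blankTimestampKeyUsingEpoch value.
    static Optional<Object> handleBlank(Object colData, Class<?> dataType, String tsReplaceValStr) {
        if (colData == null && dataType == String.class) {
            return Optional.of("");                        // blank String key -> empty string
        }
        if (colData == null && dataType == Instant.class) {
            if (tsReplaceValStr.isEmpty()) {
                return Optional.empty();                   // no replacement configured -> row is skipped
            }
            return Optional.of(Instant.ofEpochSecond(Long.parseLong(tsReplaceValStr)));
        }
        return Optional.of(colData);                       // non-blank values pass through unchanged
    }

    public static void main(String[] args) {
        System.out.println(handleBlank(null, String.class, ""));   // Optional[]
        System.out.println(handleBlank(null, Instant.class, ""));  // Optional.empty
        System.out.println(handleBlank(null, Instant.class, "0")); // Optional[1970-01-01T00:00:00Z]
    }
}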