Commit e6f8868

WIP work, before creating new SIT test
1 parent 4a66df9 commit e6f8868

File tree

7 files changed: +305 -184 lines changed

src/main/java/datastax/astra/migrate/AbstractJobSession.java

Lines changed: 9 additions & 179 deletions
@@ -10,15 +10,13 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.time.Duration;
 import java.time.Instant;
-import java.util.List;
 import java.util.Optional;
-import java.util.stream.IntStream;
 
 public class AbstractJobSession extends BaseJobSession {
 
     public Logger logger = LoggerFactory.getLogger(this.getClass().getName());
+    protected CqlHelper cqlHelper;
 
     protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc) {
         this(sourceSession, astraSession, sc, false);
@@ -33,6 +31,7 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
 
         this.sourceSession = sourceSession;
         this.astraSession = astraSession;
+        this.cqlHelper = new CqlHelper(this.propertyHelper, this.sourceSession, this.astraSession, isJobMigrateRowsFromFile, this);
 
         batchSize = propertyHelper.getInteger(KnownProperties.SPARK_BATCH_SIZE);
         fetchSizeInRows = propertyHelper.getInteger(KnownProperties.READ_FETCH_SIZE);
@@ -47,9 +46,6 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
         writeLimiter = RateLimiter.create(propertyHelper.getInteger(KnownProperties.SPARK_LIMIT_WRITE));
         maxRetries = propertyHelper.getInteger(KnownProperties.SPARK_MAX_RETRIES);
 
-        sourceKeyspaceTable = propertyHelper.getString(KnownProperties.ORIGIN_KEYSPACE_TABLE);
-        targetKeyspaceTable = propertyHelper.getString(KnownProperties.TARGET_KEYSPACE_TABLE);
-
         ttlCols = propertyHelper.getIntegerList(KnownProperties.ORIGIN_TTL_COLS);
         writeTimeStampCols = propertyHelper.getIntegerList(KnownProperties.ORIGIN_WRITETIME_COLS);
 
@@ -62,6 +58,7 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
         maxWriteTimeStampFilter = propertyHelper.getLong(KnownProperties.ORIGIN_FILTER_WRITETS_MAX);
 
         customWritetime = propertyHelper.getLong(KnownProperties.TARGET_CUSTOM_WRITETIME);
+        isCounterTable = propertyHelper.getBoolean(KnownProperties.ORIGIN_IS_COUNTER);
 
         logger.info("PARAM -- Read Consistency: {}", readConsistencyLevel);
         logger.info("PARAM -- Write Consistency: {}", writeConsistencyLevel);
@@ -82,197 +79,30 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
                     Instant.ofEpochMilli(maxWriteTimeStampFilter / 1000));
         }
 
-        String selectCols = String.join(",", propertyHelper.getStringList(KnownProperties.ORIGIN_COLUMN_NAMES));
-        String partitionKey = String.join(",", propertyHelper.getStringList(KnownProperties.ORIGIN_PARTITION_KEY));
-        String sourceSelectCondition = propertyHelper.getString(KnownProperties.ORIGIN_FILTER_CONDITION);
-        if (null != sourceSelectCondition && !sourceSelectCondition.isEmpty() && !sourceSelectCondition.trim().toUpperCase().startsWith("AND")) {
-            sourceSelectCondition = " AND " + sourceSelectCondition;
-            propertyHelper.setProperty(KnownProperties.ORIGIN_FILTER_CONDITION, sourceSelectCondition);
-        }
 
-        final StringBuilder selectTTLWriteTimeCols = new StringBuilder();
-        allCols = propertyHelper.getStringList(KnownProperties.ORIGIN_COLUMN_NAMES);
-        if (null != ttlCols) {
-            ttlCols.forEach(col -> {
-                selectTTLWriteTimeCols.append(",ttl(" + allCols.get(col) + ")");
-            });
-        }
-        if (null != writeTimeStampCols) {
-            writeTimeStampCols.forEach(col -> {
-                selectTTLWriteTimeCols.append(",writetime(" + allCols.get(col) + ")");
-            });
-        }
-        selectColTypes = propertyHelper.getMigrationTypeList(KnownProperties.ORIGIN_COLUMN_TYPES);
-        String idCols = String.join(",", propertyHelper.getStringList(KnownProperties.TARGET_PRIMARY_KEY));
-        idColTypes = selectColTypes.subList(0, idCols.split(",").length);
 
-        String insertCols = String.join(",", propertyHelper.getStringList(KnownProperties.TARGET_COLUMN_NAMES));
-        if (null == insertCols || insertCols.trim().isEmpty()) {
-            insertCols = selectCols;
-            propertyHelper.setProperty(KnownProperties.TARGET_COLUMN_NAMES, propertyHelper.getStringList(KnownProperties.ORIGIN_COLUMN_NAMES));
-        }
-        String insertBinds = "";
-        for (String str : idCols.split(",")) {
-            if (insertBinds.isEmpty()) {
-                insertBinds = str + "= ?";
-            } else {
-                insertBinds += " and " + str + "= ?";
-            }
-        }
-
-        String fullSelectQuery;
-        if (!isJobMigrateRowsFromFile) {
-            fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols + " from " + sourceKeyspaceTable +
-                    " where token(" + partitionKey.trim() + ") >= ? and token(" + partitionKey.trim() + ") <= ? " +
-                    sourceSelectCondition + " ALLOW FILTERING";
-        } else {
-            fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols + " from " + sourceKeyspaceTable + " where " + insertBinds;
-        }
+        cqlHelper.initialize();
+        String fullSelectQuery = cqlHelper.getCqlString(CqlHelper.CqlStatementType.ORIGIN_SELECT);
         logger.info("PARAM -- ORIGIN SELECT Query used: {}", fullSelectQuery);
         sourceSelectStatement = sourceSession.prepare(fullSelectQuery);
 
-        astraSelectStatement = astraSession.prepare(
-                "select " + insertCols + " from " + targetKeyspaceTable
-                        + " where " + insertBinds);
+        astraSelectStatement = astraSession.prepare(cqlHelper.getCqlString(CqlHelper.CqlStatementType.TARGET_SELECT_BY_PK));
 
         hasRandomPartitioner = propertyHelper.getBoolean(KnownProperties.ORIGIN_HAS_RANDOM_PARTITIONER);
-        isCounterTable = propertyHelper.getBoolean(KnownProperties.ORIGIN_IS_COUNTER);
-        if (isCounterTable) {
-            updateSelectMapping = propertyHelper.getIntegerList(KnownProperties.ORIGIN_COUNTER_INDEX);
-            logger.info("PARAM -- TARGET INSERT Query used: {}", KnownProperties.ORIGIN_COUNTER_CQL);
-            astraInsertStatement = astraSession.prepare(propertyHelper.getString(KnownProperties.ORIGIN_COUNTER_CQL));
-        } else {
-            insertBinds = "";
-            for (String str : insertCols.split(",")) {
-                if (insertBinds.isEmpty()) {
-                    insertBinds += "?";
-                } else {
-                    insertBinds += ", ?";
-                }
-            }
 
-            String fullInsertQuery = "insert into " + targetKeyspaceTable + " (" + insertCols + ") VALUES (" + insertBinds + ")";
-            if (null != ttlCols && !ttlCols.isEmpty()) {
-                fullInsertQuery += " USING TTL ?";
-                if (null != writeTimeStampCols && !writeTimeStampCols.isEmpty()) {
-                    fullInsertQuery += " AND TIMESTAMP ?";
-                }
-            } else if (null != writeTimeStampCols && !writeTimeStampCols.isEmpty()) {
-                fullInsertQuery += " USING TIMESTAMP ?";
-            }
-            logger.info("PARAM -- TARGET INSERT Query used: {}", fullInsertQuery);
-            astraInsertStatement = astraSession.prepare(fullInsertQuery);
-        }
+        astraInsertStatement = astraSession.prepare(cqlHelper.getCqlString(CqlHelper.CqlStatementType.TARGET_INSERT));
 
         // Handle rows with blank values for 'timestamp' data-type in primary-key fields
         if (null != propertyHelper.getLong(KnownProperties.TARGET_REPLACE_MISSING_TS))
             tsReplaceVal = propertyHelper.getLong(KnownProperties.TARGET_REPLACE_MISSING_TS);
     }
 
     public BoundStatement bindInsert(PreparedStatement insertStatement, Row sourceRow, Row astraRow) {
-        BoundStatement boundInsertStatement = insertStatement.bind().setConsistencyLevel(writeConsistencyLevel);
-
-        if (isCounterTable) {
-            for (int index = 0; index < selectColTypes.size(); index++) {
-                MigrateDataType dataType = selectColTypes.get(updateSelectMapping.get(index));
-                // compute the counter delta if reading from astra for the difference
-                if (astraRow != null && index < (selectColTypes.size() - idColTypes.size())) {
-                    boundInsertStatement = boundInsertStatement.set(index, (sourceRow.getLong(updateSelectMapping.get(index)) - astraRow.getLong(updateSelectMapping.get(index))), Long.class);
-                } else {
-                    boundInsertStatement = boundInsertStatement.set(index, getData(dataType, updateSelectMapping.get(index), sourceRow), dataType.typeClass);
-                }
-            }
-        } else {
-            int index = 0;
-            for (index = 0; index < selectColTypes.size(); index++) {
-                boundInsertStatement = getBoundStatement(sourceRow, boundInsertStatement, index, selectColTypes);
-                if (boundInsertStatement == null) return null;
-            }
-
-            if (null != ttlCols && !ttlCols.isEmpty()) {
-                boundInsertStatement = boundInsertStatement.set(index, getLargestTTL(sourceRow), Integer.class);
-                index++;
-            }
-            if (null != writeTimeStampCols && !writeTimeStampCols.isEmpty()) {
-                if (customWritetime > 0) {
-                    boundInsertStatement = boundInsertStatement.set(index, customWritetime, Long.class);
-                } else {
-                    boundInsertStatement = boundInsertStatement.set(index, getLargestWriteTimeStamp(sourceRow), Long.class);
-                }
-            }
-        }
-
-        // Batch insert for large records may take longer, hence 10 secs to avoid timeout errors
-        return boundInsertStatement.setTimeout(Duration.ofSeconds(10));
-    }
-
-    public int getLargestTTL(Row sourceRow) {
-        return IntStream.range(0, ttlCols.size())
-                .map(i -> sourceRow.getInt(selectColTypes.size() + i)).max().getAsInt();
-    }
-
-    public long getLargestWriteTimeStamp(Row sourceRow) {
-        return IntStream.range(0, writeTimeStampCols.size())
-                .mapToLong(i -> sourceRow.getLong(selectColTypes.size() + ttlCols.size() + i)).max().getAsLong();
+        return cqlHelper.bindInsert(insertStatement, sourceRow, astraRow);
     }
 
     public BoundStatement selectFromAstra(PreparedStatement selectStatement, Row sourceRow) {
-        BoundStatement boundSelectStatement = selectStatement.bind().setConsistencyLevel(readConsistencyLevel);
-        for (int index = 0; index < idColTypes.size(); index++) {
-            boundSelectStatement = getBoundStatement(sourceRow, boundSelectStatement, index, idColTypes);
-            if (boundSelectStatement == null) return null;
-        }
-
-        return boundSelectStatement;
-    }
-
-    private BoundStatement getBoundStatement(Row sourceRow, BoundStatement boundSelectStatement, int index,
-                                             List<MigrateDataType> cols) {
-        MigrateDataType dataTypeObj = cols.get(index);
-        Object colData = getData(dataTypeObj, index, sourceRow);
-
-        // Handle rows with blank values in primary-key fields
-        if (index < idColTypes.size()) {
-            Optional<Object> optionalVal = handleBlankInPrimaryKey(index, colData, dataTypeObj.typeClass, sourceRow);
-            if (!optionalVal.isPresent()) {
-                return null;
-            }
-            colData = optionalVal.get();
-        }
-        boundSelectStatement = boundSelectStatement.set(index, colData, dataTypeObj.typeClass);
-        return boundSelectStatement;
-    }
-
-    protected Optional<Object> handleBlankInPrimaryKey(int index, Object colData, Class dataType, Row sourceRow) {
-        return handleBlankInPrimaryKey(index, colData, dataType, sourceRow, true);
-    }
-
-    protected Optional<Object> handleBlankInPrimaryKey(int index, Object colData, Class dataType, Row sourceRow, boolean logWarn) {
-        // Handle rows with blank values for 'String' data-type in primary-key fields
-        if (index < idColTypes.size() && colData == null && dataType == String.class) {
-            if (logWarn) {
-                logger.warn("For row with Key: {}, found String primary-key column {} with blank value",
-                        getKey(sourceRow), allCols.get(index));
-            }
-            return Optional.of("");
-        }
-
-        // Handle rows with blank values for 'timestamp' data-type in primary-key fields
-        if (index < idColTypes.size() && colData == null && dataType == Instant.class) {
-            if (tsReplaceValStr.isEmpty()) {
-                logger.error("Skipping row with Key: {} as Timestamp primary-key column {} has invalid blank value. " +
-                        "Alternatively rerun the job with --conf spark.target.replace.blankTimestampKeyUsingEpoch=\"<fixed-epoch-value>\" " +
-                        "option to replace the blanks with a fixed timestamp value", getKey(sourceRow), allCols.get(index));
-                return Optional.empty();
-            }
-            if (logWarn) {
-                logger.warn("For row with Key: {}, found Timestamp primary-key column {} with invalid blank value. " +
-                        "Using value {} instead", getKey(sourceRow), allCols.get(index), Instant.ofEpochSecond(tsReplaceVal));
-            }
-            return Optional.of(Instant.ofEpochSecond(tsReplaceVal));
-        }
-
-        return Optional.of(colData);
+        return cqlHelper.selectFromTargetByPK(selectStatement, sourceRow);
     }
 
 }
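
Note (not part of the commit's hunks): the constructor and binding logic above now delegate to the new CqlHelper, whose source is not shown in this diff. The following is only a minimal sketch of the surface implied by the call sites; the enum constants, constructor arguments, and method names come from the diff, while field types (including the PropertyHelper and BaseJobSession parameter types), import paths, and all bodies are assumptions.

// Hypothetical sketch reconstructed from call sites; not the actual CqlHelper.
import java.util.EnumMap;
import java.util.Map;
import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.BoundStatement;
import com.datastax.oss.driver.api.core.cql.PreparedStatement;
import com.datastax.oss.driver.api.core.cql.Row;

public class CqlHelper {
    public enum CqlStatementType { ORIGIN_SELECT, TARGET_SELECT_BY_PK, TARGET_INSERT }

    // Statement strings built once by initialize(); storage choice is assumed.
    private final Map<CqlStatementType, String> cqlStrings = new EnumMap<>(CqlStatementType.class);

    public CqlHelper(PropertyHelper propertyHelper, CqlSession originSession, CqlSession targetSession,
                     boolean isJobMigrateRowsFromFile, BaseJobSession jobSession) {
        // retain references; fields omitted in this sketch
    }

    // Presumably builds the three statements the AbstractJobSession constructor
    // previously assembled inline: the token-range origin SELECT (with optional
    // filter condition and ttl/writetime projections), the target SELECT-by-PK,
    // and the target INSERT with optional USING TTL / TIMESTAMP clauses.
    public void initialize() { /* populate cqlStrings */ }

    public String getCqlString(CqlStatementType type) {
        return cqlStrings.get(type);
    }

    // Row-binding logic relocated from AbstractJobSession; see the deleted
    // lines above for the previous behavior. Stub bodies only.
    public BoundStatement bindInsert(PreparedStatement insert, Row originRow, Row targetRow) { return null; }
    public BoundStatement selectFromTargetByPK(PreparedStatement select, Row originRow) { return null; }
    public long getLargestWriteTimeStamp(Row originRow) { return 0L; }
}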

src/main/java/datastax/astra/migrate/BaseJobSession.java

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ public abstract class BaseJobSession {
     protected CqlSession astraSession;
     protected List<MigrateDataType> selectColTypes = new ArrayList<MigrateDataType>();
     protected List<MigrateDataType> idColTypes = new ArrayList<MigrateDataType>();
-    protected List<Integer> updateSelectMapping = new ArrayList<Integer>();
+    // protected List<Integer> updateSelectMapping = new ArrayList<Integer>();
 
     protected Integer batchSize = 1;
     protected Integer fetchSizeInRows = 1000;
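
The only consumer of updateSelectMapping was the counter-table branch of bindInsert(), deleted from AbstractJobSession above, which would explain why the field is parked as a comment here rather than removed outright. For reference, the rule that branch implemented, restated as a standalone helper (a sketch; the class and method names are illustrative, and the simplified direct index stands in for the updateSelectMapping lookup):

import com.datastax.oss.driver.api.core.cql.Row;

final class CounterDeltaSketch {
    // Cassandra counter columns cannot be overwritten, only incremented, so the
    // removed code bound a delta: the origin value minus whatever the target
    // already holds, or the full origin value when the target row is absent.
    static long counterDelta(Row originRow, Row targetRow, int counterColIndex) {
        long origin = originRow.getLong(counterColIndex);
        return (targetRow == null) ? origin : origin - targetRow.getLong(counterColIndex);
    }
}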

src/main/java/datastax/astra/migrate/CopyJobSession.java

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ public void getDataAndInsert(BigInteger min, BigInteger max) {
         }
         if (writeTimeStampFilter) {
             // only process rows greater than writeTimeStampFilter
-            Long sourceWriteTimeStamp = getLargestWriteTimeStamp(sourceRow);
+            Long sourceWriteTimeStamp = cqlHelper.getLargestWriteTimeStamp(sourceRow);
             if (sourceWriteTimeStamp < minWriteTimeStampFilter
                     || sourceWriteTimeStamp > maxWriteTimeStampFilter) {
                 skipCnt++;
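
This one-line change points CopyJobSession at cqlHelper.getLargestWriteTimeStamp(), whose new home is not in the hunks shown. Assuming it moved over from AbstractJobSession with the behavior of the deleted version above, its logic restated as a self-contained helper (illustrative names; parameters replace the BaseJobSession fields the original read):

import java.util.List;
import java.util.stream.IntStream;
import com.datastax.oss.driver.api.core.cql.Row;

final class WritetimeSketch {
    // The origin SELECT appends ttl(...) and then writetime(...) projections
    // after the data columns, so writetime values start at index
    // dataColCount + ttlColCount; the row's effective writetime is their max.
    static long largestWriteTimeStamp(Row originRow, int dataColCount, int ttlColCount,
                                      List<Integer> writeTimeStampCols) {
        return IntStream.range(0, writeTimeStampCols.size())
                .mapToLong(i -> originRow.getLong(dataColCount + ttlColCount + i))
                .max().getAsLong();
    }
}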
