|
2 | 2 |
|
3 | 3 | import com.datastax.oss.driver.api.core.CqlSession;
|
4 | 4 | import com.datastax.oss.driver.api.core.cql.*;
|
5 |
| -import datastax.astra.migrate.cql.CqlHelper; |
| 5 | +import datastax.astra.migrate.cql.PKFactory; |
| 6 | +import datastax.astra.migrate.cql.Record; |
| 7 | +import datastax.astra.migrate.cql.statements.OriginSelectByPartitionRangeStatement; |
| 8 | +import datastax.astra.migrate.cql.statements.TargetInsertStatement; |
| 9 | +import datastax.astra.migrate.cql.statements.TargetSelectByPKStatement; |
| 10 | +import datastax.astra.migrate.cql.statements.TargetUpdateStatement; |
6 | 11 | import org.apache.spark.SparkConf;
|
7 | 12 | import org.slf4j.Logger;
|
8 | 13 | import org.slf4j.LoggerFactory;
|
9 | 14 |
|
10 | 15 | import java.math.BigInteger;
|
11 |
| -import java.util.ArrayList; |
12 |
| -import java.util.List; |
13 |
| -import java.util.Collection; |
| 16 | +import java.util.*; |
14 | 17 | import java.util.concurrent.CompletionStage;
|
15 | 18 | import java.util.concurrent.atomic.AtomicLong;
|
16 | 19 |
|
@@ -42,127 +45,100 @@ public static CopyJobSession getInstance(CqlSession originSession, CqlSession ta
|
42 | 45 | public void getDataAndInsert(BigInteger min, BigInteger max) {
|
43 | 46 | logger.info("ThreadID: {} Processing min: {} max: {}", Thread.currentThread().getId(), min, max);
|
44 | 47 | boolean done = false;
|
| 48 | + boolean batching = false; |
45 | 49 | int maxAttempts = maxRetries + 1;
|
46 | 50 | for (int attempts = 1; attempts <= maxAttempts && !done; attempts++) {
|
47 | 51 | long readCnt = 0;
|
48 | 52 | long writeCnt = 0;
|
49 | 53 | long skipCnt = 0;
|
50 | 54 | long errCnt = 0;
|
51 | 55 | try {
|
52 |
| - ResultSet resultSet = cqlHelper.getOriginSession().execute( |
53 |
| - cqlHelper.getPreparedStatement(CqlHelper.CQL.ORIGIN_SELECT) |
54 |
| - .bind(cqlHelper.hasRandomPartitioner() ? min : min.longValueExact(), |
55 |
| - cqlHelper.hasRandomPartitioner() ? max : max.longValueExact()) |
56 |
| - .setConsistencyLevel(cqlHelper.getReadConsistencyLevel()) |
57 |
| - .setPageSize(cqlHelper.getFetchSizeInRows())); |
| 56 | + PKFactory pkFactory = cqlHelper.getPKFactory(); |
| 57 | + OriginSelectByPartitionRangeStatement originSelectByPartitionRangeStatement = cqlHelper.getOriginSelectByPartitionRangeStatement(); |
| 58 | + ResultSet resultSet = originSelectByPartitionRangeStatement.execute(originSelectByPartitionRangeStatement.bind(min, max)); |
| 59 | + |
| 60 | + TargetInsertStatement targetInsertStatement = cqlHelper.getTargetInsertStatement(); |
| 61 | + TargetUpdateStatement targetUpdateStatement = cqlHelper.getTargetUpdateStatement(); |
| 62 | + TargetSelectByPKStatement targetSelectByPKStatement = cqlHelper.getTargetSelectByPKStatement(); |
58 | 63 |
|
59 | 64 | Collection<CompletionStage<AsyncResultSet>> writeResults = new ArrayList<CompletionStage<AsyncResultSet>>();
|
60 | 65 |
|
61 | 66 | // cannot do batching if the writeFilter is greater than 0 or
|
62 | 67 | // maxWriteTimeStampFilter is less than max long
|
63 | 68 | // do not batch for counters as it adds latency & increases chance of discrepancy
|
64 |
| - if (cqlHelper.getBatchSize() == 1 || cqlHelper.hasWriteTimestampFilter() || cqlHelper.isCounterTable()) { |
65 |
| - for (Row originRow : resultSet) { |
66 |
| - readLimiter.acquire(1); |
67 |
| - readCnt++; |
68 |
| - if (readCnt % printStatsAfter == 0) { |
69 |
| - printCounts(false); |
70 |
| - } |
| 69 | + batching = !(cqlHelper.getBatchSize() == 1 || cqlHelper.hasWriteTimestampFilter() || cqlHelper.isCounterTable()); |
| 70 | + BatchStatement batch = BatchStatement.newInstance(BatchType.UNLOGGED); // this may not be used |
71 | 71 |
|
72 |
| - // exclusion filter below |
73 |
| - if (cqlHelper.hasFilterColumn()) { |
74 |
| - String col = (String) cqlHelper.getData(cqlHelper.getFilterColType(), cqlHelper.getFilterColIndex(), originRow); |
75 |
| - if (col.trim().equalsIgnoreCase(cqlHelper.getFilterColValue())) { |
76 |
| - logger.warn("Skipping row and filtering out: {}", cqlHelper.getKey(originRow)); |
77 |
| - skipCnt++; |
78 |
| - continue; |
79 |
| - } |
80 |
| - } |
81 |
| - if (cqlHelper.hasWriteTimestampFilter()) { |
82 |
| - // only process rows greater than writeTimeStampFilter |
83 |
| - Long originWriteTimeStamp = cqlHelper.getLargestWriteTimeStamp(originRow); |
84 |
| - if (originWriteTimeStamp < cqlHelper.getMinWriteTimeStampFilter() |
85 |
| - || originWriteTimeStamp > cqlHelper.getMaxWriteTimeStampFilter()) { |
86 |
| - skipCnt++; |
87 |
| - continue; |
88 |
| - } |
89 |
| - } |
90 |
| - writeLimiter.acquire(1); |
| 72 | + boolean isCounterTable = cqlHelper.isCounterTable(); |
| 73 | + CompletionStage<AsyncResultSet> writeResultSet; |
91 | 74 |
|
92 |
| - Row targetRow = null; |
93 |
| - if (cqlHelper.isCounterTable()) { |
94 |
| - ResultSet targetResultSet = cqlHelper.getTargetSession() |
95 |
| - .execute(cqlHelper.selectFromTargetByPK(cqlHelper.getPreparedStatement(CqlHelper.CQL.TARGET_SELECT_ORIGIN_BY_PK), originRow)); |
96 |
| - targetRow = targetResultSet.one(); |
97 |
| - } |
| 75 | + for (Row originRow : resultSet) { |
| 76 | + readLimiter.acquire(1); |
| 77 | + readCnt++; |
| 78 | + if (readCnt % printStatsAfter == 0) { |
| 79 | + printCounts(false); |
| 80 | + } |
98 | 81 |
|
99 |
| - List<BoundStatement> bInsertList = cqlHelper.bindInsert(cqlHelper.getPreparedStatement(CqlHelper.CQL.TARGET_INSERT), originRow, targetRow); |
100 |
| - if (null == bInsertList || bInsertList.isEmpty()) { |
101 |
| - skipCnt++; |
102 |
| - continue; |
103 |
| - } |
104 |
| - for (BoundStatement bInsert : bInsertList) { |
105 |
| - CompletionStage<AsyncResultSet> targetWriteResultSet = cqlHelper.getTargetSession().executeAsync(bInsert); |
106 |
| - writeResults.add(targetWriteResultSet); |
107 |
| - if (writeResults.size() > cqlHelper.getFetchSizeInRows()) { |
108 |
| - writeCnt += iterateAndClearWriteResults(writeResults, 1); |
109 |
| - } |
110 |
| - } |
| 82 | + Record record = new Record(pkFactory.getTargetPK(originRow), originRow, null); |
| 83 | + if (originSelectByPartitionRangeStatement.shouldFilterRecord(record)) { |
| 84 | + skipCnt++; |
| 85 | + continue; |
111 | 86 | }
|
112 | 87 |
|
113 |
| - // clear the write resultset |
114 |
| - writeCnt += iterateAndClearWriteResults(writeResults, 1); |
115 |
| - } else { |
116 |
| - BatchStatement batchStatement = BatchStatement.newInstance(BatchType.UNLOGGED); |
117 |
| - for (Row originRow : resultSet) { |
118 |
| - readLimiter.acquire(1); |
119 |
| - readCnt++; |
120 |
| - if (readCnt % printStatsAfter == 0) { |
121 |
| - printCounts(false); |
122 |
| - } |
| 88 | + for (Record r : pkFactory.toValidRecordList(record)) { |
| 89 | + writeLimiter.acquire(1); |
123 | 90 |
|
124 |
| - if (cqlHelper.hasFilterColumn()) { |
125 |
| - String colValue = (String) cqlHelper.getData(cqlHelper.getFilterColType(), cqlHelper.getFilterColIndex(), originRow); |
126 |
| - if (colValue.trim().equalsIgnoreCase(cqlHelper.getFilterColValue())) { |
127 |
| - logger.warn("Skipping row and filtering out: {}", cqlHelper.getKey(originRow)); |
128 |
| - skipCnt++; |
129 |
| - continue; |
| 91 | + BoundStatement boundUpsert; |
| 92 | + if (isCounterTable) { |
| 93 | + Record targetRecord = targetSelectByPKStatement.getRecord(r.getPk()); |
| 94 | + if (null != targetRecord) { |
| 95 | + r.setTargetRow(targetRecord.getTargetRow()); |
130 | 96 | }
|
| 97 | + boundUpsert = targetUpdateStatement.bindRecord(r); |
| 98 | + } |
| 99 | + else { |
| 100 | + boundUpsert = targetInsertStatement.bindRecord(r); |
131 | 101 | }
|
132 | 102 |
|
133 |
| - writeLimiter.acquire(1); |
134 |
| - List<BoundStatement> bInsertList = cqlHelper.bindInsert(cqlHelper.getPreparedStatement(CqlHelper.CQL.TARGET_INSERT), originRow, null); |
135 |
| - if (null == bInsertList || bInsertList.isEmpty()) { |
136 |
| - skipCnt++; |
| 103 | + if (null == boundUpsert) { |
| 104 | + skipCnt++; // TODO: this previously skipped, why not errCnt? |
137 | 105 | continue;
|
138 | 106 | }
|
139 |
| - for (BoundStatement bInsert : bInsertList) { |
140 |
| - batchStatement = batchStatement.add(bInsert); |
141 | 107 |
|
142 |
| - // if batch threshold is met, send the writes and clear the batch |
143 |
| - if (batchStatement.size() >= cqlHelper.getBatchSize()) { |
144 |
| - CompletionStage<AsyncResultSet> writeResultSet = cqlHelper.getTargetSession().executeAsync(batchStatement); |
| 108 | + if (batching) { |
| 109 | + batch = batch.add(boundUpsert); |
| 110 | + if (batch.size() >= cqlHelper.getBatchSize()) { |
| 111 | + writeResultSet = isCounterTable ? targetUpdateStatement.executeAsync(batch) : targetInsertStatement.executeAsync(batch); |
145 | 112 | writeResults.add(writeResultSet);
|
146 |
| - batchStatement = BatchStatement.newInstance(BatchType.UNLOGGED); |
| 113 | + batch = BatchStatement.newInstance(BatchType.UNLOGGED); |
147 | 114 | }
|
148 | 115 |
|
149 | 116 | if (writeResults.size() * cqlHelper.getBatchSize() > cqlHelper.getFetchSizeInRows()) {
|
150 | 117 | writeCnt += iterateAndClearWriteResults(writeResults, cqlHelper.getBatchSize());
|
151 | 118 | }
|
152 | 119 | }
|
| 120 | + else { |
| 121 | + writeResultSet = isCounterTable ? targetUpdateStatement.executeAsync(boundUpsert) : targetInsertStatement.executeAsync(boundUpsert); |
| 122 | + writeResults.add(writeResultSet); |
| 123 | + if (writeResults.size() > cqlHelper.getFetchSizeInRows()) { |
| 124 | + writeCnt += iterateAndClearWriteResults(writeResults, 1); |
| 125 | + } |
| 126 | + } |
153 | 127 | }
|
| 128 | + } |
154 | 129 |
|
155 |
| - // clear the write resultset |
156 |
| - writeCnt += iterateAndClearWriteResults(writeResults, cqlHelper.getBatchSize()); |
157 |
| - |
158 |
| - // if there are any pending writes because the batchSize threshold was not met, then write and clear them |
159 |
| - if (batchStatement.size() > 0) { |
160 |
| - CompletionStage<AsyncResultSet> writeResultSet = cqlHelper.getTargetSession().executeAsync(batchStatement); |
| 130 | + // Flush pending writes |
| 131 | + if (batching) { |
| 132 | + if (batch.size() > 0) { |
| 133 | + writeResultSet = isCounterTable ? targetUpdateStatement.executeAsync(batch) : targetInsertStatement.executeAsync(batch); |
161 | 134 | writeResults.add(writeResultSet);
|
162 |
| - writeCnt += iterateAndClearWriteResults(writeResults, batchStatement.size()); |
163 |
| - batchStatement = BatchStatement.newInstance(BatchType.UNLOGGED); |
| 135 | + writeCnt += iterateAndClearWriteResults(writeResults, batch.size()); |
164 | 136 | }
|
165 | 137 | }
|
| 138 | + else { |
| 139 | + // clear the write resultset |
| 140 | + writeCnt += iterateAndClearWriteResults(writeResults, 1); |
| 141 | + } |
166 | 142 |
|
167 | 143 | readCounter.addAndGet(readCnt);
|
168 | 144 | writeCounter.addAndGet(writeCnt);
|
|
0 commit comments