Skip to content

Commit 266ddc7

Browse files
pravinbhat and msmygit authored
Make Writetime filter + custom writetimestamp work correctly when used together (#331)
* Fixed issue #327 i.e. writetime filter does not work as expected when custom writetimestamp is also used. * Removed deprecated properties `printStatsAfter` and `printStatsPerPart`. Run metrics should now be tracked using the `trackRun` feature instead. * Apply suggestions from code review --------- Co-authored-by: Madhavan <[email protected]>
1 parent 029bddd commit 266ddc7

File tree

12 files changed

+30
-52
lines changed

12 files changed

+30
-52
lines changed

PERF/cdm-v3.properties

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,6 @@ spark.batchSize 10
8080
# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE SOME % OF ROWS (NOT 100%)
8181
#spark.coveragePercent 100
8282

83-
# ENABLE ONLY IF WANT LOG STATS MORE OR LESS FREQUENTLY THAN DEFAULT
84-
#spark.printStatsAfter 100000
85-
8683
# ENABLE ONLY IF YOU WANT TO USE READ AND/OR WRITE CONSISTENCY OTHER THAN LOCAL_QUORUM
8784
#spark.consistency.read LOCAL_QUORUM
8885
#spark.consistency.write LOCAL_QUORUM

PERF/cdm-v4.properties

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,6 @@ spark.cdm.autocorrect.mismatch false
164164
# .read : Default is LOCAL_QUORUM. Read consistency from Origin, and also from Target
165165
# when records are read for comparison purposes.
166166
# .write : Default is LOCAL_QUORUM. Write consistency to Target.
167-
# .printStatsAfter : Default is 100000. Number of rows of processing after which a progress log
168-
# entry will be made.
169167
# .fetchSizeInRows : Default is 1000. This affects the frequency of reads from Origin, and also the
170168
# frequency of flushes to Target.
171169
# .error.limit : Default is 0. Controls how many errors a thread may encounter during MigrateData
@@ -179,7 +177,6 @@ spark.cdm.perfops.readRateLimit 5000
179177
spark.cdm.perfops.writeRateLimit 5000
180178
#spark.cdm.perfops.consistency.read LOCAL_QUORUM
181179
#spark.cdm.perfops.consistency.write LOCAL_QUORUM
182-
#spark.cdm.perfops.printStatsAfter 100000
183180
#spark.cdm.perfops.fetchSizeInRows 1000
184181
#spark.cdm.perfops.error.limit 0
185182

RELEASE.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
# Release Notes
2+
## [5.1.1] - 2024-11-22
3+
- Bug fix: Writetime filter does not work as expected when custom writetimestamp is also used (issue #327).
4+
- Removed deprecated properties `printStatsAfter` and `printStatsPerPart`. Run metrics should now be tracked using the `trackRun` feature instead.
5+
26
## [5.1.0] - 2024-11-15
37
- Improves metrics output by producing stats labels in an intuitive and consistent order
48
- Refactored JobCounter by removing any references to `thread` or `global` as CDM operations are now isolated within partition-ranges (`parts`). Each such `part` is then parallelly processed and aggregated by Spark.

SIT/regression/03_performance/migrate.properties

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,3 @@ spark.cdm.schema.target.keyspaceTable target.regression_performance
2020

2121
spark.cdm.perfops.numParts 32
2222
spark.cdm.perfops.batchSize 1
23-
24-
spark.cdm.perfops.printStatsAfter 450
25-
spark.cdm.perfops.printStatsPerPart true

src/main/java/com/datastax/cdm/cql/statement/OriginSelectStatement.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public abstract class OriginSelectStatement extends BaseCdmStatement {
4141
private final Boolean filterColumnEnabled;
4242
private final Integer filterColumnIndex;
4343
private final String filterColumnString;
44+
private final WritetimeTTL writetimeTTLFeature;
4445

4546
public OriginSelectStatement(IPropertyHelper propertyHelper, EnhancedSession session) {
4647
super(propertyHelper, session);
@@ -50,7 +51,7 @@ public OriginSelectStatement(IPropertyHelper propertyHelper, EnhancedSession ses
5051
throw new RuntimeException("No columns found in table " + cqlTable.getTableName());
5152
}
5253

53-
WritetimeTTL writetimeTTLFeature = (WritetimeTTL) cqlTable.getFeature(Featureset.WRITETIME_TTL);
54+
this.writetimeTTLFeature = (WritetimeTTL) cqlTable.getFeature(Featureset.WRITETIME_TTL);
5455
if (null != writetimeTTLFeature && writetimeTTLFeature.isEnabled()
5556
&& writetimeTTLFeature.hasWriteTimestampFilter()) {
5657
writeTimestampFilterEnabled = true;
@@ -114,14 +115,15 @@ public boolean shouldFilterRecord(Record record) {
114115
}
115116

116117
if (this.writeTimestampFilterEnabled) {
117-
// only process rows greater than writeTimeStampFilter
118-
Long originWriteTimeStamp = record.getPk().getWriteTimestamp();
118+
// only process rows within the writeTimeStampFilter
119+
Long originWriteTimeStamp = writetimeTTLFeature.getLargestWriteTimeStamp(record.getOriginRow());
119120
if (null == originWriteTimeStamp) {
120121
return false;
121122
}
122123
if (originWriteTimeStamp < minWriteTimeStampFilter || originWriteTimeStamp > maxWriteTimeStampFilter) {
123124
if (logger.isInfoEnabled())
124-
logger.info("Timestamp filter removing: {}", record.getPk());
125+
logger.info("Timestamp filter removing record with primary key: {} with write timestamp: {}", record.getPk(),
126+
originWriteTimeStamp);
125127
return true;
126128
}
127129
}

src/main/java/com/datastax/cdm/data/PKFactory.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,16 @@ public EnhancedPK getTargetPK(Row originRow) {
106106
Long originWriteTimeStamp = null;
107107
Integer originTTL = null;
108108
if (FeatureFactory.isEnabled(writetimeTTLFeature)) {
109-
originWriteTimeStamp = writetimeTTLFeature.getLargestWriteTimeStamp(originRow);
110-
originTTL = writetimeTTLFeature.getLargestTTL(originRow);
109+
if (writetimeTTLFeature.getCustomWritetime() > 0) {
110+
originWriteTimeStamp = writetimeTTLFeature.getCustomWritetime();
111+
} else {
112+
originWriteTimeStamp = writetimeTTLFeature.getLargestWriteTimeStamp(originRow);
113+
}
114+
if (writetimeTTLFeature.getCustomTTL() > 0) {
115+
originTTL = writetimeTTLFeature.getCustomTTL().intValue();
116+
} else {
117+
originTTL = writetimeTTLFeature.getLargestTTL(originRow);
118+
}
111119
}
112120
if (explodeMapTargetKeyColumnIndex < 0) {
113121
return new EnhancedPK(this, newValues, getPKClasses(Side.TARGET), originTTL, originWriteTimeStamp);

src/main/java/com/datastax/cdm/feature/WritetimeTTL.java

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -234,10 +234,7 @@ public boolean hasWritetimeColumns() {
234234

235235
public Long getLargestWriteTimeStamp(Row row) {
236236
if (logDebug)
237-
logger.debug("getLargestWriteTimeStamp: customWritetime={}, writetimeSelectColumnIndexes={}",
238-
customWritetime, writetimeSelectColumnIndexes);
239-
if (this.customWritetime > 0)
240-
return this.customWritetime;
237+
logger.debug("getLargestWriteTimeStamp: writetimeSelectColumnIndexes={}", writetimeSelectColumnIndexes);
241238
if (null == this.writetimeSelectColumnIndexes || this.writetimeSelectColumnIndexes.isEmpty())
242239
return null;
243240

@@ -262,9 +259,7 @@ private OptionalLong getMaxWriteTimeStamp(Row row) {
262259

263260
public Integer getLargestTTL(Row row) {
264261
if (logDebug)
265-
logger.debug("getLargestTTL: customTTL={}, ttlSelectColumnIndexes={}", customTTL, ttlSelectColumnIndexes);
266-
if (this.customTTL > 0)
267-
return this.customTTL.intValue();
262+
logger.debug("getLargestTTL: ttlSelectColumnIndexes={}", ttlSelectColumnIndexes);
268263
if (null == this.ttlSelectColumnIndexes || this.ttlSelectColumnIndexes.isEmpty())
269264
return null;
270265

@@ -317,7 +312,7 @@ private void validateTTLColumns(CqlTable originTable) {
317312
}
318313

319314
private void validateWritetimeColumns(CqlTable originTable) {
320-
if (writetimeNames == null || writetimeNames.isEmpty() || customWritetime > 0) {
315+
if (writetimeNames == null || writetimeNames.isEmpty()) {
321316
return;
322317
}
323318

src/main/java/com/datastax/cdm/job/AbstractJobSession.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ public abstract class AbstractJobSession<T> extends BaseJobSession {
4040
protected EnhancedSession originSession;
4141
protected EnhancedSession targetSession;
4242
protected Guardrail guardrailFeature;
43-
protected Long printStatsAfter;
4443
protected TrackRun trackRunFeature;
4544
protected long runId;
4645

@@ -56,15 +55,6 @@ protected AbstractJobSession(CqlSession originSession, CqlSession targetSession,
5655
return;
5756
}
5857

59-
this.printStatsAfter = propertyHelper.getLong(KnownProperties.PRINT_STATS_AFTER);
60-
if (!propertyHelper.meetsMinimum(KnownProperties.PRINT_STATS_AFTER, printStatsAfter, 1L)) {
61-
logger.warn(KnownProperties.PRINT_STATS_AFTER + " must be greater than 0. Setting to default value of "
62-
+ KnownProperties.getDefaultAsString(KnownProperties.PRINT_STATS_AFTER));
63-
propertyHelper.setProperty(KnownProperties.PRINT_STATS_AFTER,
64-
KnownProperties.getDefault(KnownProperties.PRINT_STATS_AFTER));
65-
printStatsAfter = propertyHelper.getLong(KnownProperties.PRINT_STATS_AFTER);
66-
}
67-
6858
rateLimiterOrigin = RateLimiter.create(propertyHelper.getInteger(KnownProperties.PERF_RATELIMIT_ORIGIN));
6959
rateLimiterTarget = RateLimiter.create(propertyHelper.getInteger(KnownProperties.PERF_RATELIMIT_TARGET));
7060

src/main/java/com/datastax/cdm/properties/KnownProperties.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,6 @@ public enum PropertyType {
123123
public static final String READ_CL = "spark.cdm.perfops.consistency.read";
124124
public static final String WRITE_CL = "spark.cdm.perfops.consistency.write";
125125
public static final String PERF_FETCH_SIZE = "spark.cdm.perfops.fetchSizeInRows";
126-
public static final String PRINT_STATS_AFTER = "spark.cdm.perfops.printStatsAfter";
127-
public static final String PRINT_STATS_PER_PART = "spark.cdm.perfops.printStatsPerPart";
128126

129127
static {
130128
types.put(AUTOCORRECT_MISSING, PropertyType.BOOLEAN);
@@ -153,10 +151,6 @@ public enum PropertyType {
153151
defaults.put(READ_CL, "LOCAL_QUORUM");
154152
types.put(WRITE_CL, PropertyType.STRING);
155153
defaults.put(WRITE_CL, "LOCAL_QUORUM");
156-
types.put(PRINT_STATS_AFTER, PropertyType.NUMBER);
157-
defaults.put(PRINT_STATS_AFTER, "100000");
158-
types.put(PRINT_STATS_PER_PART, PropertyType.BOOLEAN);
159-
defaults.put(PRINT_STATS_PER_PART, "false");
160154
types.put(PERF_FETCH_SIZE, PropertyType.NUMBER);
161155
defaults.put(PERF_FETCH_SIZE, "1000");
162156
}

src/resources/cdm-detailed.properties

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,6 @@ spark.cdm.trackRun.runId <auto-generated-unique-long-va
227227
# .read : Default is LOCAL_QUORUM. Read consistency from Origin, and also from Target
228228
# when records are read for comparison purposes.
229229
# .write : Default is LOCAL_QUORUM. Write consistency to Target.
230-
# .printStatsAfter : Default is 100000. Number of rows of processing after which a progress log
231-
# entry will be made.
232-
# .printStatsPerPart : Default is false. Print statistics for each part after it is processed.
233230
# .fetchSizeInRows : Default is 1000. This affects the frequency of reads from Origin, and also the
234231
# frequency of flushes to Target. A larger value will reduce the number of reads
235232
# and writes, but will increase the memory requirements.
@@ -240,8 +237,6 @@ spark.cdm.perfops.ratelimit.origin 20000
240237
spark.cdm.perfops.ratelimit.target 20000
241238
#spark.cdm.perfops.consistency.read LOCAL_QUORUM
242239
#spark.cdm.perfops.consistency.write LOCAL_QUORUM
243-
#spark.cdm.perfops.printStatsAfter 100000
244-
#spark.cdm.perfops.printStatsPerPart false
245240
#spark.cdm.perfops.fetchSizeInRows 1000
246241
#spark.cdm.perfops.errorLimit 0
247242

0 commit comments

Comments (0)