Skip to content

Commit 9187f1b

Browse files
authored
Feature/track run (#302)
* Upgraded trackRun feature to include status on Info table & also fixed rerun of uninitialized jobs * Added release notes * Added trace log
1 parent 453a12e commit 9187f1b

File tree

9 files changed

+193
-24
lines changed

9 files changed

+193
-24
lines changed

RELEASE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# Release Notes
2+
## [4.3.8] - 2024-09-09
3+
- Upgraded the `spark.cdm.trackRun` feature to include `status` on the `cdm_run_info` table. Also improved the code to handle the rerun of a previous run that may have exited before being correctly initialized.
4+
25
## [4.3.7] - 2024-09-03
36
- Added property `spark.cdm.transform.custom.ttl` to allow a custom constant value to be set for TTL instead of using the values from `origin` rows.
47
- Repo-wide code formatting & imports organization

src/main/java/com/datastax/cdm/cql/statement/TargetUpsertRunDetailsStatement.java

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,18 @@
2020
import java.util.ArrayList;
2121
import java.util.Collection;
2222

23+
import org.slf4j.Logger;
24+
import org.slf4j.LoggerFactory;
25+
2326
import com.datastax.cdm.feature.TrackRun;
2427
import com.datastax.cdm.feature.TrackRun.RUN_TYPE;
28+
import com.datastax.cdm.job.RunNotStartedException;
2529
import com.datastax.cdm.job.SplitPartitions;
2630
import com.datastax.cdm.job.SplitPartitions.Partition;
2731
import com.datastax.oss.driver.api.core.CqlSession;
2832
import com.datastax.oss.driver.api.core.cql.BoundStatement;
2933
import com.datastax.oss.driver.api.core.cql.ResultSet;
34+
import com.datastax.oss.driver.api.core.cql.Row;
3035

3136
public class TargetUpsertRunDetailsStatement {
3237
private CqlSession session;
@@ -36,45 +41,71 @@ public class TargetUpsertRunDetailsStatement {
3641
private long prevRunId;
3742
private BoundStatement boundInitInfoStatement;
3843
private BoundStatement boundInitStatement;
39-
private BoundStatement boundUpdateInfoStatement;
44+
private BoundStatement boundEndInfoStatement;
4045
private BoundStatement boundUpdateStatement;
4146
private BoundStatement boundUpdateStartStatement;
47+
private BoundStatement boundSelectInfoStatement;
4248
private BoundStatement boundSelectStatement;
4349

50+
public Logger logger = LoggerFactory.getLogger(this.getClass().getName());
51+
4452
public TargetUpsertRunDetailsStatement(CqlSession session, String keyspaceTable) {
4553
this.session = session;
4654
String[] ksTab = keyspaceTable.split("\\.");
55+
if (ksTab.length != 2) {
56+
throw new RuntimeException("Invalid keyspace.table format: " + keyspaceTable);
57+
}
4758
this.keyspaceName = ksTab[0];
4859
this.tableName = ksTab[1];
4960
String cdmKsTabInfo = this.keyspaceName + ".cdm_run_info";
5061
String cdmKsTabDetails = this.keyspaceName + ".cdm_run_details";
5162

5263
this.session.execute("create table if not exists " + cdmKsTabInfo
53-
+ " (table_name text, run_id bigint, run_type text, prev_run_id bigint, start_time timestamp, end_time timestamp, run_info text, primary key (table_name, run_id))");
54-
this.session.execute("create table if not exists " + cdmKsTabDetails
55-
+ " (table_name text, run_id bigint, start_time timestamp, token_min bigint, token_max bigint, status text, primary key ((table_name, run_id), token_min))");
64+
+ " (table_name text, run_id bigint, run_type text, prev_run_id bigint, start_time timestamp, end_time timestamp, run_info text, status text, primary key (table_name, run_id))");
65+
66+
// TODO: Remove this code block after a few releases, its only added for backward compatibility
67+
try {
68+
this.session.execute("alter table " + cdmKsTabInfo + " add status text");
69+
} catch (Exception e) {
70+
// ignore if column already exists
71+
logger.trace("Column 'status' already exists in table {}", cdmKsTabInfo);
72+
}
5673

5774
boundInitInfoStatement = bindStatement("INSERT INTO " + cdmKsTabInfo
58-
+ " (table_name, run_id, run_type, prev_run_id, start_time) VALUES (?, ?, ?, ?, dateof(now()))");
75+
+ " (table_name, run_id, run_type, prev_run_id, start_time, status) VALUES (?, ?, ?, ?, dateof(now()), ?)");
5976
boundInitStatement = bindStatement("INSERT INTO " + cdmKsTabDetails
6077
+ " (table_name, run_id, token_min, token_max, status) VALUES (?, ?, ?, ?, ?)");
61-
boundUpdateInfoStatement = bindStatement("UPDATE " + cdmKsTabInfo
62-
+ " SET end_time = dateof(now()), run_info = ? WHERE table_name = ? AND run_id = ?");
78+
boundEndInfoStatement = bindStatement("UPDATE " + cdmKsTabInfo
79+
+ " SET end_time = dateof(now()), run_info = ?, status = ? WHERE table_name = ? AND run_id = ?");
6380
boundUpdateStatement = bindStatement(
6481
"UPDATE " + cdmKsTabDetails + " SET status = ? WHERE table_name = ? AND run_id = ? AND token_min = ?");
6582
boundUpdateStartStatement = bindStatement("UPDATE " + cdmKsTabDetails
6683
+ " SET start_time = dateof(now()), status = ? WHERE table_name = ? AND run_id = ? AND token_min = ?");
84+
boundSelectInfoStatement = bindStatement(
85+
"SELECT status FROM " + cdmKsTabInfo + " WHERE table_name = ? AND run_id = ?");
6786
boundSelectStatement = bindStatement("SELECT token_min, token_max FROM " + cdmKsTabDetails
6887
+ " WHERE table_name = ? AND run_id = ? and status in ('NOT_STARTED', 'STARTED', 'FAIL', 'DIFF') ALLOW FILTERING");
6988
}
7089

71-
public Collection<SplitPartitions.Partition> getPendingPartitions(long prevRunId) {
90+
public Collection<SplitPartitions.Partition> getPendingPartitions(long prevRunId) throws RunNotStartedException {
7291
this.prevRunId = prevRunId;
92+
final Collection<SplitPartitions.Partition> pendingParts = new ArrayList<SplitPartitions.Partition>();
7393
if (prevRunId == 0) {
74-
return new ArrayList<SplitPartitions.Partition>();
94+
return pendingParts;
95+
}
96+
97+
ResultSet rsInfo = session
98+
.execute(boundSelectInfoStatement.setString("table_name", tableName).setLong("run_id", prevRunId));
99+
Row cdmRunStatus = rsInfo.one();
100+
if (cdmRunStatus == null) {
101+
return pendingParts;
102+
} else {
103+
String status = cdmRunStatus.getString("status");
104+
if (TrackRun.RUN_STATUS.NOT_STARTED.toString().equals(status)) {
105+
throw new RunNotStartedException("Run not started for run_id: " + prevRunId);
106+
}
75107
}
76108

77-
final Collection<SplitPartitions.Partition> pendingParts = new ArrayList<SplitPartitions.Partition>();
78109
ResultSet rs = session
79110
.execute(boundSelectStatement.setString("table_name", tableName).setLong("run_id", prevRunId));
80111
rs.forEach(row -> {
@@ -89,8 +120,12 @@ public Collection<SplitPartitions.Partition> getPendingPartitions(long prevRunId
89120
public long initCdmRun(Collection<SplitPartitions.Partition> parts, RUN_TYPE runType) {
90121
runId = System.currentTimeMillis();
91122
session.execute(boundInitInfoStatement.setString("table_name", tableName).setLong("run_id", runId)
92-
.setString("run_type", runType.toString()).setLong("prev_run_id", prevRunId));
123+
.setString("run_type", runType.toString()).setLong("prev_run_id", prevRunId)
124+
.setString("status", TrackRun.RUN_STATUS.NOT_STARTED.toString()));
93125
parts.forEach(part -> initCdmRun(part));
126+
session.execute(boundInitInfoStatement.setString("table_name", tableName).setLong("run_id", runId)
127+
.setString("run_type", runType.toString()).setLong("prev_run_id", prevRunId)
128+
.setString("status", TrackRun.RUN_STATUS.STARTED.toString()));
94129
return runId;
95130
}
96131

@@ -101,9 +136,9 @@ private void initCdmRun(Partition partition) {
101136
.setString("status", TrackRun.RUN_STATUS.NOT_STARTED.toString()));
102137
}
103138

104-
public void updateCdmRunInfo(String runInfo) {
105-
session.execute(boundUpdateInfoStatement.setString("table_name", tableName).setLong("run_id", runId)
106-
.setString("run_info", runInfo));
139+
public void endCdmRun(String runInfo) {
140+
session.execute(boundEndInfoStatement.setString("table_name", tableName).setLong("run_id", runId)
141+
.setString("run_info", runInfo).setString("status", TrackRun.RUN_STATUS.ENDED.toString()));
107142
}
108143

109144
public void updateCdmRun(BigInteger min, TrackRun.RUN_STATUS status) {

src/main/java/com/datastax/cdm/data/DataUtility.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,16 @@ public static String getMyClassMethodLine(Exception e) {
131131
break;
132132
}
133133
}
134-
String className = targetStackTraceElement.getClassName();
135-
String methodName = targetStackTraceElement.getMethodName();
136-
int lineNumber = targetStackTraceElement.getLineNumber();
134+
if (null == targetStackTraceElement && null != stackTraceElements && stackTraceElements.length > 0) {
135+
targetStackTraceElement = stackTraceElements[0];
136+
}
137+
if (null != targetStackTraceElement) {
138+
String className = targetStackTraceElement.getClassName();
139+
String methodName = targetStackTraceElement.getMethodName();
140+
int lineNumber = targetStackTraceElement.getLineNumber();
141+
return className + "." + methodName + ":" + lineNumber;
142+
}
137143

138-
return className + "." + methodName + ":" + lineNumber;
144+
return "Unknown";
139145
}
140146
}

src/main/java/com/datastax/cdm/feature/TrackRun.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.slf4j.LoggerFactory;
2323

2424
import com.datastax.cdm.cql.statement.TargetUpsertRunDetailsStatement;
25+
import com.datastax.cdm.job.RunNotStartedException;
2526
import com.datastax.cdm.job.SplitPartitions;
2627
import com.datastax.oss.driver.api.core.CqlSession;
2728

@@ -31,7 +32,7 @@ public enum RUN_TYPE {
3132
}
3233

3334
public enum RUN_STATUS {
34-
NOT_STARTED, STARTED, PASS, FAIL, DIFF
35+
NOT_STARTED, STARTED, PASS, FAIL, DIFF, ENDED
3536
}
3637

3738
public Logger logger = LoggerFactory.getLogger(this.getClass().getName());
@@ -41,7 +42,7 @@ public TrackRun(CqlSession session, String keyspaceTable) {
4142
this.runStatement = new TargetUpsertRunDetailsStatement(session, keyspaceTable);
4243
}
4344

44-
public Collection<SplitPartitions.Partition> getPendingPartitions(long prevRunId) {
45+
public Collection<SplitPartitions.Partition> getPendingPartitions(long prevRunId) throws RunNotStartedException {
4546
Collection<SplitPartitions.Partition> pendingParts = runStatement.getPendingPartitions(prevRunId);
4647
logger.info("###################### {} partitions pending from previous run id {} ######################",
4748
pendingParts.size(), prevRunId);
@@ -60,6 +61,6 @@ public void updateCdmRun(BigInteger min, RUN_STATUS status) {
6061
}
6162

6263
public void endCdmRun(String runInfo) {
63-
runStatement.updateCdmRunInfo(runInfo);
64+
runStatement.endCdmRun(runInfo);
6465
}
6566
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.datastax.cdm.job;
17+
18+
public class RunNotStartedException extends Exception {
19+
20+
private static final long serialVersionUID = -4108800389847708120L;
21+
22+
public RunNotStartedException(String message) {
23+
super(message);
24+
}
25+
26+
}

src/main/scala/com/datastax/cdm/job/BasePartitionJob.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,11 @@ abstract class BasePartitionJob extends BaseJob[SplitPartitions.Partition] {
3838
}
3939

4040
if (prevRunId != 0) {
41-
trackRunFeature.getPendingPartitions(prevRunId)
41+
try {
42+
trackRunFeature.getPendingPartitions(prevRunId)
43+
} catch {
44+
case e: RunNotStartedException => SplitPartitions.getRandomSubPartitions(pieces, minPartition, maxPartition, coveragePercent)
45+
}
4246
} else {
4347
SplitPartitions.getRandomSubPartitions(pieces, minPartition, maxPartition, coveragePercent)
4448
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.datastax.cdm.cql.statement;
17+
18+
import static org.junit.jupiter.api.Assertions.*;
19+
import static org.mockito.Mockito.*;
20+
21+
import java.util.Collections;
22+
import java.util.List;
23+
import java.util.stream.Collectors;
24+
import java.util.stream.IntStream;
25+
26+
import org.junit.jupiter.api.BeforeEach;
27+
import org.junit.jupiter.api.Test;
28+
import org.mockito.Mock;
29+
30+
import com.datastax.cdm.cql.CommonMocks;
31+
import com.datastax.cdm.job.RunNotStartedException;
32+
import com.datastax.oss.driver.api.core.CqlSession;
33+
import com.datastax.oss.driver.api.core.cql.BoundStatement;
34+
import com.datastax.oss.driver.api.core.cql.PreparedStatement;
35+
import com.datastax.oss.driver.api.core.cql.ResultSet;
36+
import com.datastax.oss.driver.api.core.cql.Row;
37+
import com.datastax.oss.driver.api.core.type.DataTypes;
38+
39+
public class TargetUpsertRunDetailsStatementTest extends CommonMocks {
40+
@Mock
41+
PreparedStatement preparedStatement;
42+
43+
@Mock
44+
CqlSession cqlSession;
45+
46+
@Mock
47+
ResultSet rs;
48+
49+
@Mock
50+
Row row;
51+
52+
@Mock
53+
BoundStatement bStatement;
54+
55+
TargetUpsertRunDetailsStatement targetUpsertRunDetailsStatement;
56+
57+
@BeforeEach
58+
public void setup() {
59+
// UPDATE is needed by counters, though the class should handle non-counter updates
60+
commonSetup(false, false, true);
61+
when(cqlSession.prepare(anyString())).thenReturn(preparedStatement);
62+
when(preparedStatement.bind(any())).thenReturn(bStatement);
63+
when(cqlSession.execute(bStatement)).thenReturn(rs);
64+
when(rs.all()).thenReturn(List.of(row));
65+
66+
}
67+
68+
@Test
69+
public void init() throws RunNotStartedException {
70+
targetUpsertRunDetailsStatement = new TargetUpsertRunDetailsStatement(cqlSession, "ks.table1");
71+
assertEquals(Collections.emptyList(), targetUpsertRunDetailsStatement.getPendingPartitions(0));
72+
}
73+
74+
@Test
75+
public void incorrectKsTable() throws RunNotStartedException {
76+
assertThrows(RuntimeException.class, () -> new TargetUpsertRunDetailsStatement(cqlSession, "table1"));
77+
}
78+
79+
}

src/test/java/com/datastax/cdm/data/DataUtilityTest.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,26 @@ public void extractObjectsFromCollectionTest() {
137137
}
138138

139139
@Test
140-
public void getMyClassMethodLineTest() {
140+
public void getMyClassMethodLineTestCDMClass() {
141141
Exception ex = new Exception();
142142
ex.setStackTrace(new StackTraceElement[] { new StackTraceElement("com.datastax.cdm.data.DataUtilityTest",
143143
"getMyClassMethodLineTest", "DataUtilityTest.java", 0) });
144144
assertEquals("com.datastax.cdm.data.DataUtilityTest.getMyClassMethodLineTest:0",
145145
DataUtility.getMyClassMethodLine(ex));
146146
}
147+
148+
@Test
149+
public void getMyClassMethodLineTestOtherClass() {
150+
Exception ex = new Exception();
151+
ex.setStackTrace(new StackTraceElement[] { new StackTraceElement("com.datastax.other.SomeClass",
152+
"getMyClassMethodLineTest", "SomeClass.java", 0) });
153+
assertEquals("com.datastax.other.SomeClass.getMyClassMethodLineTest:0", DataUtility.getMyClassMethodLine(ex));
154+
}
155+
156+
@Test
157+
public void getMyClassMethodLineTestUnknown() {
158+
Exception ex = new Exception();
159+
ex.setStackTrace(new StackTraceElement[] {});
160+
assertEquals("Unknown", DataUtility.getMyClassMethodLine(ex));
161+
}
147162
}

src/test/java/com/datastax/cdm/feature/TrackRunTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void test() {
2727
assertEquals("DIFF_DATA", TrackRun.RUN_TYPE.DIFF_DATA.name());
2828

2929
assertEquals(2, TrackRun.RUN_TYPE.values().length);
30-
assertEquals(5, TrackRun.RUN_STATUS.values().length);
30+
assertEquals(6, TrackRun.RUN_STATUS.values().length);
3131
}
3232

3333
}

0 commit comments

Comments
 (0)