
Commit 30d8495

Merge pull request #132 from datastax/feature/cdm-44-auto-discover-counter-schema
Implemented auto-discovery of counter tables
2 parents 1d00d88 + 26b3a7b commit 30d8495

File tree

7 files changed: +41 -65 lines changed


README.md

Lines changed: 2 additions & 2 deletions

```diff
@@ -116,8 +116,8 @@ grep "ERROR CopyJobSession: Error with PartitionRange" /path/to/logfile_name.txt
 > A sample Guardrail properties file can be [found here](./src/resources/cdmGuardrail.properties)
 
 # Features
-- Auto-detects table schema (column names, types, id fields, collections, UDTs, etc.)
-- Supports migration/validation of [Counter tables](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_using/useCountersConcept.html)
+- Auto-detects table schema (column names, types, keys, collections, UDTs, etc.)
+  - Including [Counter tables](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_using/useCountersConcept.html)
 - Preserve [writetimes](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/cql_commands/cqlSelect.html#cqlSelect__retrieving-the-datetime-a-write-occurred-p) and [TTLs](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/cql_commands/cqlSelect.html#cqlSelect__ref-select-ttl-p)
 - Supports migration/validation of advanced DataTypes ([Sets](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__set), [Lists](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__list), [Maps](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__map), [UDTs](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__udt))
 - Filter records from `Origin` using `writetimes` and/or CQL conditions and/or min/max token-range
```
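The auto-detection the README now advertises is driven by the cluster's own table metadata rather than user-supplied column lists. As a minimal sketch of the kind of lookup involved (not CDM's actual code; assumes the DataStax Java driver 4.x and a reachable cluster, and the `cycling.cyclist_count` keyspace/table names are illustrative):

```java
import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata;

public class SchemaPeek {
    public static void main(String[] args) {
        try (CqlSession session = CqlSession.builder().build()) {
            TableMetadata tm = session.getMetadata()
                    .getKeyspace("cycling").orElseThrow()
                    .getTable("cyclist_count").orElseThrow();
            // Keys and column types come straight from the cluster's schema;
            // a column of CQL type "counter" is what flags counter handling.
            tm.getPartitionKey().forEach(c -> System.out.println("partition key: " + c.getName()));
            tm.getClusteringColumns().keySet().forEach(c -> System.out.println("clustering: " + c.getName()));
            tm.getColumns().values().forEach(c -> System.out.println(c.getName() + " : " + c.getType()));
        }
    }
}
```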

pom.xml

Lines changed: 1 addition & 1 deletion

```diff
@@ -8,7 +8,7 @@
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <revision>3.4.0</revision>
+        <revision>3.4.1</revision>
         <scala.version>2.12.17</scala.version>
         <scala.main.version>2.12</scala.main.version>
         <spark.version>3.3.1</spark.version>
```

src/main/java/datastax/astra/migrate/AbstractJobSession.java

Lines changed: 25 additions & 33 deletions

```diff
@@ -15,8 +15,10 @@
 
 import java.time.Duration;
 import java.time.Instant;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Optional;
+import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
 public class AbstractJobSession extends BaseJobSession {
@@ -119,14 +121,8 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
         if (null == insertCols || insertCols.trim().isEmpty()) {
             insertCols = selectCols;
         }
-        String insertBinds = "";
-        for (String idCol : tableInfo.getKeyColumns()) {
-            if (insertBinds.isEmpty()) {
-                insertBinds = idCol + "= ?";
-            } else {
-                insertBinds += " and " + idCol + "= ?";
-            }
-        }
+        String insertBinds = String.join(" and ",
+                tableInfo.getKeyColumns().stream().map(col -> col + " = ?").collect(Collectors.toList()));
 
         String originSelectQry;
         if (!isJobMigrateRowsFromFile) {
@@ -144,32 +140,22 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
         astraSelectStatement = astraSession.prepare(targetSelectQry);
 
         isCounterTable = tableInfo.isCounterTable();
+        String fullInsertQuery;
         if (isCounterTable) {
-            String updateSelectMappingStr = Util.getSparkPropOr(sc, "spark.counterTable.cql.index", "0");
-            for (String updateSelectIndex : updateSelectMappingStr.split(",")) {
-                updateSelectMapping.add(Integer.parseInt(updateSelectIndex));
-            }
-
-            String counterTableUpdate = Util.getSparkProp(sc, "spark.counterTable.cql");
-            astraInsertStatement = astraSession.prepare(counterTableUpdate);
-            String fullInsertQuery = "update " + astraKeyspaceTable + " set (" + insertCols + ") VALUES (" + insertBinds + ")";
+            String updateCols = String.join(" , ",
+                    tableInfo.getOtherColumns().stream().map(s -> s + " += ?").collect(Collectors.toList()));
+            String updateKeys = String.join(" and ",
+                    tableInfo.getKeyColumns().stream().map(s -> s + " = ?").collect(Collectors.toList()));
+            fullInsertQuery = "update " + astraKeyspaceTable + " set " + updateCols + " where " + updateKeys;
         } else {
-            insertBinds = "";
-            for (String str : insertCols.split(",")) {
-                if (insertBinds.isEmpty()) {
-                    insertBinds += "?";
-                } else {
-                    insertBinds += ", ?";
-                }
-            }
-
-            String fullInsertQuery = "insert into " + astraKeyspaceTable + " (" + insertCols + ") VALUES (" + insertBinds + ")";
+            insertBinds = String.join(" , ", Arrays.stream(insertCols.split(",")).map(col -> " ?").collect(Collectors.toList()));
+            fullInsertQuery = "insert into " + astraKeyspaceTable + " (" + insertCols + ") VALUES (" + insertBinds + ")";
             if (!ttlWTCols.isEmpty()) {
                 fullInsertQuery += " USING TTL ? AND TIMESTAMP ?";
             }
-            logger.info("PARAM -- Target insert query: {}", fullInsertQuery);
-            astraInsertStatement = astraSession.prepare(fullInsertQuery);
         }
+        logger.info("PARAM -- Target insert query: {}", fullInsertQuery);
+        astraInsertStatement = astraSession.prepare(fullInsertQuery);
 
         // Handle rows with blank values for 'timestamp' data-type in primary-key fields
         tsReplaceValStr = Util.getSparkPropOr(sc, "spark.target.replace.blankTimestampKeyUsingEpoch", "");
@@ -182,15 +168,21 @@ public BoundStatement bindInsert(PreparedStatement insertStatement, Row sourceRo
         BoundStatement boundInsertStatement = insertStatement.bind().setConsistencyLevel(writeConsistencyLevel);
 
         if (isCounterTable) {
-            for (int index = 0; index < tableInfo.getAllColumns().size(); index++) {
-                TypeInfo typeInfo = tableInfo.getColumns().get(index).getTypeInfo();
+            for (int index = 0; index < tableInfo.getNonKeyColumns().size(); index++) {
+                TypeInfo typeInfo = tableInfo.getNonKeyColumns().get(index).getTypeInfo();
+                int colIdx = tableInfo.getIdColumns().size() + index;
                 // compute the counter delta if reading from astra for the difference
-                if (astraRow != null && index < (tableInfo.getColumns().size() - tableInfo.getIdColumns().size())) {
-                    boundInsertStatement = boundInsertStatement.set(index, (sourceRow.getLong(updateSelectMapping.get(index)) - astraRow.getLong(updateSelectMapping.get(index))), Long.class);
+                if (astraRow != null) {
+                    boundInsertStatement = boundInsertStatement.set(index, (sourceRow.getLong(colIdx) - astraRow.getLong(colIdx)), Long.class);
                 } else {
-                    boundInsertStatement = boundInsertStatement.set(index, getData(typeInfo, updateSelectMapping.get(index), sourceRow), typeInfo.getTypeClass());
+                    boundInsertStatement = boundInsertStatement.set(index, sourceRow.getLong(colIdx), Long.class);
                 }
             }
+            for (int index = 0; index < tableInfo.getIdColumns().size(); index++) {
+                TypeInfo typeInfo = tableInfo.getIdColumns().get(index).getTypeInfo();
+                int colIdx = tableInfo.getNonKeyColumns().size() + index;
+                boundInsertStatement = boundInsertStatement.set(colIdx, getData(typeInfo, index, sourceRow), typeInfo.getTypeClass());
+            }
         } else {
             int index = 0;
             for (index = 0; index < tableInfo.getAllColumns().size(); index++) {
```
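The counter branch of `bindInsert` above relies on a fixed ordering: the origin SELECT lists key columns first, then non-key (counter) columns, while the generated UPDATE binds counter deltas first, then keys. A self-contained sketch of that index arithmetic, using the demo `cycling.cyclist_count` table from the old `cdm.properties` example (class and variable names are illustrative, not CDM's API):

```java
import java.util.Arrays;
import java.util.List;

public class CounterBindSketch {
    public static void main(String[] args) {
        // PRIMARY KEY((pk1, pk2), cc1) plus one counter column c1.
        List<String> keyCols = Arrays.asList("pk1", "pk2", "cc1");
        List<String> counterCols = Arrays.asList("c1");

        // Counter i binds at UPDATE position i and is read from
        // source-row position keyCols.size() + i (keys are selected first).
        for (int i = 0; i < counterCols.size(); i++) {
            System.out.printf("bind %d <- sourceRow[%d]  (%s delta)%n",
                    i, keyCols.size() + i, counterCols.get(i));
        }
        // Key k binds at position counterCols.size() + k and is read from
        // source-row position k.
        for (int k = 0; k < keyCols.size(); k++) {
            System.out.printf("bind %d <- sourceRow[%d]  (%s)%n",
                    counterCols.size() + k, k, keyCols.get(k));
        }
    }
}
```

When a target row already exists, the diff binds the difference `sourceRow.getLong(colIdx) - astraRow.getLong(colIdx)` rather than the raw value, since counter writes are increments, not overwrites.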

src/main/java/datastax/astra/migrate/BaseJobSession.java

Lines changed: 0 additions & 4 deletions

```diff
@@ -10,7 +10,6 @@
 import org.apache.spark.SparkConf;
 
 import java.io.Serializable;
-import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -34,9 +33,6 @@ public abstract class BaseJobSession {
     protected RateLimiter writeLimiter;
     protected Integer maxRetries = 10;
     protected AtomicLong readCounter = new AtomicLong(0);
-
-    protected List<Integer> updateSelectMapping = new ArrayList<Integer>();
-
     protected Integer batchSize = 1;
     protected Integer fetchSizeInRows = 1000;
    protected Integer printStatsAfter = 100000;
```

src/main/java/datastax/astra/migrate/schema/TableInfo.java

Lines changed: 9 additions & 3 deletions

```diff
@@ -21,19 +21,19 @@ public class TableInfo {
     private List<ColumnInfo> idColumns;
     private List<String> partitionKeyColumns;
     private List<String> keyColumns;
+    private List<String> otherColumns;
     private List<String> allColumns;
     private List<String> ttlAndWriteTimeColumns;
     private String desc;
+
+    @ToString.Include
     private boolean isCounterTable = false;
 
     protected TableInfo(CqlSession session, String keySpace, String table, String selectColsString) {
         List<String> selectCols = selectColsString.isEmpty() ? Collections.emptyList() :
                 Arrays.asList(selectColsString.toLowerCase(Locale.ROOT).split(","));
         TableMetadata tm = session.getMetadata().getKeyspace(keySpace).get().getTable(table).get();
         desc = tm.describe(false);
-        if (desc.toLowerCase(Locale.ROOT).contains("counter")) {
-            isCounterTable = true;
-        }
 
         partitionColumns = getPartitionKeyColumns(tm);
         partitionKeyColumns = colInfoToList(partitionColumns);
@@ -43,9 +43,11 @@ protected TableInfo(CqlSession session, String keySpace, String table, String se
         keyColumns = colInfoToList(idColumns);
 
         nonKeyColumns = getNonKeyColumns(tm, keyColumns, selectCols);
+        otherColumns = colInfoToList(nonKeyColumns);
         columns.addAll(idColumns);
         columns.addAll(nonKeyColumns);
         allColumns = colInfoToList(columns);
+        isCounterTable = isCounterTable(nonKeyColumns);
 
         ttlAndWriteTimeColumns = loadTtlAndWriteTimeCols();
     }
@@ -99,4 +101,8 @@ private List<ColumnInfo> getNonKeyColumns(TableMetadata tm, List keyColumnsNames
     private List<String> colInfoToList(List<ColumnInfo> listColInfo) {
         return listColInfo.stream().map(ColumnInfo::getColName).collect(Collectors.toList());
     }
+
+    private boolean isCounterTable(List<ColumnInfo> nonKeyColumns) {
+        return nonKeyColumns.stream().filter(ci -> ci.getTypeInfo().isCounter()).findAny().isPresent();
+    }
 }
```
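The new `isCounterTable(...)` helper reduces detection to a type check over the non-key columns; the old version string-matched "counter" anywhere in the table's DESCRIBE output, which could false-positive on, say, a column named `encounter`. A stand-in sketch of the rule, with simplified Java 16+ records in place of CDM's `ColumnInfo`/`TypeInfo` classes (`anyMatch` is equivalent to the `filter(...).findAny().isPresent()` chain in the diff):

```java
import java.util.Arrays;
import java.util.List;

public class CounterDetectSketch {
    record TypeInfo(boolean isCounter) {}
    record ColumnInfo(String name, TypeInfo typeInfo) {}

    // A table is a counter table iff any non-key column has a counter type.
    static boolean isCounterTable(List<ColumnInfo> nonKeyColumns) {
        return nonKeyColumns.stream().anyMatch(ci -> ci.typeInfo().isCounter());
    }

    public static void main(String[] args) {
        List<ColumnInfo> cols = Arrays.asList(
                new ColumnInfo("comment", new TypeInfo(false)),
                new ColumnInfo("c1", new TypeInfo(true)));
        System.out.println(isCounterTable(cols)); // true
    }
}
```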

src/main/java/datastax/astra/migrate/schema/TypeInfo.java

Lines changed: 4 additions & 0 deletions

```diff
@@ -18,6 +18,7 @@ public class TypeInfo {
     private static Map<String, Class> typeMap = loadTypeMap();
     private Class typeClass = Object.class;
     private List<Class> subTypes = new ArrayList<Class>();
+    private boolean isCounter = false;
 
     public TypeInfo(DataType dataType) {
         this(dataType.toString());
@@ -48,6 +49,9 @@ public TypeInfo(String dataTypeStr) {
             typeClass = UdtValue.class;
         } else if (dataTypeStr.toLowerCase(Locale.ROOT).startsWith("tuple")) {
             typeClass = TupleValue.class;
+        } else if (dataTypeStr.toLowerCase(Locale.ROOT).startsWith("counter")) {
+            typeClass = Long.class;
+            isCounter = true;
         } else {
             typeClass = typeMap.get(dataTypeStr.toLowerCase(Locale.ROOT));
         }
```
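This is the piece that lets the rest of the commit treat counters uniformly as longs: once `TypeInfo` maps the CQL `counter` type to `Long.class`, `bindInsert` can read and bind counter values with `getLong(...)`. A minimal sketch of just this branch (the stand-alone class and method names are illustrative, not CDM's):

```java
import java.util.Locale;

public class TypeResolveSketch {
    // Simplified stand-in for the constructor branch added above.
    static Class<?> resolve(String dataTypeStr) {
        if (dataTypeStr.toLowerCase(Locale.ROOT).startsWith("counter")) {
            return Long.class;  // CQL counters are 64-bit signed integers
        }
        return Object.class;    // the real code falls through to a name-to-class map
    }

    public static void main(String[] args) {
        System.out.println(resolve("COUNTER")); // class java.lang.Long
        System.out.println(resolve("text"));    // class java.lang.Object
    }
}
```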

src/resources/cdm.properties

Lines changed: 0 additions & 22 deletions

```diff
@@ -69,28 +69,6 @@ spark.batchSize 10
 # ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE ROWS BASED ON A VALID CQL FILTER
 #spark.query.condition
 
-# ENABLE ONLY IF IT IS A COUNTER TABLE
-#spark.counterTable false
-#spark.counterTable.cql
-#spark.counterTable.cql.index 0
-
-############################### EXAMPLE MAPPING USING A DEMO counter column TABLE ###########################
-# CREATE TABLE cycling.cyclist_count (
-#  pk1 uuid,
-#  pk2 date,
-#  cc1 boolean,
-#  c1 counter,
-#  PRIMARY KEY((pk1,pk2),cc1)
-# );
-# then, our counter table mapping would look like below,
-# spark.counterTable true
-# spark.counterTable.cql UPDATE cycling.cyclist_count SET c1 += ? WHERE pk1 = ? AND pk2 = ? AND cc1 = ?
-# spark.counterTable.cql.index 3,0,1,2
-#
-# Remember the above count index order is based on the below column mapping ordering,
-# spark.query.origin pk1,pk2,cc1,c
-#############################################################################################################
-
 # ENABLE ONLY IF YOU WANT TO FILTER BASED ON WRITE-TIME (values must be in microseconds)
 #spark.origin.writeTimeStampFilter false
 #spark.origin.minWriteTimeStampFilter 0
```
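The removed mapping is exactly what the commit now derives from schema metadata: for the demo table, the auto-generated statement matches the hand-written `spark.counterTable.cql` above. A sketch of the derivation, using `Collectors.joining` (equivalent to the `String.join` over a collected list in the `AbstractJobSession` diff); the table and column names come from the removed example:

```java
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class AutoCounterCqlSketch {
    public static void main(String[] args) {
        // In CDM these lists now come from TableInfo, not from properties.
        List<String> keyColumns = Arrays.asList("pk1", "pk2", "cc1");
        List<String> counterColumns = Arrays.asList("c1");

        String updateCols = counterColumns.stream()
                .map(s -> s + " += ?").collect(Collectors.joining(" , "));
        String updateKeys = keyColumns.stream()
                .map(s -> s + " = ?").collect(Collectors.joining(" and "));
        System.out.println("update cycling.cyclist_count set " + updateCols
                + " where " + updateKeys);
        // -> update cycling.cyclist_count set c1 += ? where pk1 = ? and pk2 = ? and cc1 = ?
    }
}
```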
