
Commit 8140df0

Merge pull request #20 from datastax/feature/zdm-complaint-naming
Make config property naming ZDM compliant
2 parents 9fd1e22 + 989e3db commit 8140df0

File tree: 13 files changed (+176, -155 lines)


README.md

Lines changed: 10 additions & 10 deletions
@@ -44,22 +44,22 @@ Note: Above command also generates a log file `logfile_name.txt` to avoid log ou
 - Validation job will report differences as “ERRORS” in the log file as shown below
 
 ```
-22/09/27 11:21:24 ERROR DiffJobSession: Data mismatch found - Key: ek-1 %% mn1 %% c1 %% true Data: (Index: 4 Source: 30 Astra: 20 )
-22/09/27 11:21:24 ERROR DiffJobSession: Corrected mismatch data in Astra: ek-1 %% mn1 %% c1 %% true
-22/09/27 11:21:24 ERROR DiffJobSession: Data is missing in Astra: ek-2 %% mn2 %% c2 %% true
-22/09/27 11:21:24 ERROR DiffJobSession: Corrected missing data in Astra: ek-2 %% mn2 %% c2 %% true
+22/10/27 23:25:29 ERROR DiffJobSession: Missing target row found for key: Grapes %% 1 %% 2020-05-22 %% 2020-05-23T00:05:09.353Z %% skuid %% Aliquam faucibus
+22/10/27 23:25:29 ERROR DiffJobSession: Inserted missing row in target: Grapes %% 1 %% 2020-05-22 %% 2020-05-23T00:05:09.353Z %% skuid %% Aliquam faucibus
+22/10/27 23:25:30 ERROR DiffJobSession: Mismatch row found for key: Grapes %% 1 %% 2020-05-22 %% 2020-05-23T00:05:09.353Z %% skuid %% augue odio at quam Data: (Index: 8 Origin: Hello 3 Target: Hello 2 )
+22/10/27 23:25:30 ERROR DiffJobSession: Updated mismatch row in target: Grapes %% 1 %% 2020-05-22 %% 2020-05-23T00:05:09.353Z %% skuid %% augue odio at quam
 ```
 
 - Please grep for all `ERROR` from the output log files to get the list of missing and mismatched records.
 - Note that it lists differences by partition key values.
 - The Validation job can also be run in an AutoCorrect mode. This mode can
-  - Add any missing records from source to target
-  - Fix any inconsistencies between source and target (makes target same as source).
+  - Add any missing records from origin to target
+  - Fix any inconsistencies between origin and target (makes target same as origin).
 - Enable/disable this feature using one or both of the below setting in the config file
 
 ```
-spark.destination.autocorrect.missing true|false
-spark.destination.autocorrect.mismatch true|false
+spark.target.autocorrect.missing true|false
+spark.target.autocorrect.mismatch true|false
 ```
 
 # Migrating specific partition ranges
@@ -83,8 +83,8 @@ This mode is specifically useful to processes a subset of partition-ranges that
 - [Counter tables](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_using/useCountersConcept.html)
 - Preserve [writetimes](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/cql_commands/cqlSelect.html#cqlSelect__retrieving-the-datetime-a-write-occurred-p) and [TTL](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/cql_commands/cqlSelect.html#cqlSelect__ref-select-ttl-p)
 - Advanced DataTypes ([Sets](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__set), [Lists](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__list), [Maps](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__map), [UDTs](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__udt))
-- Filter records from source using writetime
+- Filter records from origin using writetime
 - SSL Support (including custom cipher algorithms)
-- Migrate from any Cassandra source ([Apache Cassandra](https://cassandra.apache.org)/[DataStax Enterprise (DSE)](https://www.datastax.com/products/datastax-enterprise)/[DataStax Astra DB](https://www.datastax.com/products/datastax-astra)) to any Cassandra target ([Apache Cassandra](https://cassandra.apache.org)/[DataStax Enterprise (DSE)](https://www.datastax.com/products/datastax-enterprise)/[DataStax Astra DB](https://www.datastax.com/products/datastax-astra))
+- Migrate from any Cassandra origin ([Apache Cassandra](https://cassandra.apache.org)/[DataStax Enterprise (DSE)](https://www.datastax.com/products/datastax-enterprise)/[DataStax Astra DB](https://www.datastax.com/products/datastax-astra)) to any Cassandra target ([Apache Cassandra](https://cassandra.apache.org)/[DataStax Enterprise (DSE)](https://www.datastax.com/products/datastax-enterprise)/[DataStax Astra DB](https://www.datastax.com/products/datastax-astra))
 - Validate migration accuracy and performance using a smaller randomized data-set
 - Custom writetime
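
For reference, this commit renames the config properties from the old `source`/`destination` prefixes to the ZDM-style `origin`/`target` names. The mapping below is compiled from the code changes that follow; per the new `Util` helper further down, the old names are still accepted as a fallback.

```
spark.source.keyspaceTable             -> spark.origin.keyspaceTable
spark.destination.keyspaceTable        -> spark.target.keyspaceTable
spark.query.source                     -> spark.query.origin
spark.query.source.partitionKey        -> spark.query.origin.partitionKey
spark.query.destination                -> spark.query.target
spark.query.destination.id             -> spark.query.target.id
spark.source.writeTimeStampFilter      -> spark.origin.writeTimeStampFilter
spark.source.minWriteTimeStampFilter   -> spark.origin.minWriteTimeStampFilter
spark.source.maxWriteTimeStampFilter   -> spark.origin.maxWriteTimeStampFilter
spark.source.hasRandomPartitioner      -> spark.origin.hasRandomPartitioner
spark.destination.custom.writeTime     -> spark.target.custom.writeTime
spark.destination.autocorrect.missing  -> spark.target.autocorrect.missing
spark.destination.autocorrect.mismatch -> spark.target.autocorrect.mismatch
```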

pom.xml

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 
     <groupId>datastax.astra.migrate</groupId>
     <artifactId>cassandra-data-migrator</artifactId>
-    <version>2.0</version>
+    <version>2.1</version>
     <packaging>jar</packaging>
 
     <properties>

src/main/java/datastax/astra/migrate/AbstractJobSession.java

Lines changed: 25 additions & 25 deletions
@@ -20,57 +20,57 @@ public class AbstractJobSession extends BaseJobSession {
 
     public Logger logger = LoggerFactory.getLogger(this.getClass().getName());
 
-    protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sparkConf) {
+    protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc) {
         this.sourceSession = sourceSession;
         this.astraSession = astraSession;
 
-        batchSize = new Integer(sparkConf.get("spark.batchSize", "1"));
-        printStatsAfter = new Integer(sparkConf.get("spark.printStatsAfter", "100000"));
+        batchSize = new Integer(Util.getSparkPropOr(sc, "spark.batchSize", "1"));
+        printStatsAfter = new Integer(Util.getSparkPropOr(sc, "spark.printStatsAfter", "100000"));
         if (printStatsAfter < 1) {
             printStatsAfter = 100000;
         }
 
-        readLimiter = RateLimiter.create(new Integer(sparkConf.get("spark.readRateLimit", "20000")));
-        writeLimiter = RateLimiter.create(new Integer(sparkConf.get("spark.writeRateLimit", "40000")));
-        maxRetries = Integer.parseInt(sparkConf.get("spark.maxRetries", "10"));
+        readLimiter = RateLimiter.create(new Integer(Util.getSparkPropOr(sc, "spark.readRateLimit", "20000")));
+        writeLimiter = RateLimiter.create(new Integer(Util.getSparkPropOr(sc, "spark.writeRateLimit", "40000")));
+        maxRetries = Integer.parseInt(sc.get("spark.maxRetries", "10"));
 
-        sourceKeyspaceTable = sparkConf.get("spark.source.keyspaceTable");
-        astraKeyspaceTable = sparkConf.get("spark.destination.keyspaceTable");
+        sourceKeyspaceTable = Util.getSparkProp(sc, "spark.origin.keyspaceTable");
+        astraKeyspaceTable = Util.getSparkProp(sc, "spark.target.keyspaceTable");
 
-        String ttlColsStr = sparkConf.get("spark.query.ttl.cols", "");
+        String ttlColsStr = Util.getSparkPropOrEmpty(sc, "spark.query.ttl.cols");
         if (null != ttlColsStr && ttlColsStr.trim().length() > 0) {
             for (String ttlCol : ttlColsStr.split(",")) {
                 ttlCols.add(Integer.parseInt(ttlCol));
             }
         }
 
-        String writeTimestampColsStr = sparkConf.get("spark.query.writetime.cols", "");
+        String writeTimestampColsStr = Util.getSparkPropOrEmpty(sc, "spark.query.writetime.cols");
         if (null != writeTimestampColsStr && writeTimestampColsStr.trim().length() > 0) {
             for (String writeTimeStampCol : writeTimestampColsStr.split(",")) {
                 writeTimeStampCols.add(Integer.parseInt(writeTimeStampCol));
             }
         }
 
         writeTimeStampFilter = Boolean
-                .parseBoolean(sparkConf.get("spark.source.writeTimeStampFilter", "false"));
+                .parseBoolean(Util.getSparkPropOr(sc, "spark.origin.writeTimeStampFilter", "false"));
         // batchsize set to 1 if there is a writeFilter
         if (writeTimeStampFilter) {
             batchSize = 1;
         }
 
         String minWriteTimeStampFilterStr =
-                sparkConf.get("spark.source.minWriteTimeStampFilter", "0");
+                Util.getSparkPropOr(sc, "spark.origin.minWriteTimeStampFilter", "0");
         if (null != minWriteTimeStampFilterStr && minWriteTimeStampFilterStr.trim().length() > 1) {
             minWriteTimeStampFilter = Long.parseLong(minWriteTimeStampFilterStr);
         }
         String maxWriteTimeStampFilterStr =
-                sparkConf.get("spark.source.maxWriteTimeStampFilter", "0");
+                Util.getSparkPropOr(sc, "spark.origin.maxWriteTimeStampFilter", "0");
         if (null != maxWriteTimeStampFilterStr && maxWriteTimeStampFilterStr.trim().length() > 1) {
             maxWriteTimeStampFilter = Long.parseLong(maxWriteTimeStampFilterStr);
         }
 
         String customWriteTimeStr =
-                sparkConf.get("spark.destination.custom.writeTime", "0");
+                Util.getSparkPropOr(sc, "spark.target.custom.writeTime", "0");
         if (null != customWriteTimeStr && customWriteTimeStr.trim().length() > 1 && StringUtils.isNumeric(customWriteTimeStr.trim())) {
             customWritetime = Long.parseLong(customWriteTimeStr);
         }
@@ -84,9 +84,9 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
         logger.info("PARAM -- WriteTimestampFilterCols: " + writeTimeStampCols);
         logger.info("PARAM -- WriteTimestampFilter: " + writeTimeStampFilter);
 
-        String selectCols = sparkConf.get("spark.query.source");
-        String partionKey = sparkConf.get("spark.query.source.partitionKey");
-        String sourceSelectCondition = sparkConf.get("spark.query.condition", "");
+        String selectCols = Util.getSparkProp(sc, "spark.query.origin");
+        String partionKey = Util.getSparkProp(sc, "spark.query.origin.partitionKey");
+        String sourceSelectCondition = Util.getSparkPropOrEmpty(sc, "spark.query.condition");
 
         final StringBuilder selectTTLWriteTimeCols = new StringBuilder();
         String[] allCols = selectCols.split(",");
@@ -96,16 +96,16 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
         writeTimeStampCols.forEach(col -> {
             selectTTLWriteTimeCols.append(",writetime(" + allCols[col] + ")");
         });
-        String fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols.toString() + " from " + sourceKeyspaceTable + " where token(" + partionKey.trim()
+        String fullSelectQuery = "select " + selectCols + selectTTLWriteTimeCols + " from " + sourceKeyspaceTable + " where token(" + partionKey.trim()
                 + ") >= ? and token(" + partionKey.trim() + ") <= ? " + sourceSelectCondition + " ALLOW FILTERING";
         sourceSelectStatement = sourceSession.prepare(fullSelectQuery);
         logger.info("PARAM -- Query used: " + fullSelectQuery);
 
-        selectColTypes = getTypes(sparkConf.get("spark.query.types"));
-        String idCols = sparkConf.get("spark.query.destination.id", "");
+        selectColTypes = getTypes(Util.getSparkProp(sc, "spark.query.types"));
+        String idCols = Util.getSparkPropOrEmpty(sc, "spark.query.target.id");
         idColTypes = selectColTypes.subList(0, idCols.split(",").length);
 
-        String insertCols = sparkConf.get("spark.query.destination", "");
+        String insertCols = Util.getSparkPropOrEmpty(sc, "spark.query.target");
         if (null == insertCols || insertCols.trim().isEmpty()) {
             insertCols = selectCols;
         }
@@ -121,15 +121,15 @@ protected AbstractJobSession(CqlSession sourceSession, CqlSession astraSession,
                 "select " + insertCols + " from " + astraKeyspaceTable
                         + " where " + insertBinds);
 
-        hasRandomPartitioner = Boolean.parseBoolean(sparkConf.get("spark.source.hasRandomPartitioner", "false"));
-        isCounterTable = Boolean.parseBoolean(sparkConf.get("spark.counterTable", "false"));
+        hasRandomPartitioner = Boolean.parseBoolean(Util.getSparkPropOr(sc, "spark.origin.hasRandomPartitioner", "false"));
+        isCounterTable = Boolean.parseBoolean(Util.getSparkPropOr(sc, "spark.counterTable", "false"));
         if (isCounterTable) {
-            String updateSelectMappingStr = sparkConf.get("spark.counterTable.cql.index", "0");
+            String updateSelectMappingStr = Util.getSparkPropOr(sc, "spark.counterTable.cql.index", "0");
             for (String updateSelectIndex : updateSelectMappingStr.split(",")) {
                 updateSelectMapping.add(Integer.parseInt(updateSelectIndex));
             }
 
-            String counterTableUpdate = sparkConf.get("spark.counterTable.cql");
+            String counterTableUpdate = Util.getSparkProp(sc, "spark.counterTable.cql");
             astraInsertStatement = astraSession.prepare(counterTableUpdate);
         } else {
             insertBinds = "";

src/main/java/datastax/astra/migrate/CopyJobSession.java

Lines changed: 4 additions & 4 deletions
@@ -20,15 +20,15 @@ public class CopyJobSession extends AbstractJobSession {
     protected AtomicLong readCounter = new AtomicLong(0);
     protected AtomicLong writeCounter = new AtomicLong(0);
 
-    protected CopyJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sparkConf) {
-        super(sourceSession, astraSession, sparkConf);
+    protected CopyJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc) {
+        super(sourceSession, astraSession, sc);
     }
 
-    public static CopyJobSession getInstance(CqlSession sourceSession, CqlSession astraSession, SparkConf sparkConf) {
+    public static CopyJobSession getInstance(CqlSession sourceSession, CqlSession astraSession, SparkConf sc) {
         if (copyJobSession == null) {
             synchronized (CopyJobSession.class) {
                 if (copyJobSession == null) {
-                    copyJobSession = new CopyJobSession(sourceSession, astraSession, sparkConf);
+                    copyJobSession = new CopyJobSession(sourceSession, astraSession, sc);
                 }
             }
         }

src/main/java/datastax/astra/migrate/DiffJobSession.java

Lines changed: 9 additions & 9 deletions
@@ -31,13 +31,13 @@ public class DiffJobSession extends CopyJobSession {
     private AtomicLong validCounter = new AtomicLong(0);
     private AtomicLong skippedCounter = new AtomicLong(0);
 
-    private DiffJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sparkConf) {
-        super(sourceSession, astraSession, sparkConf);
+    private DiffJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc) {
+        super(sourceSession, astraSession, sc);
 
-        autoCorrectMissing = Boolean.parseBoolean(sparkConf.get("spark.destination.autocorrect.missing", "false"));
+        autoCorrectMissing = Boolean.parseBoolean(Util.getSparkPropOr(sc, "spark.target.autocorrect.missing", "false"));
         logger.info("PARAM -- Autocorrect Missing: " + autoCorrectMissing);
 
-        autoCorrectMismatch = Boolean.parseBoolean(sparkConf.get("spark.destination.autocorrect.mismatch", "false"));
+        autoCorrectMismatch = Boolean.parseBoolean(Util.getSparkPropOr(sc, "spark.target.autocorrect.mismatch", "false"));
         logger.info("PARAM -- Autocorrect Mismatch: " + autoCorrectMismatch);
     }
 
@@ -130,13 +130,13 @@ public void printCounts(String finalStr) {
     private void diff(Row sourceRow, Row astraRow) {
         if (astraRow == null) {
             missingCounter.incrementAndGet();
-            logger.error("Data is missing in Astra: " + getKey(sourceRow));
+            logger.error("Missing target row found for key: " + getKey(sourceRow));
             //correct data
 
             if (autoCorrectMissing) {
                 astraSession.execute(bindInsert(astraInsertStatement, sourceRow, null));
                 correctedMissingCounter.incrementAndGet();
-                logger.error("Corrected missing data in Astra: " + getKey(sourceRow));
+                logger.error("Inserted missing row in target: " + getKey(sourceRow));
             }
 
             return;
@@ -145,7 +145,7 @@ private void diff(Row sourceRow, Row astraRow) {
         String diffData = isDifferent(sourceRow, astraRow);
         if (!diffData.isEmpty()) {
             mismatchCounter.incrementAndGet();
-            logger.error("Data mismatch found - Key: " + getKey(sourceRow) + " Data: " + diffData);
+            logger.error("Mismatch row found for key: " + getKey(sourceRow) + " Mismatch: " + diffData);
 
             if (autoCorrectMismatch) {
                 if (isCounterTable) {
@@ -154,7 +154,7 @@ private void diff(Row sourceRow, Row astraRow) {
                     astraSession.execute(bindInsert(astraInsertStatement, sourceRow, null));
                 }
                 correctedMismatchCounter.incrementAndGet();
-                logger.error("Corrected mismatch data in Astra: " + getKey(sourceRow));
+                logger.error("Updated mismatch row in target: " + getKey(sourceRow));
             }
 
             return;
@@ -172,7 +172,7 @@ private String isDifferent(Row sourceRow, Row astraRow) {
 
             boolean isDiff = dataType.diff(source, astra);
             if (isDiff) {
-                diffData.append(" (Index: " + index + " Source: " + source + " Astra: " + astra + " ) ");
+                diffData.append("(Index: " + index + " Origin: " + source + " Target: " + astra + " ) ");
             }
         });
 
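
The fragment appended per differing column in the last hunk is exactly what surfaces in the README's sample mismatch log line. A tiny sketch with hypothetical column values (`equals()` stands in for the `dataType.diff(...)` call):

```java
public class MismatchFormatSketch {
    public static void main(String[] args) {
        // Hypothetical values; index 8 is the differing column.
        StringBuffer diffData = new StringBuffer();
        int index = 8;
        Object source = "Hello 3"; // origin value
        Object astra = "Hello 2";  // target value
        if (!source.equals(astra)) {
            diffData.append("(Index: " + index + " Origin: " + source + " Target: " + astra + " ) ");
        }
        System.out.println(diffData);
        // -> (Index: 8 Origin: Hello 3 Target: Hello 2 )
        // which appears in the log as:
        // Mismatch row found for key: <key> Mismatch: (Index: 8 Origin: Hello 3 Target: Hello 2 )
    }
}
```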

src/main/java/datastax/astra/migrate/Util.java

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+package datastax.astra.migrate;
+
+import org.apache.spark.SparkConf;
+
+import java.util.NoSuchElementException;
+
+public class Util {
+
+    public static String getSparkProp(SparkConf sc, String prop) {
+        try {
+            return sc.get(prop);
+        } catch (NoSuchElementException nse) {
+            String newProp = prop.replace("origin", "source").replace("target", "destination");
+            return sc.get(newProp);
+        }
+    }
+
+    public static String getSparkPropOr(SparkConf sc, String prop, String defaultVal) {
+        try {
+            return sc.get(prop);
+        } catch (NoSuchElementException nse) {
+            String newProp = prop.replace("origin", "source").replace("target", "destination");
+            return sc.get(newProp, defaultVal);
+        }
+    }
+
+    public static String getSparkPropOrEmpty(SparkConf sc, String prop) {
+        return getSparkPropOr(sc, prop, "");
+    }
+
+}
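
A usage sketch of the fallback behavior this class provides (property values are hypothetical; `SparkConf.set`/`get` are the standard Spark API):

```java
import org.apache.spark.SparkConf;

public class UtilUsageSketch {
    public static void main(String[] args) {
        // Config written against the old (pre-ZDM) property name.
        SparkConf sc = new SparkConf()
                .set("spark.source.keyspaceTable", "test.fruit");

        // Lookup by the new name: "spark.origin.keyspaceTable" is absent,
        // so Util rewrites origin -> source and retries.
        String kt = Util.getSparkProp(sc, "spark.origin.keyspaceTable");
        System.out.println(kt); // test.fruit

        // Absent under both names: the supplied default is returned.
        String retries = Util.getSparkPropOr(sc, "spark.maxRetries", "10");
        System.out.println(retries); // 10
    }
}
```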

src/main/scala/datastax/astra/migrate/AbstractJob.scala

Lines changed: 3 additions & 3 deletions
@@ -26,7 +26,7 @@ class AbstractJob extends BaseJob {
     if ("true".equals(isAstra)) {
       abstractLogger.info(connType + ": Connected to Astra using SCB: " + scbPath);
 
-      return CassandraConnector(sc.getConf
+      return CassandraConnector(sc
         .set("spark.cassandra.auth.username", username)
         .set("spark.cassandra.auth.password", password)
         .set("spark.cassandra.input.consistency.level", readConsistencyLevel)
@@ -40,7 +40,7 @@ class AbstractJob extends BaseJob {
         enabledAlgorithmsVar = "TLS_RSA_WITH_AES_128_CBC_SHA, TLS_RSA_WITH_AES_256_CBC_SHA"
       }
 
-      return CassandraConnector(sc.getConf
+      return CassandraConnector(sc
         .set("spark.cassandra.auth.username", username)
         .set("spark.cassandra.auth.password", password)
         .set("spark.cassandra.input.consistency.level", readConsistencyLevel)
@@ -57,7 +57,7 @@ class AbstractJob extends BaseJob {
     } else {
       abstractLogger.info(connType + ": Connected to Cassandra (or DSE) host: " + host);
 
-      return CassandraConnector(sc.getConf.set("spark.cassandra.auth.username", username)
+      return CassandraConnector(sc.set("spark.cassandra.auth.username", username)
         .set("spark.cassandra.auth.password", password)
         .set("spark.cassandra.input.consistency.level", readConsistencyLevel)
         .set("spark.cassandra.connection.host", host))
