@@ -19,6 +19,7 @@ public class CopyJobSession extends AbstractJobSession {
     protected AtomicLong readCounter = new AtomicLong(0);
     protected AtomicLong skippedCounter = new AtomicLong(0);
     protected AtomicLong writeCounter = new AtomicLong(0);
+    protected AtomicLong errorCounter = new AtomicLong(0);

     protected CopyJobSession(CqlSession sourceSession, CqlSession astraSession, SparkConf sc) {
         super(sourceSession, astraSession, sc);
@@ -44,8 +45,13 @@ public static CopyJobSession getInstance(CqlSession sourceSession, CqlSession as
     public void getDataAndInsert(BigInteger min, BigInteger max) {
         logger.info("ThreadID: {} Processing min: {} max: {}", Thread.currentThread().getId(), min, max);
         int maxAttempts = maxRetries;
-        for (int retryCount = 1; retryCount <= maxAttempts; retryCount++) {
+        boolean done = false;

+        for (int retryCount = 1; retryCount <= maxAttempts && !done; retryCount++) {
+            long readCnt = 0;
+            long writeCnt = 0;
+            long skipCnt = 0;
+            long errCnt = 0;
             try {
                 ResultSet resultSet = sourceSession.execute(sourceSelectStatement.bind(hasRandomPartitioner ?
                         min : min.longValueExact(), hasRandomPartitioner ? max : max.longValueExact())
@@ -59,67 +65,66 @@ public void getDataAndInsert(BigInteger min, BigInteger max) {
                 if (batchSize == 1 || writeTimeStampFilter || isCounterTable) {
                     for (Row sourceRow : resultSet) {
                         readLimiter.acquire(1);
+                        readCnt++;
+                        if (readCnt % printStatsAfter == 0) {
+                            printCounts(false);
+                        }

                         if (filterData) {
                             String col = (String) getData(new MigrateDataType(filterColType), filterColIndex, sourceRow);
                             if (col.trim().equalsIgnoreCase(filterColValue)) {
                                 logger.warn("Skipping row and filtering out: {}", getKey(sourceRow));
-                                skippedCounter.incrementAndGet();
+                                skipCnt++;
                                 continue;
                             }
                         }
-
                         if (writeTimeStampFilter) {
                             // only process rows greater than writeTimeStampFilter
                             Long sourceWriteTimeStamp = getLargestWriteTimeStamp(sourceRow);
                             if (sourceWriteTimeStamp < minWriteTimeStampFilter
                                     || sourceWriteTimeStamp > maxWriteTimeStampFilter) {
-                                readCounter.incrementAndGet();
-                                skippedCounter.incrementAndGet();
+                                skipCnt++;
                                 continue;
                             }
                         }
-
                         writeLimiter.acquire(1);
-                        if (readCounter.incrementAndGet() % printStatsAfter == 0) {
-                            printCounts(false);
-                        }
+
                         Row astraRow = null;
                         if (isCounterTable) {
                             ResultSet astraReadResultSet = astraSession
                                     .execute(selectFromAstra(astraSelectStatement, sourceRow));
                             astraRow = astraReadResultSet.one();
                         }

-
                         CompletionStage<AsyncResultSet> astraWriteResultSet = astraSession
                                 .executeAsync(bindInsert(astraInsertStatement, sourceRow, astraRow));
                         writeResults.add(astraWriteResultSet);
                         if (writeResults.size() > fetchSizeInRows) {
-                            iterateAndClearWriteResults(writeResults, 1);
+                            writeCnt += iterateAndClearWriteResults(writeResults, 1);
                         }
                     }

                     // clear the write resultset
-                    iterateAndClearWriteResults(writeResults, 1);
+                    writeCnt += iterateAndClearWriteResults(writeResults, 1);
                 } else {
                     BatchStatement batchStatement = BatchStatement.newInstance(BatchType.UNLOGGED);
                     for (Row sourceRow : resultSet) {
                         readLimiter.acquire(1);
-                        writeLimiter.acquire(1);
-                        if (readCounter.incrementAndGet() % printStatsAfter == 0) {
+                        readCnt++;
+                        if (readCnt % printStatsAfter == 0) {
                             printCounts(false);
                         }

                         if (filterData) {
                             String colValue = (String) getData(new MigrateDataType(filterColType), filterColIndex, sourceRow);
                             if (colValue.trim().equalsIgnoreCase(filterColValue)) {
                                 logger.warn("Skipping row and filtering out: {}", getKey(sourceRow));
-                                skippedCounter.incrementAndGet();
+                                skipCnt++;
                                 continue;
                             }
                         }

+                        writeLimiter.acquire(1);
                         batchStatement = batchStatement.add(bindInsert(astraInsertStatement, sourceRow, null));

                         // if batch threshold is met, send the writes and clear the batch
@@ -130,27 +135,37 @@ public void getDataAndInsert(BigInteger min, BigInteger max) {
                         }

                         if (writeResults.size() * batchSize > fetchSizeInRows) {
-                            iterateAndClearWriteResults(writeResults, batchSize);
+                            writeCnt += iterateAndClearWriteResults(writeResults, batchSize);
                         }
                     }

                     // clear the write resultset
-                    iterateAndClearWriteResults(writeResults, batchSize);
+                    writeCnt += iterateAndClearWriteResults(writeResults, batchSize);

                     // if there are any pending writes because the batchSize threshold was not met, then write and clear them
                     if (batchStatement.size() > 0) {
                         CompletionStage<AsyncResultSet> writeResultSet = astraSession.executeAsync(batchStatement);
                         writeResults.add(writeResultSet);
-                        iterateAndClearWriteResults(writeResults, batchStatement.size());
+                        writeCnt += iterateAndClearWriteResults(writeResults, batchStatement.size());
                         batchStatement = BatchStatement.newInstance(BatchType.UNLOGGED);
                     }
                 }

-                retryCount = maxAttempts;
+                readCounter.addAndGet(readCnt);
+                writeCounter.addAndGet(writeCnt);
+                skippedCounter.addAndGet(skipCnt);
+                done = true;
             } catch (Exception e) {
+                if (retryCount == maxAttempts) {
+                    readCounter.addAndGet(readCnt);
+                    writeCounter.addAndGet(writeCnt);
+                    skippedCounter.addAndGet(skipCnt);
+                    errorCounter.addAndGet(readCnt - writeCnt - skipCnt);
+                }
                 logger.error("Error occurred retry#: {}", retryCount, e);
                 logger.error("Error with PartitionRange -- ThreadID: {} Processing min: {} max: {} -- Retry# {}",
                         Thread.currentThread().getId(), min, max, retryCount);
+                logger.error("Error stats Read#: {}, Wrote#: {}, Skipped#: {}, Error#: {}", readCnt, writeCnt, skipCnt, (readCnt - writeCnt - skipCnt));
             }
         }
     }
@@ -164,18 +179,22 @@ public synchronized void printCounts(boolean isFinal) {
         logger.info("{} Read Record Count: {}", msg, readCounter.get());
         logger.info("{} Skipped Record Count: {}", msg, skippedCounter.get());
         logger.info("{} Write Record Count: {}", msg, writeCounter.get());
+        logger.info("{} Error Record Count: {}", msg, errorCounter.get());
         if (isFinal) {
             logger.info("################################################################################################");
         }
     }

-    private void iterateAndClearWriteResults(Collection<CompletionStage<AsyncResultSet>> writeResults, int incrementBy) throws Exception {
+    private int iterateAndClearWriteResults(Collection<CompletionStage<AsyncResultSet>> writeResults, int incrementBy) throws Exception {
+        int cnt = 0;
         for (CompletionStage<AsyncResultSet> writeResult : writeResults) {
             //wait for the writes to complete for the batch. The Retry policy, if defined, should retry the write on timeouts.
             writeResult.toCompletableFuture().get().one();
-            writeCounter.addAndGet(incrementBy);
+            cnt += incrementBy;
         }
         writeResults.clear();
+
+        return cnt;
     }

 }
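
The net effect of the commit, shown in isolation: counting moves from the shared AtomicLong fields into per-attempt locals (readCnt, writeCnt, skipCnt) that are published exactly once, either after a successful attempt or on the last failed retry, with errors derived as readCnt - writeCnt - skipCnt. Below is a minimal, compilable sketch of that pattern, not code from this repository; RetryCounterSketch, processRange, doWrites, and the stand-in tallies are hypothetical names used only for illustration.

import java.util.concurrent.atomic.AtomicLong;

// Hypothetical sketch of the retry-safe counting pattern in the diff above.
public class RetryCounterSketch {
    private final AtomicLong readCounter = new AtomicLong(0);
    private final AtomicLong writeCounter = new AtomicLong(0);
    private final AtomicLong skippedCounter = new AtomicLong(0);
    private final AtomicLong errorCounter = new AtomicLong(0);

    public void processRange(int maxAttempts) {
        boolean done = false;
        for (int retryCount = 1; retryCount <= maxAttempts && !done; retryCount++) {
            // Per-attempt tallies; nothing is published until the attempt resolves.
            long readCnt = 0, writeCnt = 0, skipCnt = 0;
            try {
                // Stand-in for the real read/filter/write loop.
                readCnt = 100;
                skipCnt = 5;
                writeCnt = doWrites(readCnt - skipCnt); // may throw mid-range
                // Success: publish this attempt's tallies exactly once.
                readCounter.addAndGet(readCnt);
                writeCounter.addAndGet(writeCnt);
                skippedCounter.addAndGet(skipCnt);
                done = true;
            } catch (Exception e) {
                // Publish only when retries are exhausted, so a range that
                // eventually succeeds is never double-counted.
                if (retryCount == maxAttempts) {
                    readCounter.addAndGet(readCnt);
                    writeCounter.addAndGet(writeCnt);
                    skippedCounter.addAndGet(skipCnt);
                    // Rows read but neither written nor skipped count as errors.
                    errorCounter.addAndGet(readCnt - writeCnt - skipCnt);
                }
            }
        }
    }

    private long doWrites(long rows) throws Exception {
        return rows; // stand-in; the real job writes asynchronously and can fail
    }
}

Because intermediate failed attempts publish nothing, the shared counters reflect each partition range at most once, which the old code (incrementing readCounter and writeCounter inline) could not guarantee across retries.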