Commit 42e2553

Merge remote-tracking branch 'origin/issue/CDM-17' into issue/97

2 parents: 2d8e0ed + ed79962

14 files changed: +114 -102 lines
Lines changed: 2 additions & 7 deletions

```diff
@@ -1,8 +1,7 @@
 name: Build and test jar with integration tests
-run-name: ${{ github.actor }} is testing out GitHub Actions 🚀
 on: [push]
 jobs:
-  Explore-GitHub-Actions:
+  CDM-Integration-Test:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -12,12 +11,8 @@ jobs:
           java-version: '8'
           distribution: 'temurin'
           cache: maven
-      - name: Build with Maven
-        run: mvn -B package --file pom.xml
-
       - name: Test SIT with cdm
         working-directory: SIT
         run: |
           echo "Testing SIT with cdm"
-          chmod o+rx ./environment.sh
-          make -f Makefile
+          make -f Makefile
```
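Net effect: the experimental `run-name` and the `Explore-GitHub-Actions` job are replaced by a `CDM-Integration-Test` job, and the standalone Maven build step is dropped — since the SIT Makefile's `setup` target depends on `build` (see the Makefile diff below), the single `make` invocation now builds the jar before running the smoke tests. For reference, the resulting job reconstructed from the hunks above, with YAML indentation approximated (the diff view flattens it) and the unchanged setup-java lines summarized in a comment:

```yaml
name: Build and test jar with integration tests
on: [push]
jobs:
  CDM-Integration-Test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # ... setup-java with java-version '8', distribution 'temurin', cache: maven ...
      - name: Test SIT with cdm
        working-directory: SIT
        run: |
          echo "Testing SIT with cdm"
          make -f Makefile
```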

SIT/Makefile

Lines changed: 2 additions & 3 deletions

```diff
@@ -1,8 +1,6 @@
 
-all: set_permissions setup test_smoke env_teardown
+all: setup test_smoke env_teardown
 
-set_permissions:
-	chmod -R 777 ./*.sh
 setup: build env_setup
 
 reset: build env_reset
@@ -16,6 +14,7 @@ test_smoke_cmd:
 	./test.sh -p smoke
 
 env_setup:
+	chmod -R 777 ./*.sh
 	./environment.sh -m setup -j ../target/cassandra-data-migrator*.jar
 env_reset:
 	./environment.sh -m reset -j ../target/cassandra-data-migrator*.jar
```
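The permissions handling the workflow used to do (`chmod o+rx ./environment.sh`) is consolidated here: the dedicated `set_permissions` target disappears and a broader `chmod -R 777` runs as the first recipe line of `env_setup`, just before the scripts are invoked. The affected targets after the change (recipe lines assume tabs, as Make requires):

```makefile
all: setup test_smoke env_teardown

setup: build env_setup

env_setup:
	chmod -R 777 ./*.sh
	./environment.sh -m setup -j ../target/cassandra-data-migrator*.jar
```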

pom.xml

Lines changed: 4 additions & 1 deletion

```diff
@@ -8,7 +8,7 @@
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <revision>3.4.0</revision>
+        <revision>4.0.0-SNAPSHOT</revision>
         <scala.version>2.12.17</scala.version>
         <scala.main.version>2.12</scala.main.version>
         <spark.version>3.3.1</spark.version>
@@ -23,6 +23,9 @@
             <id>github</id>
             <name>GitHub Packages</name>
             <url>https://maven.pkg.github.com/datastax/cassandra-data-migrator</url>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
         </repository>
     </distributionManagement>
 
```
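Two separate changes land in the pom: the project revision moves from 3.4.0 to 4.0.0-SNAPSHOT, and the GitHub Packages repository entry gains a snapshots policy, presumably intended to keep SNAPSHOT artifacts out of this repository. The resulting distributionManagement block, assembled from the hunk above:

```xml
<distributionManagement>
    <repository>
        <id>github</id>
        <name>GitHub Packages</name>
        <url>https://maven.pkg.github.com/datastax/cassandra-data-migrator</url>
        <snapshots>
            <enabled>false</enabled>
        </snapshots>
    </repository>
</distributionManagement>
```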
src/main/java/datastax/astra/migrate/CopyJobSession.java

Lines changed: 1 addition & 0 deletions

```diff
@@ -2,6 +2,7 @@
 
 import com.datastax.oss.driver.api.core.CqlSession;
 import com.datastax.oss.driver.api.core.cql.*;
+import datastax.astra.migrate.properties.KnownProperties;
 import org.apache.spark.SparkConf;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
```

src/main/java/datastax/astra/migrate/MigrateDataType.java

Lines changed: 18 additions & 5 deletions

```diff
@@ -53,6 +53,19 @@ public MigrateDataType(String dataType) {
         }
         this.typeClass = getType(this.type);
 
+        if (this.type >= minType && this.type <= maxType) {
+            isValid = true;
+            for (Object o : subTypes) {
+                if (null == o || Object.class == o) {
+                    isValid = false;
+                }
+            }
+        }
+        else {
+            isValid = false;
+        }
+        this.typeClass = getType(this.type);
+
         if (this.type >= minType && this.type <= maxType) {
             isValid = true;
             for (Object o : subTypes) {
@@ -76,16 +89,16 @@ private int typeAsInt(String dataType) {
         return rtn;
     }
 
-    public boolean diff(Object source, Object astra) {
-        if (source == null && astra == null) {
+    public boolean diff(Object obj1, Object obj2) {
+        if (obj1 == null && obj2 == null) {
             return false;
-        } else if (source == null && astra != null) {
+        } else if (obj1 == null && obj2 != null) {
             return true;
-        } else if (source != null && astra == null) {
+        } else if (obj1 != null && obj2 == null) {
             return true;
         }
 
-        return !source.equals(astra);
+        return !obj1.equals(obj2);
     }
 
     private Class getType(int type) {
```
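Two things are visible here. First, the merge leaves the type-validation block and the `this.typeClass = getType(this.type)` assignment duplicated inside the constructor, which looks like an unresolved merge artifact (redundant, though harmless). Second, `diff()` drops the source/astra naming for neutral obj1/obj2. A minimal runnable sketch of its null semantics, mirroring the hunk above (`DiffSemantics` is a hypothetical wrapper class for illustration; `diff` returns true when the two values differ):

```java
// Mirrors the null-handling of MigrateDataType.diff(obj1, obj2) from the diff.
public class DiffSemantics {
    static boolean diff(Object obj1, Object obj2) {
        if (obj1 == null && obj2 == null) {
            return false;                  // both null: values match
        } else if (obj1 == null && obj2 != null) {
            return true;                   // only one side null: mismatch
        } else if (obj1 != null && obj2 == null) {
            return true;
        }
        return !obj1.equals(obj2);         // otherwise defer to equals()
    }

    public static void main(String[] args) {
        System.out.println(diff(null, null)); // false
        System.out.println(diff("a", null));  // true
        System.out.println(diff(null, "b"));  // true
        System.out.println(diff("a", "a"));   // false
        System.out.println(diff("a", "b"));   // true
    }
}
```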

src/main/java/datastax/astra/migrate/OriginCountJobSession.java

Lines changed: 35 additions & 34 deletions

```diff
@@ -45,48 +45,49 @@ public class OriginCountJobSession extends BaseJobSession {
     // protected Integer fieldGuardraillimitMB;
     // protected List<MigrateDataType> checkTableforColSizeTypes = new ArrayList<MigrateDataType>();
     //
-    protected OriginCountJobSession(CqlSession sourceSession, SparkConf sc) {
+    protected OriginCountJobSession(CqlSession originSession, SparkConf sc) {
         super(sc);
-        // this.sourceSession = sourceSession;
-        // batchSize = new Integer(sc.get("spark.batchSize", "1"));
-        // printStatsAfter = new Integer(sc.get("spark.printStatsAfter", "100000"));
+        // this.originSessionSession = originSession;
+        // batchSize = new Integer(sc.get(KnownProperties.SPARK_BATCH_SIZE, "1"));
+        // printStatsAfter = new Integer(sc.get(KnownProperties.SPARK_STATS_AFTER, "100000"));
         // if (printStatsAfter < 1) {
         // printStatsAfter = 100000;
         // }
         //
-        // readLimiter = RateLimiter.create(new Integer(sc.get("spark.readRateLimit", "20000")));
-        // sourceKeyspaceTable = sc.get("spark.origin.keyspaceTable");
+        // readLimiter = RateLimiter.create(new Integer(sc.get(KnownProperties.SPARK_LIMIT_READ, "20000")));
+        // originKeyspaceTable = sc.get(KnownProperties.ORIGIN_KEYSPACE_TABLE);
         //
-        // hasRandomPartitioner = Boolean.parseBoolean(sc.get("spark.origin.hasRandomPartitioner", "false"));
-        // isCounterTable = Boolean.parseBoolean(sc.get("spark.counterTable", "false"));
+        // hasRandomPartitioner = Boolean.parseBoolean(sc.get(KnownProperties.ORIGIN_HAS_RANDOM_PARTITIONER, "false"));
+        // isCounterTable = Boolean.parseBoolean(sc.get(KnownProperties.ORIGIN_IS_COUNTER, "false"));
         //
-        // checkTableforColSize = Boolean.parseBoolean(sc.get("spark.origin.checkTableforColSize", "false"));
-        // checkTableforselectCols = sc.get("spark.origin.checkTableforColSize.cols");
-        // checkTableforColSizeTypes = getTypes(sc.get("spark.origin.checkTableforColSize.cols.types"));
-        // filterColName = propertyHelper.getString(KnownProperties.ORIGIN_FILTER_COLUMN_NAME);
-        // filterColType = propertyHelper.getString(KnownProperties.ORIGIN_FILTER_COLUMN_TYPE); // TODO: this is a string, but should be MigrationDataType?
-        // filterColIndex = Integer.parseInt(sc.get("spark.origin.FilterColumnIndex", "0"));
-        // fieldGuardraillimitMB = Integer.parseInt(sc.get("spark.fieldGuardraillimitMB", "0"));
+        // checkTableforColSize = Boolean.parseBoolean(sc.get(KnownProperties.ORIGIN_CHECK_COLSIZE_ENABLED, "false"));
+        // checkTableforselectCols = sc.get(KnownProperties.ORIGIN_CHECK_COLSIZE_COLUMN_NAMES);
+        // checkTableforColSizeTypes = getTypes(sc.get(KnownProperties.ORIGIN_CHECK_COLSIZE_COLUMN_TYPES));
+        // filterColName = Util.getSparkPropOrEmpty(sc, KnownProperties.ORIGIN_FILTER_COLUMN_NAME);
+        // filterColType = Util.getSparkPropOrEmpty(sc, KnownProperties.ORIGIN_FILTER_COLUMN_TYPE);
+        // filterColIndex = Integer.parseInt(sc.get(KnownProperties.ORIGIN_FILTER_COLUMN_INDEX, "0"));
+        // fieldGuardraillimitMB = Integer.parseInt(sc.get(KnownProperties.FIELD_GUARDRAIL_MB, "0"));
         //
-        // String partionKey = sc.get("spark.query.cols.partitionKey");
-        // idColTypes = getTypes(sc.get("spark.query.cols.id.types"));
+        // String partionKey = sc.get(KnownProperties.ORIGIN_PARTITION_KEY);
+        // idColTypes = getTypes(sc.get(KnownProperties.TARGET_PRIMARY_KEY_TYPES));
         //
-        // String selectCols = sc.get("spark.query.cols.select");
-        // String updateSelectMappingStr = sc.get("spark.counterTable.cql.index", "0");
+        // String selectCols = sc.get(KnownProperties.ORIGIN_COLUMN_NAMES);
+        // String updateSelectMappingStr = sc.get(KnownProperties.ORIGIN_COUNTER_INDEXES, "0");
         // for (String updateSelectIndex : updateSelectMappingStr.split(",")) {
         // updateSelectMapping.add(Integer.parseInt(updateSelectIndex));
         // }
-        // String sourceSelectCondition = sc.get("spark.query.cols.select.condition", "");
-        // sourceSelectStatement = sourceSession.prepare(
-        // "select " + selectCols + " from " + sourceKeyspaceTable + " where token(" + partionKey.trim()
-        // + ") >= ? and token(" + partionKey.trim() + ") <= ? " + sourceSelectCondition + " ALLOW FILTERING");
+        // String originSelectCondition = sc.get(KnownProperties.ORIGIN_FILTER_CONDITION, "");
+        // // TODO: AbstractJobSession has some checks to ensure AND is added to the condition
+        // originSelectStatement = originSession.prepare(
+        // "select " + selectCols + " from " + originKeyspaceTable + " where token(" + partionKey.trim()
+        // + ") >= ? and token(" + partionKey.trim() + ") <= ? " + originSelectCondition + " ALLOW FILTERING");
     }
     //
-    // public static OriginCountJobSession getInstance(CqlSession sourceSession, SparkConf sparkConf) {
+    // public static OriginCountJobSession getInstance(CqlSession originSession, SparkConf sparkConf) {
     // if (originCountJobSession == null) {
     // synchronized (OriginCountJobSession.class) {
     // if (originCountJobSession == null) {
-    // originCountJobSession = new OriginCountJobSession(sourceSession, sparkConf);
+    // originCountJobSession = new OriginCountJobSession(originSession, sparkConf);
     // }
     // }
     // }
@@ -100,7 +101,7 @@ protected OriginCountJobSession(CqlSession sourceSession, SparkConf sc) {
     // int maxAttempts = maxRetries + 1;
     // for (int attempts = 1; attempts <= maxAttempts && !done; attempts++) {
     // try {
-    // ResultSet resultSet = sourceSession.execute(sourceSelectStatement.bind(hasRandomPartitioner ?
+    // ResultSet resultSet = originSessionSession.execute(originSelectStatement.bind(hasRandomPartitioner ?
     // min : min.longValueExact(), hasRandomPartitioner ? max : max.longValueExact())
    // .setConsistencyLevel(readConsistencyLevel).setPageSize(fetchSizeInRows));
     //
@@ -110,16 +111,16 @@ protected OriginCountJobSession(CqlSession sourceSession, SparkConf sc) {
     // // maxWriteTimeStampFilter is less than max long
     // // do not batch for counters as it adds latency & increases chance of discrepancy
     // if (batchSize == 1 || writeTimeStampFilter || isCounterTable) {
-    // for (Row sourceRow : resultSet) {
+    // for (Row originRow : resultSet) {
     // readLimiter.acquire(1);
     //
     // if (checkTableforColSize) {
-    // int rowColcnt = GetRowColumnLength(sourceRow, filterColType, filterColIndex);
+    // int rowColcnt = GetRowColumnLength(originRow, filterColType, filterColIndex);
     // String result = "";
     // if (rowColcnt > fieldGuardraillimitMB * 1048576) {
     // for (int index = 0; index < checkTableforColSizeTypes.size(); index++) {
     // MigrateDataType dataType = checkTableforColSizeTypes.get(index);
-    // Object colData = getData(dataType, index, sourceRow);
+    // Object colData = getData(dataType, index, originRow);
     // String[] colName = checkTableforselectCols.split(",");
     // result = result + " - " + colName[index] + " : " + colData;
     // }
@@ -130,17 +131,17 @@ protected OriginCountJobSession(CqlSession sourceSession, SparkConf sc) {
     // }
     // } else {
     // BatchStatement batchStatement = BatchStatement.newInstance(BatchType.UNLOGGED);
-    // for (Row sourceRow : resultSet) {
+    // for (Row originRow : resultSet) {
     // readLimiter.acquire(1);
     // writeLimiter.acquire(1);
     //
     // if (checkTableforColSize) {
-    // int rowColcnt = GetRowColumnLength(sourceRow, filterColType, filterColIndex);
+    // int rowColcnt = GetRowColumnLength(originRow, filterColType, filterColIndex);
     // String result = "";
     // if (rowColcnt > fieldGuardraillimitMB * 1048576) {
     // for (int index = 0; index < checkTableforColSizeTypes.size(); index++) {
     // MigrateDataType dataType = checkTableforColSizeTypes.get(index);
-    // Object colData = getData(dataType, index, sourceRow);
+    // Object colData = getData(dataType, index, originRow);
     // String[] colName = checkTableforselectCols.split(",");
     // result = result + " - " + colName[index] + " : " + colData;
     // }
@@ -166,9 +167,9 @@ protected OriginCountJobSession(CqlSession sourceSession, SparkConf sc) {
     // }
     // }
     //
-    // private int GetRowColumnLength(Row sourceRow, String filterColType, Integer filterColIndex) {
+    // private int GetRowColumnLength(Row originRow, String filterColType, Integer filterColIndex) {
     // int sizeInMB = 0;
-    // Object colData = getData(new MigrateDataType(filterColType), filterColIndex, sourceRow);
+    // Object colData = getData(new MigrateDataType(filterColType), filterColIndex, originRow);
     // byte[] colBytes = SerializationUtils.serialize((Serializable) colData);
     // sizeInMB = colBytes.length;
     // if (sizeInMB > fieldGuardraillimitMB)
```
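Although this class body remains commented out, the rename (source → origin, raw property strings → KnownProperties constants) clarifies the guardrail logic it sketches: serialize a column value and compare the byte length against a limit configured in MB (1 MB = 1048576 bytes, as in the rowColcnt check above). A self-contained sketch of that check, assuming commons-lang3's SerializationUtils as used in the commented code; the class and method names here are illustrative only:

```java
import java.io.Serializable;
import org.apache.commons.lang3.SerializationUtils;

// Illustrative sketch of the guardrail check in the commented-out code above:
// flag any column whose serialized size exceeds fieldGuardraillimitMB megabytes.
public class GuardrailSketch {
    static final int fieldGuardraillimitMB = 10; // default from KnownProperties

    static boolean exceedsGuardrail(Serializable colData) {
        byte[] colBytes = SerializationUtils.serialize(colData);
        return colBytes.length > fieldGuardraillimitMB * 1048576; // MB -> bytes
    }

    public static void main(String[] args) {
        System.out.println(exceedsGuardrail("a short string")); // false
    }
}
```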

src/main/java/datastax/astra/migrate/properties/KnownProperties.java

Lines changed: 3 additions & 3 deletions

```diff
@@ -174,7 +174,7 @@ public enum PropertyType {
     public static final String SPARK_MAX_RETRIES = "spark.maxRetries"; // 0
     public static final String READ_FETCH_SIZE = "spark.read.fetch.sizeInRows"; //1000
     public static final String SPARK_STATS_AFTER = "spark.printStatsAfter"; //100000
-    public static final String FIELD_GUARDRAIL = "spark.fieldGuardraillimitMB"; //10
+    public static final String FIELD_GUARDRAIL_MB = "spark.fieldGuardraillimitMB"; //10
     public static final String PARTITION_MIN = "spark.origin.minPartition"; // -9223372036854775808
     public static final String PARTITION_MAX = "spark.origin.maxPartition"; // 9223372036854775807
 
@@ -195,8 +195,8 @@
         defaults.put(READ_FETCH_SIZE, "1000");
         types.put(SPARK_STATS_AFTER, PropertyType.NUMBER);
         defaults.put(SPARK_STATS_AFTER, "100000");
-        types.put(FIELD_GUARDRAIL, PropertyType.NUMBER);
-        defaults.put(FIELD_GUARDRAIL, "10");
+        types.put(FIELD_GUARDRAIL_MB, PropertyType.NUMBER);
+        defaults.put(FIELD_GUARDRAIL_MB, "10");
         types.put(PARTITION_MIN, PropertyType.NUMBER);
         defaults.put(PARTITION_MIN, "-9223372036854775808");
         types.put(PARTITION_MAX, PropertyType.NUMBER);
```
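The rename is constant-only: the property key string `spark.fieldGuardraillimitMB` and its default of "10" are untouched, so existing job configurations keep working. A small usage sketch, assuming the project classes on the classpath (`GuardrailProperty` is a hypothetical illustration class):

```java
import datastax.astra.migrate.properties.KnownProperties;
import org.apache.spark.SparkConf;

// Hypothetical illustration: read the guardrail limit through the renamed
// constant rather than a raw string, as the commented-out session code now does.
public class GuardrailProperty {
    public static void main(String[] args) {
        SparkConf sc = new SparkConf();
        // KnownProperties.FIELD_GUARDRAIL_MB still maps to "spark.fieldGuardraillimitMB"
        int limitMB = Integer.parseInt(sc.get(KnownProperties.FIELD_GUARDRAIL_MB, "10"));
        System.out.println("fieldGuardraillimitMB = " + limitMB);
    }
}
```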

src/main/scala/datastax/astra/migrate/AbstractJob.scala

Lines changed: 6 additions & 6 deletions

```diff
@@ -9,14 +9,14 @@ class AbstractJob extends BaseJob {
   abstractLogger.info("PARAM -- Max Partition: " + maxPartition)
   abstractLogger.info("PARAM -- Number of Splits : " + numSplits)
   abstractLogger.info("PARAM -- Coverage Percent: " + coveragePercent)
-  abstractLogger.info("PARAM -- Origin SSL Enabled: {}", sourceSSLEnabled);
-  abstractLogger.info("PARAM -- Target SSL Enabled: {}", destinationSSLEnabled);
+  abstractLogger.info("PARAM -- Origin SSL Enabled: {}", originSSLEnabled);
+  abstractLogger.info("PARAM -- Target SSL Enabled: {}", targetSSLEnabled);
 
-  var sourceConnection = getConnection(true, sourceScbPath, sourceHost, sourcePort, sourceUsername, sourcePassword, sourceSSLEnabled,
-    sourceTrustStorePath, sourceTrustStorePassword, sourceTrustStoreType, sourceKeyStorePath, sourceKeyStorePassword, sourceEnabledAlgorithms);
+  var originConnection = getConnection(true, originScbPath, originHost, originPort, originUsername, originPassword, originSSLEnabled,
+    originTrustStorePath, originTrustStorePassword, originTrustStoreType, originKeyStorePath, originKeyStorePassword, originEnabledAlgorithms);
 
-  var destinationConnection = getConnection(false, destinationScbPath, destinationHost, destinationPort, destinationUsername, destinationPassword, destinationSSLEnabled,
-    destinationTrustStorePath, destinationTrustStorePassword, destinationTrustStoreType, destinationKeyStorePath, destinationKeyStorePassword, destinationEnabledAlgorithms);
+  var targetConnection = getConnection(false, targetScbPath, targetHost, targetPort, targetUsername, targetPassword, targetSSLEnabled,
+    targetTrustStorePath, targetTrustStorePassword, targetTrustStoreType, targetKeyStorePath, targetKeyStorePassword, targetEnabledAlgorithms);
 
   private def getConnection(isSource: Boolean, scbPath: String, host: String, port: String, username: String, password: String,
                             sslEnabled: String, trustStorePath: String, trustStorePassword: String, trustStoreType: String,
```
