Commit 6e7c4a2

Merge branch '3.3.0_stable' into docs_update

2 parents: 8e1927c + 264e5ed

23 files changed: +643 -583 lines

.github/workflows/maven-publish.yml

Lines changed: 1 addition & 0 deletions
````diff
@@ -4,6 +4,7 @@
 name: Maven Package
 
 on:
+  workflow_dispatch:
   push:
     branches: [ main ]
 
````
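With `workflow_dispatch:` in place, the Maven Package workflow can be triggered by hand from the Actions tab. As a sketch, assuming an authenticated GitHub CLI, the same manual run can be started from a terminal:

```
# manually dispatch the workflow enabled by this commit
gh workflow run maven-publish.yml --ref main
```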

Dockerfile

Lines changed: 2 additions & 2 deletions
````diff
@@ -18,14 +18,14 @@ RUN apt-get update && apt-get install -y openssh-server vim python3 --no-install
 service ssh start
 
 # Copy CDM jar & template files
-ARG MAVEN_VERSION=3.8.7
+ARG MAVEN_VERSION=3.8.8
 ARG USER_HOME_DIR="/root"
 ARG BASE_URL=https://dlcdn.apache.org/maven/maven-3/${MAVEN_VERSION}/binaries
 ENV MAVEN_HOME /usr/share/maven
 ENV MAVEN_CONFIG "$USER_HOME_DIR/.m2"
 COPY ./src /assets/src
 COPY ./pom.xml /assets/pom.xml
-COPY ./src/resources/sparkConf.properties /assets/
+COPY src/resources/cdm.properties /assets/
 COPY ./src/resources/partitions.csv /assets/
 COPY ./src/resources/primary_key_rows.csv /assets/
 COPY ./src/resources/runCommands.txt /assets/
````
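Because `MAVEN_VERSION` is a build `ARG`, the version can also be overridden at image-build time without editing the Dockerfile. A minimal sketch using standard `docker build` options; the image tag is illustrative:

```
# pin a different Maven release at build time; tag name is arbitrary
docker build --build-arg MAVEN_VERSION=3.8.8 -t cassandra-data-migrator:local .
```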

README.md

Lines changed: 25 additions & 6 deletions
````diff
@@ -24,13 +24,14 @@ tar -xvzf spark-3.3.1-bin-hadoop3.tgz
 
 # Steps for Data-Migration:
 
-1. `sparkConf.properties` file needs to be configured as applicable for the environment
-> A sample Spark conf file configuration can be [found here](./src/resources/sparkConf.properties)
+1. `cdm.properties` file needs to be configured as applicable for the environment
+> A sample properties file can be [found here](./src/resources/cdm.properties)
 2. Place the conf file where it can be accessed while running the job via spark-submit.
 3. Run the below job using `spark-submit` command as shown below:
 
 ```
-./spark-submit --properties-file sparkConf.properties /
+./spark-submit --properties-file cdm.properties /
+--conf spark.origin.keyspaceTable="<keyspace-name>.<table-name>" /
 --master "local[*]" /
 --class datastax.astra.migrate.Migrate cassandra-data-migrator-3.x.x.jar &> logfile_name.txt
 ```
````
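For illustration, here is the command above with its placeholders substituted. The keyspace `shop`, table `orders`, and log file name are hypothetical, the jar version follows the `<revision>` bump in this commit's `pom.xml`, and `\` is used as the shell line-continuation character:

```
./spark-submit --properties-file cdm.properties \
  --conf spark.origin.keyspaceTable="shop.orders" \
  --master "local[*]" \
  --class datastax.astra.migrate.Migrate cassandra-data-migrator-3.4.0.jar &> logfile_name.txt
```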
````diff
@@ -39,7 +40,8 @@ Note:
 - Above command generates a log file `logfile_name.txt` to avoid log output on the console.
 - Add option `--driver-memory 25G --executor-memory 25G` as shown below if the table migrated is large (over 100GB)
 ```
-./spark-submit --properties-file sparkConf.properties /
+./spark-submit --properties-file cdm.properties /
+--conf spark.origin.keyspaceTable="<keyspace-name>.<table-name>" /
 --master "local[*]" --driver-memory 25G --executor-memory 25G /
 --class datastax.astra.migrate.Migrate cassandra-data-migrator-3.x.x.jar &> logfile_name.txt
 ```
````
````diff
@@ -49,7 +51,8 @@ Note:
 - To run the job in Data validation mode, use class option `--class datastax.astra.migrate.DiffData` as shown below
 
 ```
-./spark-submit --properties-file sparkConf.properties /
+./spark-submit --properties-file cdm.properties /
+--conf spark.origin.keyspaceTable="<keyspace-name>.<table-name>" /
 --master "local[*]" /
 --class datastax.astra.migrate.DiffData cassandra-data-migrator-3.x.x.jar &> logfile_name.txt
 ```
````
````diff
@@ -79,7 +82,8 @@ Note:
 # Migrating specific partition ranges
 - You can also use the tool to migrate specific partition ranges using class option `--class datastax.astra.migrate.MigratePartitionsFromFile` as shown below
 ```
-./spark-submit --properties-file sparkConf.properties /
+./spark-submit --properties-file cdm.properties /
+--conf spark.origin.keyspaceTable="<keyspace-name>.<table-name>" /
 --master "local[*]" /
 --class datastax.astra.migrate.MigratePartitionsFromFile cassandra-data-migrator-3.x.x.jar &> logfile_name.txt
 ```
````
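This mode reads its token ranges from a `partitions.csv` file, which the next hunk's context line references. As a minimal sketch, such a file can be written from the shell; the format assumed here is one `min,max` token pair per line (following the sample shipped at `./src/resources/partitions.csv`), and the values below are made up for illustration:

```
# two token ranges to (re)process, one min,max pair per line; values are illustrative
cat > partitions.csv << 'EOF'
-9223372036854775808,-4611686018427387905
-4611686018427387904,-1
EOF
```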
````diff
@@ -93,11 +97,23 @@ When running in above mode the tool assumes a `partitions.csv` file to be presen
 ```
 This mode is specifically useful to processes a subset of partition-ranges that may have failed during a previous run.
 
+# Perform large-field Guardrail violation checks
+- The tool can be used to identify large fields from a table that may break you cluster guardrails (e.g. AstraDB has a 10MB limit for a single large field) `--class datastax.astra.migrate.Guardrail` as shown below
+```
+./spark-submit --properties-file cdmGuardrail.properties /
+--conf spark.origin.keyspaceTable="<keyspace-name>.<table-name>" /
+--master "local[*]" /
+--class datastax.astra.migrate.Guardrail cassandra-data-migrator-3.x.x.jar &> logfile_name.txt
+```
+> A sample Guardrail properties file can be [found here](./src/resources/cdmGuardrail.properties)
+
 # Features
+- Auto-detects table schema (column names, types, id fields, collections, UDTs, etc.)
 - Supports migration/validation of [Counter tables](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_using/useCountersConcept.html)
 - Preserve [writetimes](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/cql_commands/cqlSelect.html#cqlSelect__retrieving-the-datetime-a-write-occurred-p) and [TTLs](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/cql_commands/cqlSelect.html#cqlSelect__ref-select-ttl-p)
 - Supports migration/validation of advanced DataTypes ([Sets](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__set), [Lists](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__list), [Maps](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__map), [UDTs](https://docs.datastax.com/en/dse/6.8/cql/cql/cql_reference/refDataTypes.html#refDataTypes__udt))
 - Filter records from `Origin` using `writetimes` and/or CQL conditions and/or min/max token-range
+- Perform guardrail checks (identify large fields)
 - Supports adding `constants` as new columns on `Target`
 - Fully containerized (Docker and K8s friendly)
 - SSL Support (including custom cipher algorithms)
````
````diff
@@ -106,6 +122,9 @@ This mode is specifically useful to processes a subset of partition-ranges that
 - Validate migration accuracy and performance using a smaller randomized data-set
 - Supports adding custom fixed `writetime`
 
+# Known Limitations
+- This tool does not migrate `ttl` & `writetime` at the field-level (for optimization reasons). It instead finds the field with the highest `ttl` & the field with the highest `writetime` within an `origin` row and uses those values on the entire `target` row.
+
 # Building Jar for local development
 1. Clone this repo
 2. Move to the repo folder `cd cassandra-data-migrator`
````
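The build steps in the README context above follow the standard Maven flow. A sketch, assuming a JDK and Maven are installed; the jar name tracks the `<revision>` property, which this commit bumps to 3.4.0:

```
# clone, enter the repo, and build; the jar lands under target/
cd cassandra-data-migrator
mvn clean package
```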

pom.xml

Lines changed: 7 additions & 1 deletion
````diff
@@ -8,7 +8,7 @@
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <revision>3.2.3</revision>
+        <revision>3.4.0</revision>
         <scala.version>2.12.17</scala.version>
         <scala.main.version>2.12</scala.main.version>
         <spark.version>3.3.1</spark.version>
@@ -89,6 +89,12 @@
             <artifactId>log4j-to-slf4j</artifactId>
             <version>2.19.0</version>
         </dependency>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.26</version>
+            <scope>provided</scope>
+        </dependency>
 
         <!-- Test Dependencies -->
         <dependency>
````
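Lombok is added with `provided` scope, so it is needed at compile time only and is not bundled into the built jar. A quick, generic way to confirm how Maven resolves it, using the stock dependency plugin:

```
# show where lombok appears in the dependency tree and with what scope
mvn dependency:tree -Dincludes=org.projectlombok:lombok
```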
