#!/bin/bash

###########################################################################################################################
#
# Migrates data between Cassandra clusters (including Astra) in chunks. The full 64-bit partition-token range is walked
# sequentially in progressive slices, one spark-submit run per slice.
#
# The script stops at the first slice whose spark-submit run exits non-zero and prints the start token of that slice,
# so the migration can be resumed by setting S_IDX below to that token and re-running.
#
# *** IMP Note: Run this script using nohup in background using a logfile and tail the logfile to monitor progress ***
# e.g.  nohup ./migrate_data.sh > logs/spark/migrate_data.out 2>&1 &
#
# To monitor migration progress, you could use the below command
# grep "Running Migrate for Partition Range" logs/spark/migrate_data.out
#
# The migrate jar is located with the glob migrate-*.jar relative to the current working directory.
#
###########################################################################################################################

# Path to spark-submit
SPARK_SUBMIT=/home/ubuntu/spark-2.4.8-bin-hadoop2.6/bin/spark-submit

# Path to spark configuration for the table
PROPS_FILE=/home/ubuntu/sparkConf.properties

# Starting partition token (Default is Min possible value of a Cassandra token - min long value in Java).
# Change this value only if you want to start from a custom partition token (e.g. when a migrate job failed midway)
S_IDX=-9223372036854775808

# ** DO NOT CHANGE ANYTHING BELOW THIS **

# Width of one slice minus one: both range ends are passed to the job, so S_IDX..S_IDX+SLICE spans 10^18 tokens.
SLICE=999999999999999999

#######################################
# Print a message to stderr and abort the script.
# Arguments: $* - message text
# Returns:   never; exits with status 1
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Run one Migrate job for a partition-token range and abort on failure.
# Globals:   SPARK_SUBMIT (read), PROPS_FILE (read)
# Arguments: $1 - first token of the range (passed as minPartition)
#            $2 - last token of the range (passed as maxPartition)
# Outputs:   progress line on stdout; failure/resume hint on stderr
# Returns:   0 on success; on failure exits the script with spark-submit's status
#######################################
run_migrate() {
  local min_token=$1
  local max_token=$2
  local rc=0

  # This exact wording is what the documented progress grep looks for.
  echo "Running Migrate for Partition Range $min_token to $max_token .."

  # migrate-*.jar is intentionally left unquoted so the shell expands the glob.
  "$SPARK_SUBMIT" --properties-file "$PROPS_FILE" --master "local[*]" \
    --conf "spark.migrate.source.minPartition=$min_token" \
    --conf "spark.migrate.source.maxPartition=$max_token" \
    --class datastax.astra.migrate.Migrate migrate-*.jar || rc=$?

  if [ "$rc" -ne 0 ]; then
    # Stop here instead of silently moving on to the next slice, and tell the operator where to resume.
    printf 'Migrate FAILED for Partition Range %s to %s (spark-submit exit status %s).\n' \
      "$min_token" "$max_token" "$rc" >&2
    printf 'To resume, set S_IDX=%s and re-run this script.\n' "$min_token" >&2
    exit "$rc"
  fi
}

# Pre-flight checks, so a misconfiguration is reported once instead of once per slice.
# `[ x -eq x ]` fails for anything that is not an integer the shell can represent.
[ "$S_IDX" -eq "$S_IDX" ] 2>/dev/null \
  || die "S_IDX must be an integer partition token in the signed 64-bit range, got '$S_IDX'"
command -v "$SPARK_SUBMIT" >/dev/null \
  || die "spark-submit not found or not executable: $SPARK_SUBMIT"
[ -r "$PROPS_FILE" ] \
  || die "Properties file not readable: $PROPS_FILE"

echo "Starting Migration using $PROPS_FILE !!"

# Comparisons below use `[ ... -lt/-gt ... ]` rather than `(( ... ))`: in an arithmetic context the min-long literal
# is parsed as a unary minus applied to 9223372036854775808, which is outside the signed 64-bit range.

# Migrate initial partition tokens from min-long to -9000000000000000001
if [ "$S_IDX" -lt -9000000000000000000 ]; then
  E_IDX=-9000000000000000001
  run_migrate "$S_IDX" "$E_IDX"
  S_IDX=-9000000000000000000
fi

# Migrate partition tokens from -9000000000000000000 to 8999999999999999999 in slices of 1000000000000000000
while [ "$S_IDX" -lt 9000000000000000000 ]; do
  if [ "$S_IDX" -gt 8223372036854775807 ]; then
    # Adding SLICE (and then 1) to a start token this large would overflow a signed 64-bit integer,
    # so clamp the slice end to the last token handled by this loop.
    E_IDX=8999999999999999999
  else
    E_IDX=$(( S_IDX + SLICE ))
  fi
  run_migrate "$S_IDX" "$E_IDX"
  S_IDX=$(( E_IDX + 1 ))
done

# Migrate final partition tokens from the current start token (9000000000000000000 on a default run) to max-long
E_IDX=9223372036854775807
run_migrate "$S_IDX" "$E_IDX"

echo "Completed Migration using $PROPS_FILE !!"