Skip to content

Commit 2ceec35

Browse files
committed
Migration helper script (migrate data sequentially in progressive token-range slices)
1 parent 54481a2 commit 2ceec35

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

src/resources/migrate_data.sh

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/bash

###########################################################################################################################
#
# This script can be used to migrate data between Cassandra clusters (including Astra) in chunks. It migrates data by
# partition token ranges, sequentially, in progressive slices. It can also resume a migration from the point where a
# previous run stopped or failed: the script aborts on the first failed slice and prints the S_IDX value to resume from.
#
# *** IMPORTANT: Run this script in the background using nohup with a logfile, and tail the logfile to monitor progress ***
# e.g.  nohup ./migrate_data.sh > logs/spark/migrate_data.out 2>&1 &
#
# To monitor migration progress, you could use the below command
# grep "Running Migrate for Partition Range" logs/spark/migrate_data.out
#
# Exit status: 0 when every slice was submitted successfully, 1 on a bad configuration or a failed slice.
#
###########################################################################################################################

# Abort on use of an unset variable (catches typos in the settings below).
set -u

# Path to spark-submit
SPARK_SUBMIT=/home/ubuntu/spark-2.4.8-bin-hadoop2.6/bin/spark-submit

# Path to spark configuration for the table
PROPS_FILE=/home/ubuntu/sparkConf.properties

# Starting partition token (Default is Min possible value of a Cassandra token - min long value in Java).
# Change this value only if you want to start from a custom partition token (e.g. when a migrate job failed midway)
S_IDX=-9223372036854775808

# ** DO NOT CHANGE ANYTHING BELOW THIS **
SLICE=999999999999999999

# Print an error message to stderr and stop the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Run one migration job for the inclusive token range [$1, $2].
# Globals:   SPARK_SUBMIT, PROPS_FILE (read)
# Arguments: $1 - first partition token of the slice
#            $2 - last partition token of the slice
# Outputs:   progress line on stdout (format relied upon by the monitoring grep above)
# Exits:     with status 1 if spark-submit returns non-zero, so that a failed
#            slice is never silently skipped
migrate_range() {
  local min_token=$1
  local max_token=$2
  local rc=0

  echo "Running Migrate for Partition Range $min_token to $max_token .."
  # migrate-*.jar is intentionally left unquoted: it must glob against the
  # current working directory, exactly as before.
  "$SPARK_SUBMIT" --properties-file "$PROPS_FILE" --master "local[*]" \
    --conf "spark.migrate.source.minPartition=$min_token" \
    --conf "spark.migrate.source.maxPartition=$max_token" \
    --class datastax.astra.migrate.Migrate migrate-*.jar || rc=$?

  if [ "$rc" -ne 0 ]; then
    die "Migration FAILED (exit code $rc) for Partition Range $min_token to $max_token." \
      "Set S_IDX=$min_token to resume from this slice."
  fi
}

# Fail early on configuration mistakes instead of part-way through the run.
[[ "$S_IDX" =~ ^-?[0-9]+$ ]] || die "S_IDX must be an integer partition token, got '$S_IDX'"
[[ -x "$SPARK_SUBMIT" ]] || die "spark-submit not found or not executable: $SPARK_SUBMIT"
[[ -r "$PROPS_FILE" ]] || die "Properties file not readable: $PROPS_FILE"

echo "Starting Migration using $PROPS_FILE !!"

# NOTE: the comparisons below deliberately use `[ ... ]` rather than `(( ))`:
# `[` parses its operands as plain integer literals, which handles the
# min-long value -9223372036854775808 without going through arithmetic
# expression evaluation.

# Migrate initial partition tokens from min-long to -9000000000000000001
if [ "$S_IDX" -lt -9000000000000000000 ]; then
  E_IDX=-9000000000000000001
  migrate_range "$S_IDX" "$E_IDX"
  S_IDX=-9000000000000000000
fi

# Migrate partition tokens from -9000000000000000000 to 8999999999999999999 in slices of 1000000000000000000
while [ "$S_IDX" -lt 9000000000000000000 ]; do
  if [ "$S_IDX" -gt 8223372036854775807 ]; then
    # S_IDX + SLICE could overflow a signed 64-bit integer here, so clamp the
    # slice end to the last token handled by this loop.
    E_IDX=8999999999999999999
  else
    E_IDX=$(( S_IDX + SLICE ))
  fi
  migrate_range "$S_IDX" "$E_IDX"
  S_IDX=$(( E_IDX + 1 ))
done

# Migrate final partition tokens from 9000000000000000000 (or a later custom start) to max-long
E_IDX=9223372036854775807
migrate_range "$S_IDX" "$E_IDX"

echo "Completed Migration using $PROPS_FILE !!"

0 commit comments

Comments
 (0)