File tree: 5 files changed, +48 -114 lines changed
Original file line number | Diff line number | Diff line change
1
+ FROM eclipse-temurin:8-jammy
2
+
3
+ # Add all migration tools to path
4
+ RUN mkdir -p /assets/
5
+
6
+ # Download all migration dependencies
7
+ RUN cd /assets && \
8
+ curl -OL https://downloads.datastax.com/dsbulk/dsbulk.tar.gz && \
9
+ tar -xzf ./dsbulk.tar.gz && \
10
+ rm ./dsbulk.tar.gz && \
11
+ mv /assets/dsbulk-1.10.0 /assets/dsbulk && \
12
+ curl -OL https://downloads.datastax.com/enterprise/cqlsh-astra.tar.gz && \
13
+ tar -xzf ./cqlsh-astra.tar.gz && \
14
+ rm ./cqlsh-astra.tar.gz && \
15
+ curl -OL https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz && \
16
+ tar -xzf ./spark-2.4.8-bin-hadoop2.7.tgz && \
17
+ rm ./spark-2.4.8-bin-hadoop2.7.tgz
18
+
19
+ RUN apt-get update && apt-get install -y openssh-server vim --no-install-recommends && \
20
+ rm -rf /var/lib/apt/lists/*
21
+ RUN service ssh start
22
+
23
+ # Copy CDM jar & template files
24
+ COPY ./target/cassandra-data-migrator-*.jar /assets/
25
+ COPY ./src/resources/sparkConf.properties /assets/
26
+ COPY ./src/resources/partitions.csv /assets/
27
+ COPY ./src/resources/primary_key_rows.csv /assets/
28
+ COPY ./src/resources/runCommands.txt /assets/
29
+
30
+ # Add all migration tools to path
31
+ ENV PATH="${PATH}:/assets/dsbulk/bin/:/assets/cqlsh-astra/bin/:/assets/spark-2.4.8-bin-hadoop2.7/bin/"
32
+
33
+ EXPOSE 22
34
+
35
+ CMD ["/usr/sbin/sshd", "-D"]
Original file line number Diff line number Diff line change 3
3
4
4
<groupId >datastax.astra.migrate</groupId >
5
5
<artifactId >cassandra-data-migrator</artifactId >
6
- <version>2.9</version>
6
+ <version>2.10</version>
7
7
<packaging >jar</packaging >
8
8
9
9
<properties >
Load Diff This file was deleted.
Original file line number Diff line number Diff line change @@ -39,7 +39,7 @@ if [ $S_IDX -lt -9000000000000000000 ]
39
39
then
40
40
E_IDX=-9000000000000000001
41
41
echo " Running Migrate for Partition Range $S_IDX to $E_IDX .."
42
- $SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.source.minPartition=$S_IDX --conf spark.source.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
42
+ $SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.origin.minPartition=$S_IDX --conf spark.origin.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
43
43
S_IDX=-9000000000000000000
44
44
fi
45
45
53
53
E_IDX=$(( $S_IDX + $SLICE ))
54
54
fi
55
55
echo " Running Migrate for Partition Range $S_IDX to $E_IDX .."
56
- $SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.source.minPartition=$S_IDX --conf spark.source.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
56
+ $SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.origin.minPartition=$S_IDX --conf spark.origin.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
57
57
S_IDX=$(( $E_IDX + 1 ))
58
58
done
59
59
60
60
# Migrate final partition tokens from 9000000000000000000 to max-long
61
61
E_IDX=9223372036854775807
62
62
echo " Running Migrate for Partition Range $S_IDX to 9223372036854775807 .."
63
- $SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.source.minPartition=$S_IDX --conf spark.source.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
63
+ $SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.origin.minPartition=$S_IDX --conf spark.origin.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
64
64
echo " Completed Migration using $PROPS_FILE !!"
Original file line number Diff line number Diff line change 1
- // Download spark
2
- wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.6.tgz
3
-
4
- // Increase driver memory
5
- --driver-memory 8G
1
+ // Download dependencies
2
+ curl -OL https://downloads.datastax.com/dsbulk/dsbulk.tar.gz
3
+ curl -OL https://downloads.datastax.com/enterprise/cqlsh-astra.tar.gz
4
+ wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
6
5
7
6
// Migrate
8
- spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-1.x.jar
7
+ spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-2.x.jar
8
+ spark-submit --properties-file /<path>/sparkConf.properties --master "local[8]" --driver-memory 25G --executor-memory 25G --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-2.x.jar &> table_out.log
9
9
10
10
// Random Partitioner Run Command
11
- spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --conf spark.source.minPartition=-1 --conf spark.source.maxPartition=170141183460469231731687303715884105728 --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-1.x.jar
11
+ spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --conf spark.origin.minPartition=-1 --conf spark.origin.maxPartition=170141183460469231731687303715884105728 --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-2.x.jar
12
12
13
- // Diff Data
14
- spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --class datastax.astra.migrate.DiffData /<path>/cassandra-data-migrator-1.x.jar
13
+ // Validate
14
+ spark-submit --properties-file /<path>/sparkConf.properties --master "local[8]" --driver-memory 25G --executor-memory 25G --class datastax.astra.migrate.DiffData /<path>/cassandra-data-migrator-2.x.jar &> table_out.log
You can’t perform that action at this time.
0 commit comments