Skip to content

Commit 937e005

Browse files
authored
Merge pull request #44 from datastax/feature/cdm-docker-image
Containerize
2 parents 69bb9ec + 7610603 commit 937e005

File tree

5 files changed

+48
-114
lines changed

5 files changed

+48
-114
lines changed

Dockerfile

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
# Container image for the Cassandra Data Migrator (CDM): a Java 8 base image
# with DSBulk, cqlsh-astra and Spark 2.4.8 unpacked under /assets, plus the CDM
# jar and its template files. The container runs sshd in the foreground.
FROM eclipse-temurin:8-jammy

# openssh-server provides the sshd launched by CMD; vim is kept so the copied
# property/CSV templates can be edited inside the container.
# /run/sshd is sshd's privilege-separation directory. It is created explicitly
# rather than via `service ssh start`: a daemon started in a RUN step does not
# survive into the image, only its filesystem side effects do.
# NOTE(review): SSH host keys generated during package installation end up in
# the image layer and are shared by every container - confirm this is acceptable.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        openssh-server \
        vim && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir -p /run/sshd

# Directory that holds all migration tools and templates.
# WORKDIR creates it and makes it the working directory for the steps below.
WORKDIR /assets

# Download all migration dependencies.
# -f makes curl fail on HTTP errors instead of saving an error page.
# The DSBulk archive URL is unversioned, so its top-level directory name
# (dsbulk-<version>) changes between releases; --strip-components=1 unpacks it
# into a fixed /assets/dsbulk regardless of the version that was downloaded.
RUN curl -fsSLO https://downloads.datastax.com/dsbulk/dsbulk.tar.gz && \
    mkdir -p ./dsbulk && \
    tar -xzf ./dsbulk.tar.gz -C ./dsbulk --strip-components=1 && \
    rm ./dsbulk.tar.gz && \
    curl -fsSLO https://downloads.datastax.com/enterprise/cqlsh-astra.tar.gz && \
    tar -xzf ./cqlsh-astra.tar.gz && \
    rm ./cqlsh-astra.tar.gz && \
    curl -fsSLO https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz && \
    tar -xzf ./spark-2.4.8-bin-hadoop2.7.tgz && \
    rm ./spark-2.4.8-bin-hadoop2.7.tgz

# Copy CDM template files (one layer), then the CDM jar.
COPY ./src/resources/sparkConf.properties \
     ./src/resources/partitions.csv \
     ./src/resources/primary_key_rows.csv \
     ./src/resources/runCommands.txt \
     /assets/
COPY ./target/cassandra-data-migrator-*.jar /assets/

# Add all migration tools to path
ENV PATH="${PATH}:/assets/dsbulk/bin/:/assets/cqlsh-astra/bin/:/assets/spark-2.4.8-bin-hadoop2.7/bin/"

# sshd listens on the default SSH port.
EXPOSE 22

# Run sshd in the foreground (-D) so it stays the container's main process.
CMD ["/usr/sbin/sshd","-D"]

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
<groupId>datastax.astra.migrate</groupId>
55
<artifactId>cassandra-data-migrator</artifactId>
6-
<version>2.9</version>
6+
<version>2.10</version>
77
<packaging>jar</packaging>
88

99
<properties>

src/resources/100PartitionSplits.txt

Lines changed: 0 additions & 101 deletions
This file was deleted.

src/resources/migrate_data.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ if [ $S_IDX -lt -9000000000000000000 ]
3939
then
4040
E_IDX=-9000000000000000001
4141
echo "Running Migrate for Partition Range $S_IDX to $E_IDX .."
42-
$SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.source.minPartition=$S_IDX --conf spark.source.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
42+
$SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.origin.minPartition=$S_IDX --conf spark.origin.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
4343
S_IDX=-9000000000000000000
4444
fi
4545

@@ -53,12 +53,12 @@ do
5353
E_IDX=$(( $S_IDX + $SLICE ))
5454
fi
5555
echo "Running Migrate for Partition Range $S_IDX to $E_IDX .."
56-
$SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.source.minPartition=$S_IDX --conf spark.source.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
56+
$SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.origin.minPartition=$S_IDX --conf spark.origin.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
5757
S_IDX=$(( $E_IDX + 1 ))
5858
done
5959

6060
# Migrate final partition tokens from 9000000000000000000 to max-long
6161
E_IDX=9223372036854775807
6262
echo "Running Migrate for Partition Range $S_IDX to 9223372036854775807 .."
63-
$SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.source.minPartition=$S_IDX --conf spark.source.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
63+
$SPARK_SUBMIT --properties-file $PROPS_FILE --master "local[*]" --conf spark.origin.minPartition=$S_IDX --conf spark.origin.maxPartition=$E_IDX --class datastax.astra.migrate.Migrate cassandra-data-migrator-*.jar
6464
echo "Completed Migration using $PROPS_FILE !!"

src/resources/runCommands.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
// Download spark
2-
wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.6.tgz
3-
4-
// Increase driver memory
5-
--driver-memory 8G
1+
// Download dependencies
2+
curl -OL https://downloads.datastax.com/dsbulk/dsbulk.tar.gz
3+
curl -OL https://downloads.datastax.com/enterprise/cqlsh-astra.tar.gz
4+
wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
65

76
// Migrate
8-
spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-1.x.jar
7+
spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-2.x.jar
8+
spark-submit --properties-file /<path>/sparkConf.properties --master "local[8]" --driver-memory 25G --executor-memory 25G --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-2.x.jar &> table_out.log
99

1010
// Random Partitioner Run Command
11-
spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --conf spark.source.minPartition=-1 --conf spark.source.maxPartition=170141183460469231731687303715884105728 --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-1.x.jar
11+
spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --conf spark.origin.minPartition=-1 --conf spark.origin.maxPartition=170141183460469231731687303715884105728 --class datastax.astra.migrate.Migrate /<path>/cassandra-data-migrator-2.x.jar
1212

13-
// Diff Data
14-
spark-submit --properties-file /<path>/sparkConf.properties --verbose --master "local[8]" --class datastax.astra.migrate.DiffData /<path>/cassandra-data-migrator-1.x.jar
13+
// Validate
14+
spark-submit --properties-file /<path>/sparkConf.properties --master "local[8]" --driver-memory 25G --executor-memory 25G --class datastax.astra.migrate.DiffData /<path>/cassandra-data-migrator-2.x.jar &> table_out.log

0 commit comments

Comments
 (0)