Skip to content

Commit 990519e

Browse files
committed
Update scalardb-analytics-spark-sample to support 3.14
1 parent bdedc82 commit 990519e

File tree

13 files changed

+139
-148
lines changed

13 files changed

+139
-148
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.scala_history
Lines changed: 61 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,52 @@
11
services:
2-
spark-shell:
2+
spark-sql:
33
build:
44
context: ./docker
55
dockerfile: Dockerfile.spark
66
volumes:
77
- ./scalardb.properties:/etc/scalardb.properties
8+
- ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
89
- ./cert.pem:/etc/cert.pem
9-
- .scala_history_jline3:/root/.scala_history_jline3
10+
- .scala_history:/root/.scala_history
11+
- ~/.m2:/root/.m2 #ToDo
1012
networks:
1113
- scalar-network
1214
profiles:
1315
- dev
1416
depends_on:
15-
- backend-postgres
16-
- backend-cassandra
17-
- backend-dynamodb
17+
- scalardb-cassandra
18+
- scalardb-mysql
19+
- postgres
1820
command:
19-
- "/opt/spark/bin/spark-shell"
21+
- "/opt/spark/bin/spark-sql"
2022
- "--packages"
21-
- "com.scalar-labs:scalardb-analytics-spark-3.5_2.12:3.12.0"
23+
- "com.scalar-labs:scalardb-analytics-spark-without-licensing-all-3.5_2.12:3.14.0,com.h2database:h2:2.2.224" #ToDo
2224

23-
backend-postgres:
24-
image: postgres:15.1
25-
ports:
26-
- "5432"
25+
sample-data-loader:
26+
build:
27+
context: sample-data-loader
28+
dockerfile: Dockerfile
2729
volumes:
28-
- backend-postgres-data:/var/lib/postgresql/data
29-
environment:
30-
- POSTGRES_USER=postgres
31-
- POSTGRES_PASSWORD=postgres
32-
- POSTGRES_DB=test
30+
- ./scalardb.properties:/etc/scalardb.properties
31+
- ./schema.json:/etc/schema.json
32+
- ./data:/data
33+
working_dir: /sample-data-loader
3334
networks:
3435
- scalar-network
35-
healthcheck:
36-
test: ["CMD", "psql", "-U", "postgres", "-c", "select 1"]
37-
interval: 1s
38-
timeout: 1s
39-
retries: 10
40-
start_period: 1s
36+
profiles:
37+
- dev
38+
depends_on:
39+
- scalardb-cassandra
40+
- scalardb-mysql
41+
- postgres
42+
command: ["java", "-jar", "/app.jar"]
4143

42-
backend-cassandra:
44+
scalardb-cassandra:
4345
image: cassandra:3.11
4446
ports:
45-
- "9042"
47+
- 9042
4648
volumes:
47-
- backend-cassandra-data:/var/lib/cassandra
49+
- scalardb-cassandra-data:/var/lib/cassandra
4850
environment:
4951
- CASSANDRA_DC=dc1
5052
- CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch
@@ -55,50 +57,52 @@ services:
5557
interval: 1s
5658
timeout: 1s
5759
retries: 10
58-
start_period: 5s
60+
start_period: 10s
5961

60-
backend-dynamodb:
61-
image: amazon/dynamodb-local:1.21.0
62+
scalardb-mysql:
63+
image: mysql:8.0.36
6264
ports:
63-
- "8000"
64-
command:
65-
[
66-
"-jar",
67-
"DynamoDBLocal.jar",
68-
"-sharedDb",
69-
"-dbPath",
70-
"/home/dynamodblocal",
71-
"-optimizeDbBeforeStartup",
72-
]
65+
- 3306
7366
volumes:
74-
- backend-dynamodb-data:/home/dynamodblocal
67+
- scalardb-mysql-data:/var/lib/mysql
68+
environment:
69+
- MYSQL_ROOT_PASSWORD=mysql
70+
- MYSQL_DATABASE=sampledb
7571
networks:
7672
- scalar-network
73+
healthcheck:
74+
test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root"]
75+
interval: 1s
76+
timeout: 1s
77+
retries: 10
78+
start_period: 5s
7779

78-
sample-data-loader:
79-
build:
80-
context: sample-data-loader
81-
dockerfile: Dockerfile
80+
postgres:
81+
image: postgres:15.1
82+
ports:
83+
- 5432
8284
volumes:
83-
- ./scalardb.properties:/etc/scalardb.properties
84-
- ./schema.json:/etc/schema.json
85-
- ./data:/data
86-
working_dir: /sample-data-loader
85+
- postgres-data:/var/lib/postgresql/data
86+
- ./data/customer.csv:/opt/customer.csv
87+
- ./sql/postgres_copy.sql:/docker-entrypoint-initdb.d/postgres_copy.sql
88+
environment:
89+
- POSTGRES_USER=postgres
90+
- POSTGRES_PASSWORD=postgres
91+
- POSTGRES_DB=sampledb
8792
networks:
8893
- scalar-network
89-
profiles:
90-
- dev
91-
depends_on:
92-
- backend-postgres
93-
- backend-cassandra
94-
- backend-dynamodb
95-
command: ["java", "-jar", "/app.jar"]
94+
healthcheck:
95+
test: ["CMD", "psql", "-U", "postgres", "-c", "select 1"]
96+
interval: 1s
97+
timeout: 1s
98+
retries: 10
99+
start_period: 5s
96100

97101
volumes:
98102
analytics-data: {}
99-
backend-postgres-data: {}
100-
backend-cassandra-data: {}
101-
backend-dynamodb-data: {}
103+
scalardb-cassandra-data: {}
104+
scalardb-mysql-data: {}
105+
postgres-data: {}
102106

103107
networks:
104108
scalar-network: {}

scalardb-analytics-spark-sample/docker/Dockerfile.spark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ FROM eclipse-temurin:17-jre-jammy
33

44
WORKDIR /work
55

6-
ENV SPARK_VERSION 3.5.1
6+
ENV SPARK_VERSION 3.5.3
77

88
RUN apt-get update && \
99
apt-get install -y --no-install-recommends \

scalardb-analytics-spark-sample/sample-data-loader/build.gradle.kts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
plugins {
22
application
3-
id("com.github.johnrengelman.shadow") version "7.1.2"
3+
id("com.gradleup.shadow") version "8.3.5"
44
id("com.diffplug.spotless") version "6.24.0"
55
}
66

@@ -9,10 +9,11 @@ repositories {
99
}
1010

1111
dependencies {
12-
implementation("com.scalar-labs:scalardb:3.12.1")
13-
implementation("com.scalar-labs:scalardb-schema-loader:3.12.1")
12+
implementation("com.scalar-labs:scalardb:3.14.0")
13+
implementation("com.scalar-labs:scalardb-schema-loader:3.14.0")
1414
implementation("org.apache.commons:commons-csv:1.10.0")
1515

16+
1617
implementation("io.netty:netty-transport-native-epoll:4.1.99.Final:linux-x86_64")
1718
implementation("io.netty:netty-transport-native-epoll:4.1.99.Final:linux-aarch_64")
1819
implementation("io.netty:netty-transport-native-kqueue:4.1.99.Final:osx-x86_64")
Binary file not shown.

scalardb-analytics-spark-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
distributionBase=GRADLE_USER_HOME
22
distributionPath=wrapper/dists
3-
distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip
3+
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
44
networkTimeout=10000
55
validateDistributionUrl=true
66
zipStoreBase=GRADLE_USER_HOME

scalardb-analytics-spark-sample/sample-data-loader/gradlew

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
# See the License for the specific language governing permissions and
1616
# limitations under the License.
1717
#
18+
# SPDX-License-Identifier: Apache-2.0
19+
#
1820

1921
##############################################################################
2022
#
@@ -55,7 +57,7 @@
5557
# Darwin, MinGW, and NonStop.
5658
#
5759
# (3) This script is generated from the Groovy template
58-
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
60+
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
5961
# within the Gradle project.
6062
#
6163
# You can find Gradle at https://github.com/gradle/gradle/.
@@ -84,7 +86,8 @@ done
8486
# shellcheck disable=SC2034
8587
APP_BASE_NAME=${0##*/}
8688
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
87-
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
89+
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
90+
' "$PWD" ) || exit
8891

8992
# Use the maximum available, or set MAX_FD != -1 to use that value.
9093
MAX_FD=maximum

scalardb-analytics-spark-sample/sample-data-loader/gradlew.bat

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
@rem See the License for the specific language governing permissions and
1414
@rem limitations under the License.
1515
@rem
16+
@rem SPDX-License-Identifier: Apache-2.0
17+
@rem
1618

1719
@if "%DEBUG%"=="" @echo off
1820
@rem ##########################################################################
@@ -43,11 +45,11 @@ set JAVA_EXE=java.exe
4345
%JAVA_EXE% -version >NUL 2>&1
4446
if %ERRORLEVEL% equ 0 goto execute
4547

46-
echo.
47-
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
48-
echo.
49-
echo Please set the JAVA_HOME variable in your environment to match the
50-
echo location of your Java installation.
48+
echo. 1>&2
49+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
50+
echo. 1>&2
51+
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
52+
echo location of your Java installation. 1>&2
5153

5254
goto fail
5355

@@ -57,11 +59,11 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
5759

5860
if exist "%JAVA_EXE%" goto execute
5961

60-
echo.
61-
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
62-
echo.
63-
echo Please set the JAVA_HOME variable in your environment to match the
64-
echo location of your Java installation.
62+
echo. 1>&2
63+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
64+
echo. 1>&2
65+
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
66+
echo location of your Java installation. 1>&2
6567

6668
goto fail
6769

scalardb-analytics-spark-sample/sample-data-loader/src/main/java/sample/data/Loader.java

Lines changed: 5 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.scalar.db.api.DistributedTransaction;
44
import com.scalar.db.api.DistributedTransactionManager;
5+
import com.scalar.db.api.Mutation;
56
import com.scalar.db.api.Put;
67
import com.scalar.db.exception.transaction.TransactionException;
78
import com.scalar.db.io.Key;
@@ -14,29 +15,18 @@
1415
import java.nio.file.Files;
1516
import java.nio.file.Path;
1617
import java.util.HashMap;
18+
import java.util.List;
1719
import java.util.Map;
1820
import java.util.function.Function;
1921
import org.apache.commons.csv.CSVFormat;
2022
import org.apache.commons.csv.CSVRecord;
2123

2224
public class Loader implements AutoCloseable {
23-
private static final String CUSTOMER_DATA = "/data/customer.csv";
2425
private static final String ORDERS_DATA = "/data/orders.csv";
2526
private static final String LINEITEM_DATA = "/data/lineitem.csv";
2627
private static final String CONFIG_FILE_PATH = "/etc/scalardb.properties";
2728
private static final String SCHEMA_FILE_PATH = "/etc/schema.json";
2829

29-
private static final String[] CUSTOMER_COLUMNS = {
30-
"c_custkey",
31-
"c_name",
32-
"c_address",
33-
"c_nationkey",
34-
"c_phone",
35-
"c_acctbal",
36-
"c_mktsegment",
37-
"c_comment"
38-
};
39-
4030
private static final String[] ORDERS_COLUMNS = {
4131
"o_orderkey",
4232
"o_custkey",
@@ -82,8 +72,6 @@ public void close() {
8272
public void load() throws TransactionException, IOException, SchemaLoaderException {
8373
loadSchema();
8474

85-
loadData(this.manager, CUSTOMER_DATA, CUSTOMER_COLUMNS, this::buildPutCustomer);
86-
8775
loadData(this.manager, ORDERS_DATA, ORDERS_COLUMNS, this::buildPutOrders);
8876

8977
loadData(this.manager, LINEITEM_DATA, LINEITEM_COLUMNS, this::buildPutLineitem);
@@ -101,25 +89,9 @@ private void loadSchema() throws SchemaLoaderException {
10189
SchemaLoader.load(configFilePath, schemaFilePath, options, createCoordinatorTables);
10290
}
10391

104-
private Put buildPutCustomer(CSVRecord record) {
105-
return Put.newBuilder()
106-
.namespace("dynamons")
107-
.table("customer")
108-
.partitionKey(Key.ofInt("c_custkey", intCol(record, "c_custkey")))
109-
.textValue("c_name", stringCol(record, "c_name"))
110-
.textValue("c_address", stringCol(record, "c_address"))
111-
.intValue("c_nationkey", intCol(record, "c_nationkey"))
112-
.textValue("c_phone", stringCol(record, "c_phone"))
113-
.doubleValue("c_acctbal", doubleCol(record, "c_acctbal"))
114-
.textValue("c_mktsegment", stringCol(record, "c_mktsegment"))
115-
.textValue("c_comment", stringCol(record, "c_comment"))
116-
.enableImplicitPreRead()
117-
.build();
118-
}
119-
12092
private Put buildPutOrders(CSVRecord record) {
12193
return Put.newBuilder()
122-
.namespace("postgresns")
94+
.namespace("mysqlns")
12395
.table("orders")
12496
.partitionKey(Key.ofInt("o_orderkey", intCol(record, "o_orderkey")))
12597
.intValue("o_custkey", intCol(record, "o_custkey"))
@@ -175,7 +147,8 @@ private void loadData(
175147
transaction = manager.start();
176148
for (CSVRecord record : records) {
177149
Put put = putFunction.apply(record);
178-
transaction.put(put);
150+
List<Mutation> mutations = List.of(put);
151+
transaction.mutate(mutations);
179152
}
180153
transaction.commit();
181154
} catch (TransactionException e) {

scalardb-analytics-spark-sample/scalardb.properties

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,18 @@
11
scalar.db.storage=multi-storage
2-
scalar.db.multi_storage.storages=cassandra,postgres,dynamodb
2+
scalar.db.multi_storage.storages=cassandra,mysql
33

44
scalar.db.multi_storage.storages.cassandra.storage=cassandra
5-
scalar.db.multi_storage.storages.cassandra.contact_points=backend-cassandra
5+
scalar.db.multi_storage.storages.cassandra.contact_points=scalardb-cassandra
66
scalar.db.multi_storage.storages.cassandra.contact_port=9042
77
scalar.db.multi_storage.storages.cassandra.username=cassandra
88
scalar.db.multi_storage.storages.cassandra.password=cassandra
99

10-
scalar.db.multi_storage.storages.postgres.storage=jdbc
11-
scalar.db.multi_storage.storages.postgres.contact_points=jdbc:postgresql://backend-postgres:5432/test
12-
scalar.db.multi_storage.storages.postgres.username=postgres
13-
scalar.db.multi_storage.storages.postgres.password=postgres
14-
scalar.db.multi_storage.storages.postgres.jdbc.connection_pool.min_idle=5
15-
scalar.db.multi_storage.storages.postgres.jdbc.connection_pool.max_idle=10
16-
scalar.db.multi_storage.storages.postgres.jdbc.connection_pool.max_total=25
10+
scalar.db.multi_storage.storages.mysql.storage=jdbc
11+
scalar.db.multi_storage.storages.mysql.contact_points=jdbc:mysql://scalardb-mysql:3306/sampledb
12+
scalar.db.multi_storage.storages.mysql.username=root
13+
scalar.db.multi_storage.storages.mysql.password=mysql
1714

18-
scalar.db.multi_storage.storages.dynamodb.contact_points=ap-northeast-1
19-
scalar.db.multi_storage.storages.dynamodb.username=access_key_id
20-
scalar.db.multi_storage.storages.dynamodb.password=secret_access_key
21-
scalar.db.multi_storage.storages.dynamodb.storage=dynamo
22-
scalar.db.multi_storage.storages.dynamodb.dynamo.endpoint_override=http://backend-dynamodb:8000
23-
scalar.db.multi_storage.storages.dynamodb.dynamo.table_metadata.namespace=table_metadata
24-
scalar.db.multi_storage.storages.dynamodb.dynamo.namespace.prefix=scalar_
25-
26-
scalar.db.multi_storage.namespace_mapping=cassandrans:cassandra,postgresns:postgres,dynamons:dynamodb
15+
scalar.db.multi_storage.namespace_mapping=cassandrans:cassandra,mysqlns:mysql
2716

2817
scalar.db.multi_storage.default_storage=cassandra
2918

0 commit comments

Comments
 (0)