From 1c6f5974f2743cd4bfa1659dfe58ec3744021ffa Mon Sep 17 00:00:00 2001 From: Akihiro Okuno Date: Sun, 27 Jul 2025 23:07:27 +0900 Subject: [PATCH 1/5] feat(scalardb-analytics-spark-sample): update sample to support ScalarDB Analytics 3.16 - Add ScalarDB Analytics Server and CLI services to docker-compose - Configure Analytics Server with catalog database - Create data source definitions for ScalarDB and PostgreSQL - Reorganize configuration files into config directory - Add platform specification for ARM64 compatibility - Update Dockerfiles to use Gradle application plugin instead of shadowJar - Add volume caching for Spark JAR dependencies - Include setup instructions in README This update enables the sample to work with ScalarDB Analytics 3.16, providing a complete example of federated queries across ScalarDB and PostgreSQL data sources. --- scalardb-analytics-spark-sample/README.md | 60 +++++++ .../config/analytics-cli-config.properties | 4 + .../config/analytics-server.properties | 32 ++++ .../config/data-sources/postgres.json | 12 ++ .../config/data-sources/scalardb.json | 8 + .../{ => config}/scalardb.properties | 0 .../config/spark-defaults.conf | 10 ++ .../docker-compose.yml | 168 +++++++++++++----- .../docker/Dockerfile.spark | 24 ++- .../sample-data-loader/Dockerfile | 4 +- .../sample-data-loader/build.gradle.kts | 5 +- .../spark-defaults.conf | 24 --- .../sql/postgres_copy.sql | 5 +- 13 files changed, 273 insertions(+), 83 deletions(-) create mode 100644 scalardb-analytics-spark-sample/README.md create mode 100644 scalardb-analytics-spark-sample/config/analytics-cli-config.properties create mode 100644 scalardb-analytics-spark-sample/config/analytics-server.properties create mode 100644 scalardb-analytics-spark-sample/config/data-sources/postgres.json create mode 100644 scalardb-analytics-spark-sample/config/data-sources/scalardb.json rename scalardb-analytics-spark-sample/{ => config}/scalardb.properties (100%) create mode 100644 scalardb-analytics-spark-sample/config/spark-defaults.conf delete mode 100644 scalardb-analytics-spark-sample/spark-defaults.conf diff --git a/scalardb-analytics-spark-sample/README.md b/scalardb-analytics-spark-sample/README.md new file mode 100644 index 00000000..a9f7a2b0 --- /dev/null +++ b/scalardb-analytics-spark-sample/README.md @@ -0,0 +1,60 @@ +# ScalarDB Analytics Spark Sample + +## Setup + +### 1. Start services + +```bash +docker compose up -d +``` + +### 2. Load sample data + +```bash +docker compose run --rm sample-data-loader +``` + +### 3. Create catalog + +```bash +docker compose run --rm scalardb-analytics-cli catalog create --catalog sample_catalog +``` + +### 4. Register data sources + +```bash +# Register ScalarDB data source +docker compose run --rm scalardb-analytics-cli data-source register --data-source-json /config/data-sources/scalardb.json + +# Register PostgreSQL data source +docker compose run --rm scalardb-analytics-cli data-source register --data-source-json /config/data-sources/postgres.json +``` + +### 5. Run Spark SQL + +```bash +docker compose run --rm spark-sql +``` + +## Query examples + +```sql +-- List catalogs +SHOW CATALOGS; + +-- Use ScalarDB catalog +USE sample_catalog; + +-- Query ScalarDB tables +SELECT * FROM scalardb.mysqlns.orders LIMIT 10; +SELECT * FROM scalardb.cassandrans.lineitem LIMIT 10; + +-- Query PostgreSQL tables +SELECT * FROM postgres.public.customer LIMIT 10; +``` + +## Stop services + +```bash +docker compose down +``` diff --git a/scalardb-analytics-spark-sample/config/analytics-cli-config.properties b/scalardb-analytics-spark-sample/config/analytics-cli-config.properties new file mode 100644 index 00000000..d45400ae --- /dev/null +++ b/scalardb-analytics-spark-sample/config/analytics-cli-config.properties @@ -0,0 +1,4 @@ +# ScalarDB Analytics CLI configuration +scalar.db.analytics.client.server.host=scalardb-analytics-server +scalar.db.analytics.client.server.catalog.port=11051 +scalar.db.analytics.client.server.metering.port=11052 diff --git a/scalardb-analytics-spark-sample/config/analytics-server.properties b/scalardb-analytics-spark-sample/config/analytics-server.properties new file mode 100644 index 00000000..11b381a0 --- /dev/null +++ b/scalardb-analytics-spark-sample/config/analytics-server.properties @@ -0,0 +1,32 @@ +# ScalarDB Analytics Server configuration + +# Server ports +scalar.db.analytics.server.catalog.port=11051 +scalar.db.analytics.server.metering.port=11052 + +# Server database configuration (for catalog metadata) +scalar.db.analytics.server.db.url=jdbc:postgresql://analytics-catalog-postgres:5432/catalogdb +scalar.db.analytics.server.db.username=analytics +scalar.db.analytics.server.db.password=analytics + +# Server database connection pool configuration +scalar.db.analytics.server.db.pool.size=10 +scalar.db.analytics.server.db.pool.max-lifetime=1800000 +scalar.db.analytics.server.db.pool.connection-timeout=30000 +scalar.db.analytics.server.db.pool.minimum-idle=5 +scalar.db.analytics.server.db.pool.idle-timeout=600000 + +# Metering storage configuration (filesystem for development) +scalar.db.analytics.server.metering.storage.provider=filesystem +scalar.db.analytics.server.metering.storage.path=/tmp/metering + +# License configuration (required for production) +# scalar.db.analytics.server.licensing.license-key= +# scalar.db.analytics.server.licensing.license-check-cert-pem= + +# Logging configuration +logging.level.root=INFO +logging.level.com.scalar.db.analytics=INFO + +# Graceful shutdown configuration +scalar.db.analytics.server.graceful_shutdown_delay_millis=100 diff --git a/scalardb-analytics-spark-sample/config/data-sources/postgres.json b/scalardb-analytics-spark-sample/config/data-sources/postgres.json new file mode 100644 index 00000000..6979c537 --- /dev/null +++ b/scalardb-analytics-spark-sample/config/data-sources/postgres.json @@ -0,0 +1,12 @@ +{ + "catalog": "sample_catalog", + "name": "postgres", + "type": "postgres", + "provider": { + "host": "postgres", + "port": 5432, + "username": "postgres", + "password": "postgres", + "database": "sampledb" + } +} diff --git a/scalardb-analytics-spark-sample/config/data-sources/scalardb.json b/scalardb-analytics-spark-sample/config/data-sources/scalardb.json new file mode 100644 index 00000000..b2422c4a --- /dev/null +++ b/scalardb-analytics-spark-sample/config/data-sources/scalardb.json @@ -0,0 +1,8 @@ +{ + "catalog": "sample_catalog", + "name": "scalardb", + "type": "scalardb", + "provider": { + "configPath": "/etc/scalardb.properties" + } +} diff --git a/scalardb-analytics-spark-sample/scalardb.properties b/scalardb-analytics-spark-sample/config/scalardb.properties similarity index 100% rename from scalardb-analytics-spark-sample/scalardb.properties rename to scalardb-analytics-spark-sample/config/scalardb.properties diff --git a/scalardb-analytics-spark-sample/config/spark-defaults.conf b/scalardb-analytics-spark-sample/config/spark-defaults.conf new file mode 100644 index 00000000..442ab52b --- /dev/null +++ b/scalardb-analytics-spark-sample/config/spark-defaults.conf @@ -0,0 +1,10 @@ +spark.jars.packages com.scalar-labs:scalardb-analytics-spark-all-3.5_2.12:3.16.2 +spark.extraListeners com.scalar.db.analytics.spark.metering.ScalarDbAnalyticsListener + +# Use the ScalarDB Analytics catalog as `sample_catalog` +spark.sql.catalog.sample_catalog com.scalar.db.analytics.spark.ScalarDbAnalyticsCatalog +spark.sql.catalog.sample_catalog.server.host scalardb-analytics-server +spark.sql.catalog.sample_catalog.server.catalog.port 11051 +spark.sql.catalog.sample_catalog.server.metering.port 11052 + +spark.sql.defaultCatalog sample_catalog diff --git a/scalardb-analytics-spark-sample/docker-compose.yml b/scalardb-analytics-spark-sample/docker-compose.yml index e7050ffc..f6d4f3cf 100644 --- a/scalardb-analytics-spark-sample/docker-compose.yml +++ b/scalardb-analytics-spark-sample/docker-compose.yml @@ -1,48 +1,79 @@ services: - spark-sql: - build: - context: ./docker - dockerfile: Dockerfile.spark + # =========================================== + # ScalarDB Analytics Services + # =========================================== + + # Catalog database for Analytics Server metadata + analytics-catalog-postgres: + image: postgres:17 + expose: + - 5432 volumes: - - ./scalardb.properties:/etc/scalardb.properties - - ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf - - .scala_history:/root/.scala_history + - analytics-catalog-data:/var/lib/postgresql/data + environment: + - POSTGRES_USER=analytics + - POSTGRES_PASSWORD=analytics + - POSTGRES_DB=catalogdb + networks: + - scalar-network + healthcheck: + test: + ["CMD", "psql", "-U", "analytics", "-d", "catalogdb", "-c", "select 1"] + interval: 5s + timeout: 3s + retries: 10 + start_period: 5s + + # ScalarDB Analytics Server + scalardb-analytics-server: + image: ghcr.io/scalar-labs/scalardb-analytics-server-without-licensing:3.16.2 + platform: linux/amd64 + expose: + - 11051 # Catalog service port + - 11052 # Metering service port + volumes: + - ./config/analytics-server.properties:/scalardb-analytics-server/server.properties:ro + - ./config/scalardb.properties:/etc/scalardb.properties:ro networks: - scalar-network - profiles: - - dev depends_on: - - scalardb-cassandra - - scalardb-mysql - - postgres - command: - - "/opt/spark/bin/spark-sql" - - "--packages" - - "com.scalar-labs:scalardb-analytics-spark-all-3.5_2.12:3.14.0" + - analytics-catalog-postgres + healthcheck: + test: ["CMD", "/usr/local/bin/grpc_health_probe", "-addr=:11051"] + interval: 5s + timeout: 3s + retries: 10 + start_period: 5s - sample-data-loader: - build: - context: sample-data-loader - dockerfile: Dockerfile + # ScalarDB Analytics CLI + scalardb-analytics-cli: + image: ghcr.io/scalar-labs/scalardb-analytics-cli:3.16.2 volumes: - - ./scalardb.properties:/etc/scalardb.properties - - ./schema.json:/etc/schema.json - - ./data:/data - working_dir: /sample-data-loader + - ./config/analytics-cli-config.properties:/config/client.properties:ro + - ./config/data-sources:/config/data-sources:ro networks: - scalar-network profiles: - dev - depends_on: - - scalardb-cassandra - - scalardb-mysql - - postgres - command: ["java", "-jar", "/app.jar"] + entrypoint: + [ + "java", + "-jar", + "/scalardb-analytics-cli/scalardb-analytics-cli.jar", + "-c", + "/config/client.properties", + ] + command: ["--help"] # Default command, will be overridden when running specific commands + # =========================================== + # Data Storage Services (Sample Data) + # =========================================== + + # ScalarDB managed storage - Cassandra scalardb-cassandra: image: cassandra:3.11 - ports: - - 9042 + expose: + - 9042 # CQL native transport volumes: - scalardb-cassandra-data:/var/lib/cassandra environment: @@ -52,14 +83,15 @@ services: - scalar-network healthcheck: test: ["CMD", "cqlsh", "-e", "exit"] - interval: 1s - timeout: 1s + interval: 5s + timeout: 3s retries: 10 - start_period: 10s + start_period: 5s + # ScalarDB managed storage - MySQL scalardb-mysql: - image: mysql:8.0 - ports: + image: mysql:9 + expose: - 3306 volumes: - scalardb-mysql-data:/var/lib/mysql @@ -70,14 +102,15 @@ services: - scalar-network healthcheck: test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root"] - interval: 1s - timeout: 1s + interval: 5s + timeout: 3s retries: 10 start_period: 5s + # Direct access storage - PostgreSQL (for federated queries) postgres: - image: postgres:15.1 - ports: + image: postgres:17 + expose: - 5432 volumes: - postgres-data:/var/lib/postgresql/data @@ -91,16 +124,65 @@ services: - scalar-network healthcheck: test: ["CMD", "psql", "-U", "postgres", "-c", "select 1"] - interval: 1s - timeout: 1s + interval: 5s + timeout: 3s retries: 10 start_period: 5s + # =========================================== + # Data Loading Services + # =========================================== + + # Sample data loader for initial data setup + sample-data-loader: + build: + context: sample-data-loader + dockerfile: Dockerfile + volumes: + - ./config/scalardb.properties:/etc/scalardb.properties + - ./schema.json:/etc/schema.json + - ./data:/data + networks: + - scalar-network + profiles: + - dev + depends_on: + - scalardb-cassandra + - scalardb-mysql + - postgres + command: ["/app/bin/sample-data-loader"] + + # =========================================== + # Query Execution Services + # =========================================== + + # Spark SQL interactive shell + spark-sql: + build: + context: ./docker + dockerfile: Dockerfile.spark + volumes: + - ./config/scalardb.properties:/etc/scalardb.properties + - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf + - .scala_history:/root/.scala_history + - spark-ivy-cache:/root/.ivy2 + - spark-m2-cache:/root/.m2 + networks: + - scalar-network + profiles: + - dev + depends_on: + - scalardb-analytics-server + command: + - "/opt/spark/bin/spark-sql" + volumes: - analytics-data: {} scalardb-cassandra-data: {} scalardb-mysql-data: {} postgres-data: {} + analytics-catalog-data: {} + spark-ivy-cache: {} + spark-m2-cache: {} networks: scalar-network: {} diff --git a/scalardb-analytics-spark-sample/docker/Dockerfile.spark b/scalardb-analytics-spark-sample/docker/Dockerfile.spark index 42f1c6df..b31f4caa 100644 --- a/scalardb-analytics-spark-sample/docker/Dockerfile.spark +++ b/scalardb-analytics-spark-sample/docker/Dockerfile.spark @@ -1,20 +1,28 @@ FROM eclipse-temurin:17-jre-jammy +ENV SPARK_VERSION=3.5.6 \ + HADOOP_VERSION=3 \ + SPARK_HOME=/opt/spark \ + PATH="/opt/spark/bin:/opt/spark/sbin:${PATH}" \ + SPARK_NO_DAEMONIZE=true -WORKDIR /work - -ENV SPARK_VERSION 3.5.3 +WORKDIR /tmp +# Install dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ procps \ - curl && \ + curl \ + ca-certificates && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -SHELL ["/bin/bash", "-o", "pipefail", "-c"] -RUN curl -SL "https://dlcdn.apache.org/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz" | tar -xzC /opt - -RUN mv "/opt/spark-$SPARK_VERSION-bin-hadoop3" /opt/spark +# Download and verify Spark +RUN curl -fsSL -o spark.tgz "https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ + curl -fsSL -o spark.tgz.sha512 "https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz.sha512" && \ + sha512sum -c spark.tgz.sha512 && \ + tar -xzf spark.tgz -C /opt && \ + mv "/opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" "${SPARK_HOME}" && \ + rm -rf spark.tgz spark.tgz.sha512 WORKDIR /opt/spark diff --git a/scalardb-analytics-spark-sample/sample-data-loader/Dockerfile b/scalardb-analytics-spark-sample/sample-data-loader/Dockerfile index fe0e9486..ec51ce73 100644 --- a/scalardb-analytics-spark-sample/sample-data-loader/Dockerfile +++ b/scalardb-analytics-spark-sample/sample-data-loader/Dockerfile @@ -3,8 +3,8 @@ FROM eclipse-temurin:17-jdk-jammy AS builder COPY . /app WORKDIR /app -RUN ./gradlew shadowJar +RUN chmod +x gradlew && ./gradlew installDist FROM eclipse-temurin:17-jre-jammy -COPY --from=builder /app/build/libs/sample-data-loader-all.jar /app.jar +COPY --from=builder /app/build/install/sample-data-loader /app diff --git a/scalardb-analytics-spark-sample/sample-data-loader/build.gradle.kts b/scalardb-analytics-spark-sample/sample-data-loader/build.gradle.kts index 9c2b6d5b..064f26e8 100644 --- a/scalardb-analytics-spark-sample/sample-data-loader/build.gradle.kts +++ b/scalardb-analytics-spark-sample/sample-data-loader/build.gradle.kts @@ -1,6 +1,5 @@ plugins { application - id("com.gradleup.shadow") version "8.3.5" id("com.diffplug.spotless") version "6.24.0" } @@ -9,8 +8,8 @@ repositories { } dependencies { - implementation("com.scalar-labs:scalardb:3.14.0") - implementation("com.scalar-labs:scalardb-schema-loader:3.14.0") + implementation("com.scalar-labs:scalardb:3.16.1") + implementation("com.scalar-labs:scalardb-schema-loader:3.16.1") implementation("org.apache.commons:commons-csv:1.10.0") implementation("io.netty:netty-transport-native-epoll:4.1.99.Final:linux-x86_64") diff --git a/scalardb-analytics-spark-sample/spark-defaults.conf b/scalardb-analytics-spark-sample/spark-defaults.conf deleted file mode 100644 index ac239c94..00000000 --- a/scalardb-analytics-spark-sample/spark-defaults.conf +++ /dev/null @@ -1,24 +0,0 @@ -# Use the ScalarDB Analytics catalog as `test_catalog` -spark.sql.catalog.test_catalog com.scalar.db.analytics.spark.ScalarDbAnalyticsCatalog - -# Enable Spark extension for ScalarDB Analytics -spark.sql.extensions com.scalar.db.analytics.spark.extension.ScalarDbAnalyticsExtensions - -# Set `test_catalog` as the default catalog -spark.sql.defaultCatalog test_catalog - -# Confiture the ScalarDB Analytics license. PLEASE REPLACE THESE VALUES WITH YOUR LICENSE KEY AND CERTIFICATE CONTENTS -spark.sql.catalog.test_catalog.license.key -spark.sql.catalog.test_catalog.license.cert_pem - -# Configure the ScalarDB Analytics catalog for ScalarDB -spark.sql.catalog.test_catalog.data_source.scalardb.type scalardb -spark.sql.catalog.test_catalog.data_source.scalardb.config_path /etc/scalardb.properties - -# Configure the ScalarDB Analytics catalog for PostgreSQL, which is not managed by ScalarDB -spark.sql.catalog.test_catalog.data_source.postgresql.type postgresql -spark.sql.catalog.test_catalog.data_source.postgresql.host postgres -spark.sql.catalog.test_catalog.data_source.postgresql.port 5432 -spark.sql.catalog.test_catalog.data_source.postgresql.username postgres -spark.sql.catalog.test_catalog.data_source.postgresql.password postgres -spark.sql.catalog.test_catalog.data_source.postgresql.database sampledb diff --git a/scalardb-analytics-spark-sample/sql/postgres_copy.sql b/scalardb-analytics-spark-sample/sql/postgres_copy.sql index bf233f5a..22b2224a 100644 --- a/scalardb-analytics-spark-sample/sql/postgres_copy.sql +++ b/scalardb-analytics-spark-sample/sql/postgres_copy.sql @@ -1,5 +1,4 @@ -create schema sample_ns; -create table sample_ns.customer ( +create table customer ( c_custkey int, c_name text, c_address text, @@ -10,4 +9,4 @@ create table sample_ns.customer ( c_comment text, PRIMARY KEY (c_custkey) ); -\copy sample_ns.customer from '/opt/customer.csv' delimiter ',' csv; +\copy customer from '/opt/customer.csv' delimiter ',' csv; From 279f97f97c0d6700e141e5c1bab3f5d90d55fc60 Mon Sep 17 00:00:00 2001 From: Akihiro Okuno Date: Mon, 28 Jul 2025 23:35:34 +0900 Subject: [PATCH 2/5] feat(scalardb-analytics-spark-sample): enable automatic sample data loading - Remove profile requirement for sample-data-loader service - Add volume-based flag to track data loading completion - Implement idempotent data loading with skip on subsequent runs - Add healthcheck dependencies for database readiness --- .../docker-compose.yml | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/scalardb-analytics-spark-sample/docker-compose.yml b/scalardb-analytics-spark-sample/docker-compose.yml index f6d4f3cf..82500714 100644 --- a/scalardb-analytics-spark-sample/docker-compose.yml +++ b/scalardb-analytics-spark-sample/docker-compose.yml @@ -142,15 +142,23 @@ services: - ./config/scalardb.properties:/etc/scalardb.properties - ./schema.json:/etc/schema.json - ./data:/data + - sample-data-flags:/flags networks: - scalar-network - profiles: - - dev depends_on: - - scalardb-cassandra - - scalardb-mysql - - postgres - command: ["/app/bin/sample-data-loader"] + scalardb-cassandra: + condition: service_healthy + scalardb-mysql: + condition: service_healthy + entrypoint: | + sh -c ' + if [ -f /flags/.data-loaded ]; then + echo "Sample data already loaded, skipping..." + exit 0 + fi + echo "Loading sample data..." + /app/bin/sample-data-loader && touch /flags/.data-loaded + ' # =========================================== # Query Execution Services @@ -183,6 +191,7 @@ volumes: analytics-catalog-data: {} spark-ivy-cache: {} spark-m2-cache: {} + sample-data-flags: {} networks: scalar-network: {} From 5f8790c7a1701c5d17559fc935c496990975a4e3 Mon Sep 17 00:00:00 2001 From: Akihiro Okuno Date: Mon, 28 Jul 2025 23:39:25 +0900 Subject: [PATCH 3/5] refactor(scalardb-analytics-spark-sample): standardize configuration file naming - Rename analytics-server.properties to scalardb-analytics-server.properties - Rename analytics-cli-config.properties to scalardb-analytics-cli.properties - Replace hyphens with underscores in property names for consistency - Update docker-compose.yml volume mappings --- ....properties => scalardb-analytics-cli.properties} | 0 ...operties => scalardb-analytics-server.properties} | 12 ++++++------ scalardb-analytics-spark-sample/docker-compose.yml | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) rename scalardb-analytics-spark-sample/config/{analytics-cli-config.properties => scalardb-analytics-cli.properties} (100%) rename scalardb-analytics-spark-sample/config/{analytics-server.properties => scalardb-analytics-server.properties} (73%) diff --git a/scalardb-analytics-spark-sample/config/analytics-cli-config.properties b/scalardb-analytics-spark-sample/config/scalardb-analytics-cli.properties similarity index 100% rename from scalardb-analytics-spark-sample/config/analytics-cli-config.properties rename to scalardb-analytics-spark-sample/config/scalardb-analytics-cli.properties diff --git a/scalardb-analytics-spark-sample/config/analytics-server.properties b/scalardb-analytics-spark-sample/config/scalardb-analytics-server.properties similarity index 73% rename from scalardb-analytics-spark-sample/config/analytics-server.properties rename to scalardb-analytics-spark-sample/config/scalardb-analytics-server.properties index 11b381a0..3ad3a442 100644 --- a/scalardb-analytics-spark-sample/config/analytics-server.properties +++ b/scalardb-analytics-spark-sample/config/scalardb-analytics-server.properties @@ -11,18 +11,18 @@ scalar.db.analytics.server.db.password=analytics # Server database connection pool configuration scalar.db.analytics.server.db.pool.size=10 -scalar.db.analytics.server.db.pool.max-lifetime=1800000 -scalar.db.analytics.server.db.pool.connection-timeout=30000 -scalar.db.analytics.server.db.pool.minimum-idle=5 -scalar.db.analytics.server.db.pool.idle-timeout=600000 +scalar.db.analytics.server.db.pool.max_lifetime=1800000 +scalar.db.analytics.server.db.pool.connection_timeout=30000 +scalar.db.analytics.server.db.pool.minimum_idle=5 +scalar.db.analytics.server.db.pool.idle_timeout=600000 # Metering storage configuration (filesystem for development) scalar.db.analytics.server.metering.storage.provider=filesystem scalar.db.analytics.server.metering.storage.path=/tmp/metering # License configuration (required for production) -# scalar.db.analytics.server.licensing.license-key= -# scalar.db.analytics.server.licensing.license-check-cert-pem= +# scalar.db.analytics.server.licensing.license_key= +# scalar.db.analytics.server.licensing.license_check_cert_pem= # Logging configuration logging.level.root=INFO diff --git a/scalardb-analytics-spark-sample/docker-compose.yml b/scalardb-analytics-spark-sample/docker-compose.yml index 82500714..2ad11763 100644 --- a/scalardb-analytics-spark-sample/docker-compose.yml +++ b/scalardb-analytics-spark-sample/docker-compose.yml @@ -32,7 +32,7 @@ services: - 11051 # Catalog service port - 11052 # Metering service port volumes: - - ./config/analytics-server.properties:/scalardb-analytics-server/server.properties:ro + - ./config/scalardb-analytics-server.properties:/scalardb-analytics-server/server.properties:ro - ./config/scalardb.properties:/etc/scalardb.properties:ro networks: - scalar-network @@ -49,7 +49,7 @@ services: scalardb-analytics-cli: image: ghcr.io/scalar-labs/scalardb-analytics-cli:3.16.2 volumes: - - ./config/analytics-cli-config.properties:/config/client.properties:ro + - ./config/scalardb-analytics-cli.properties:/config/client.properties:ro - ./config/data-sources:/config/data-sources:ro networks: - scalar-network From d1610003dd80bb751da9a3a62f451a4e8d2c0b23 Mon Sep 17 00:00:00 2001 From: Akihiro Okuno Date: Mon, 28 Jul 2025 23:40:32 +0900 Subject: [PATCH 4/5] docs(scalardb-analytics-spark-sample): update README for automatic data loading - Remove manual sample-data-loader step - Add --wait flag to docker compose up command - Update step numbering - Add note about automatic data loading on first run --- scalardb-analytics-spark-sample/README.md | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/scalardb-analytics-spark-sample/README.md b/scalardb-analytics-spark-sample/README.md index a9f7a2b0..d198b0ee 100644 --- a/scalardb-analytics-spark-sample/README.md +++ b/scalardb-analytics-spark-sample/README.md @@ -5,22 +5,18 @@ ### 1. Start services ```bash -docker compose up -d +docker compose up -d --wait ``` -### 2. Load sample data +This command will start all services and automatically load sample data on the first run. -```bash -docker compose run --rm sample-data-loader -``` - -### 3. Create catalog +### 2. Create catalog ```bash docker compose run --rm scalardb-analytics-cli catalog create --catalog sample_catalog ``` -### 4. Register data sources +### 3. Register data sources ```bash # Register ScalarDB data source @@ -30,7 +26,7 @@ docker compose run --rm scalardb-analytics-cli data-source register --data-sourc docker compose run --rm scalardb-analytics-cli data-source register --data-source-json /config/data-sources/postgres.json ``` -### 5. Run Spark SQL +### 4. Run Spark SQL ```bash docker compose run --rm spark-sql From c7b101dd9fedfde5651933b337fc7b548e3e2885 Mon Sep 17 00:00:00 2001 From: Akihiro Okuno Date: Thu, 31 Jul 2025 00:35:00 +0900 Subject: [PATCH 5/5] refactor(scalardb-analytics-sample): rename directory from scalardb-analytics-spark-sample - Rename directory from 'scalardb-analytics-spark-sample' to 'scalardb-analytics-sample' - Update README.md title to match new directory name - This change reflects that the sample is not limited to Spark but covers ScalarDB Analytics in general --- .../.gitignore | 0 .../README.md | 2 +- .../config/data-sources/postgres.json | 0 .../config/data-sources/scalardb.json | 0 .../config/scalardb-analytics-cli.properties | 0 .../config/scalardb-analytics-server.properties | 0 .../config/scalardb.properties | 0 .../config/spark-defaults.conf | 0 .../data/customer.csv | 0 .../data/lineitem.csv | 0 .../data/orders.csv | 0 .../docker-compose.yml | 0 .../docker/Dockerfile.spark | 0 .../sample-data-loader/.gitattributes | 0 .../sample-data-loader/.gitignore | 0 .../sample-data-loader/Dockerfile | 0 .../sample-data-loader/build.gradle.kts | 0 .../gradle/wrapper/gradle-wrapper.jar | Bin .../gradle/wrapper/gradle-wrapper.properties | 0 .../sample-data-loader/gradlew | 0 .../sample-data-loader/gradlew.bat | 0 .../sample-data-loader/settings.gradle.kts | 0 .../src/main/java/sample/data/Loader.java | 0 .../src/main/java/sample/data/Main.java | 0 .../schema.json | 0 .../sql/postgres_copy.sql | 0 26 files changed, 1 insertion(+), 1 deletion(-) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/.gitignore (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/README.md (96%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/config/data-sources/postgres.json (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/config/data-sources/scalardb.json (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/config/scalardb-analytics-cli.properties (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/config/scalardb-analytics-server.properties (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/config/scalardb.properties (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/config/spark-defaults.conf (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/data/customer.csv (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/data/lineitem.csv (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/data/orders.csv (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/docker-compose.yml (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/docker/Dockerfile.spark (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/.gitattributes (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/.gitignore (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/Dockerfile (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/build.gradle.kts (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/gradle/wrapper/gradle-wrapper.jar (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/gradle/wrapper/gradle-wrapper.properties (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/gradlew (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/gradlew.bat (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/settings.gradle.kts (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/src/main/java/sample/data/Loader.java (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sample-data-loader/src/main/java/sample/data/Main.java (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/schema.json (100%) rename {scalardb-analytics-spark-sample => scalardb-analytics-sample}/sql/postgres_copy.sql (100%) diff --git a/scalardb-analytics-spark-sample/.gitignore b/scalardb-analytics-sample/.gitignore similarity index 100% rename from scalardb-analytics-spark-sample/.gitignore rename to scalardb-analytics-sample/.gitignore diff --git a/scalardb-analytics-spark-sample/README.md b/scalardb-analytics-sample/README.md similarity index 96% rename from scalardb-analytics-spark-sample/README.md rename to scalardb-analytics-sample/README.md index d198b0ee..5b7176f3 100644 --- a/scalardb-analytics-spark-sample/README.md +++ b/scalardb-analytics-sample/README.md @@ -1,4 +1,4 @@ -# ScalarDB Analytics Spark Sample +# ScalarDB Analytics Sample ## Setup diff --git a/scalardb-analytics-spark-sample/config/data-sources/postgres.json b/scalardb-analytics-sample/config/data-sources/postgres.json similarity index 100% rename from scalardb-analytics-spark-sample/config/data-sources/postgres.json rename to scalardb-analytics-sample/config/data-sources/postgres.json diff --git a/scalardb-analytics-spark-sample/config/data-sources/scalardb.json b/scalardb-analytics-sample/config/data-sources/scalardb.json similarity index 100% rename from scalardb-analytics-spark-sample/config/data-sources/scalardb.json rename to scalardb-analytics-sample/config/data-sources/scalardb.json diff --git a/scalardb-analytics-spark-sample/config/scalardb-analytics-cli.properties b/scalardb-analytics-sample/config/scalardb-analytics-cli.properties similarity index 100% rename from scalardb-analytics-spark-sample/config/scalardb-analytics-cli.properties rename to scalardb-analytics-sample/config/scalardb-analytics-cli.properties diff --git a/scalardb-analytics-spark-sample/config/scalardb-analytics-server.properties b/scalardb-analytics-sample/config/scalardb-analytics-server.properties similarity index 100% rename from scalardb-analytics-spark-sample/config/scalardb-analytics-server.properties rename to scalardb-analytics-sample/config/scalardb-analytics-server.properties diff --git a/scalardb-analytics-spark-sample/config/scalardb.properties b/scalardb-analytics-sample/config/scalardb.properties similarity index 100% rename from scalardb-analytics-spark-sample/config/scalardb.properties rename to scalardb-analytics-sample/config/scalardb.properties diff --git a/scalardb-analytics-spark-sample/config/spark-defaults.conf b/scalardb-analytics-sample/config/spark-defaults.conf similarity index 100% rename from scalardb-analytics-spark-sample/config/spark-defaults.conf rename to scalardb-analytics-sample/config/spark-defaults.conf diff --git a/scalardb-analytics-spark-sample/data/customer.csv b/scalardb-analytics-sample/data/customer.csv similarity index 100% rename from scalardb-analytics-spark-sample/data/customer.csv rename to scalardb-analytics-sample/data/customer.csv diff --git a/scalardb-analytics-spark-sample/data/lineitem.csv b/scalardb-analytics-sample/data/lineitem.csv similarity index 100% rename from scalardb-analytics-spark-sample/data/lineitem.csv rename to scalardb-analytics-sample/data/lineitem.csv diff --git a/scalardb-analytics-spark-sample/data/orders.csv b/scalardb-analytics-sample/data/orders.csv similarity index 100% rename from scalardb-analytics-spark-sample/data/orders.csv rename to scalardb-analytics-sample/data/orders.csv diff --git a/scalardb-analytics-spark-sample/docker-compose.yml b/scalardb-analytics-sample/docker-compose.yml similarity index 100% rename from scalardb-analytics-spark-sample/docker-compose.yml rename to scalardb-analytics-sample/docker-compose.yml diff --git a/scalardb-analytics-spark-sample/docker/Dockerfile.spark b/scalardb-analytics-sample/docker/Dockerfile.spark similarity index 100% rename from scalardb-analytics-spark-sample/docker/Dockerfile.spark rename to scalardb-analytics-sample/docker/Dockerfile.spark diff --git a/scalardb-analytics-spark-sample/sample-data-loader/.gitattributes b/scalardb-analytics-sample/sample-data-loader/.gitattributes similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/.gitattributes rename to scalardb-analytics-sample/sample-data-loader/.gitattributes diff --git a/scalardb-analytics-spark-sample/sample-data-loader/.gitignore b/scalardb-analytics-sample/sample-data-loader/.gitignore similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/.gitignore rename to scalardb-analytics-sample/sample-data-loader/.gitignore diff --git a/scalardb-analytics-spark-sample/sample-data-loader/Dockerfile b/scalardb-analytics-sample/sample-data-loader/Dockerfile similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/Dockerfile rename to scalardb-analytics-sample/sample-data-loader/Dockerfile diff --git a/scalardb-analytics-spark-sample/sample-data-loader/build.gradle.kts b/scalardb-analytics-sample/sample-data-loader/build.gradle.kts similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/build.gradle.kts rename to scalardb-analytics-sample/sample-data-loader/build.gradle.kts diff --git a/scalardb-analytics-spark-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.jar b/scalardb-analytics-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.jar similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.jar rename to scalardb-analytics-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.jar diff --git a/scalardb-analytics-spark-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.properties b/scalardb-analytics-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.properties similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.properties rename to scalardb-analytics-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.properties diff --git a/scalardb-analytics-spark-sample/sample-data-loader/gradlew b/scalardb-analytics-sample/sample-data-loader/gradlew similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/gradlew rename to scalardb-analytics-sample/sample-data-loader/gradlew diff --git a/scalardb-analytics-spark-sample/sample-data-loader/gradlew.bat b/scalardb-analytics-sample/sample-data-loader/gradlew.bat similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/gradlew.bat rename to scalardb-analytics-sample/sample-data-loader/gradlew.bat diff --git a/scalardb-analytics-spark-sample/sample-data-loader/settings.gradle.kts b/scalardb-analytics-sample/sample-data-loader/settings.gradle.kts similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/settings.gradle.kts rename to scalardb-analytics-sample/sample-data-loader/settings.gradle.kts diff --git a/scalardb-analytics-spark-sample/sample-data-loader/src/main/java/sample/data/Loader.java b/scalardb-analytics-sample/sample-data-loader/src/main/java/sample/data/Loader.java similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/src/main/java/sample/data/Loader.java rename to scalardb-analytics-sample/sample-data-loader/src/main/java/sample/data/Loader.java diff --git a/scalardb-analytics-spark-sample/sample-data-loader/src/main/java/sample/data/Main.java b/scalardb-analytics-sample/sample-data-loader/src/main/java/sample/data/Main.java similarity index 100% rename from scalardb-analytics-spark-sample/sample-data-loader/src/main/java/sample/data/Main.java rename to scalardb-analytics-sample/sample-data-loader/src/main/java/sample/data/Main.java diff --git a/scalardb-analytics-spark-sample/schema.json b/scalardb-analytics-sample/schema.json similarity index 100% rename from scalardb-analytics-spark-sample/schema.json rename to scalardb-analytics-sample/schema.json diff --git a/scalardb-analytics-spark-sample/sql/postgres_copy.sql b/scalardb-analytics-sample/sql/postgres_copy.sql similarity index 100% rename from scalardb-analytics-spark-sample/sql/postgres_copy.sql rename to scalardb-analytics-sample/sql/postgres_copy.sql