diff --git a/demos/demos-v2.yaml b/demos/demos-v2.yaml index e41560d3..4e04efb6 100644 --- a/demos/demos-v2.yaml +++ b/demos/demos-v2.yaml @@ -147,25 +147,6 @@ demos: cpu: 6800m memory: 15822Mi pvc: 28Gi - trino-subsea-data: - description: Demo loading ca. 600m^2 of ocean floor in a surface plot to visualize the irregularities of the ocean floor. - # documentation: -- Currently not documented - stackableStack: trino-superset-s3 - labels: - - trino - - superset - - minio - - s3 - - parquet - manifests: - - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/load-test-data.yaml - - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/create-table-in-trino.yaml - - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/setup-superset.yaml - supportedNamespaces: [] - resourceRequests: - cpu: 6800m - memory: 15822Mi - pvc: 28Gi data-lakehouse-iceberg-trino-spark: description: Data lakehouse using Iceberg lakehouse on S3, Trino as query engine, Spark for streaming ingest and Superset for data visualization. Multiple datasources like taxi data, water levels in Germany, earthquakes, e-charging stations and more are loaded. documentation: https://docs.stackable.tech/stackablectl/stable/demos/data-lakehouse-iceberg-trino-spark.html diff --git a/demos/trino-subsea-data/create-table-in-trino.yaml b/demos/trino-subsea-data/create-table-in-trino.yaml deleted file mode 100644 index 1c5dec46..00000000 --- a/demos/trino-subsea-data/create-table-in-trino.yaml +++ /dev/null @@ -1,83 +0,0 @@ ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: create-subsea-multibeam-table-in-trino -spec: - template: - spec: - containers: - - name: create-subsea-multibeam-table-in-trino - image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable24.7.0 - command: ["bash", "-c", "python -u /tmp/script/script.py"] - volumeMounts: - - name: script - mountPath: /tmp/script - - name: trino-users - mountPath: /trino-users - volumes: - - name: script - configMap: - name: create-subsea-multibeam-table-in-trino-script - - name: trino-users - secret: - secretName: trino-users - restartPolicy: OnFailure - backoffLimit: 50 ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: create-subsea-multibeam-table-in-trino-script -data: - script.py: | - import sys - import trino - - if not sys.warnoptions: - import warnings - warnings.simplefilter("ignore") - - def get_connection(): - connection = trino.dbapi.connect( - host="trino-coordinator", - port=8443, - user="admin", - http_scheme='https', - auth=trino.auth.BasicAuthentication("admin", open("/trino-users/admin").read()), - ) - connection._http_session.verify = False - return connection - - def run_query(connection, query): - print(f"[DEBUG] Executing query {query}") - cursor = connection.cursor() - cursor.execute(query) - return cursor.fetchall() - - connection = get_connection() - - run_query(connection, "CREATE SCHEMA IF NOT EXISTS hive.demo WITH (location = 's3a://demo/')") - run_query(connection, """ - CREATE TABLE IF NOT EXISTS hive.demo.subsea ( - footprint_x DOUBLE, - footprint_y DOUBLE, - water_depth DOUBLE, - data_point_density DOUBLE, - geometry VARBINARY - ) WITH ( - external_location = 's3a://demo/subsea/', - format = 'parquet' - ) - """) - - loaded_rows = run_query(connection, "SELECT COUNT(*) FROM hive.demo.subsea")[0][0] - print(f"Loaded {loaded_rows} rows") - assert loaded_rows > 0 - - print("Analyzing table subsea") - analyze_rows = run_query(connection, """ANALYZE hive.demo.subsea""")[0][0] - assert analyze_rows == loaded_rows - stats = run_query(connection, """show stats for hive.demo.subsea""") - print("Produced the following stats:") - print(*stats, sep="\n") diff --git a/demos/trino-subsea-data/load-test-data.yaml b/demos/trino-subsea-data/load-test-data.yaml deleted file mode 100644 index 8971b06c..00000000 --- a/demos/trino-subsea-data/load-test-data.yaml +++ /dev/null @@ -1,21 +0,0 @@ ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: load-subsea-multibeam-data -spec: - template: - spec: - containers: - - name: load-subsea-multibeam-data - image: "bitnami/minio:2024-debian-12" - command: ["bash", "-c", "cd /tmp && curl -O https://repo.stackable.tech/repository/misc/marispace/multibeam_data_point_density_example.parquet && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey) $(cat /minio-s3-credentials/secretKey) && mc cp multibeam_data_point_density_example.parquet minio/demo/subsea"] - volumeMounts: - - name: minio-s3-credentials - mountPath: /minio-s3-credentials - volumes: - - name: minio-s3-credentials - secret: - secretName: minio-s3-credentials - restartPolicy: OnFailure - backoffLimit: 50 diff --git a/demos/trino-subsea-data/setup-superset.yaml b/demos/trino-subsea-data/setup-superset.yaml deleted file mode 100644 index 51fa8b5d..00000000 --- a/demos/trino-subsea-data/setup-superset.yaml +++ /dev/null @@ -1,95 +0,0 @@ ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: setup-superset -spec: - template: - spec: - containers: - - name: setup-superset - image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable24.7.0 - command: ["bash", "-c", "curl -o superset-assets.zip https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/superset-assets.zip && python -u /tmp/script/script.py"] - volumeMounts: - - name: script - mountPath: /tmp/script - - name: trino-users - mountPath: /trino-users - - name: superset-credentials - mountPath: /superset-credentials - volumes: - - name: script - configMap: - name: setup-superset-script - - name: superset-credentials - secret: - secretName: superset-credentials - - name: trino-users - secret: - secretName: trino-users - restartPolicy: OnFailure - backoffLimit: 50 ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: setup-superset-script -data: - script.py: | - import logging - import requests - - base_url = "http://superset-external:8088" # For local testing / developing replace it, afterwards change back to http://superset-external:8088 - superset_username = open("/superset-credentials/adminUser.username").read() - superset_password = open("/superset-credentials/adminUser.password").read() - trino_username = "admin" - trino_password = open("/trino-users/admin").read() - - logging.basicConfig(level=logging.INFO) - logging.info("Starting setup of Superset") - - logging.info("Getting access token from /api/v1/security/login") - session = requests.session() - access_token = session.post(f"{base_url}/api/v1/security/login", json={"username": superset_username, "password": superset_password, "provider": "db", "refresh": True}).json()['access_token'] - # print(f"access_token: {access_token}") - - logging.info("Getting csrf token from /api/v1/security/csrf_token") - csrf_token = session.get(f"{base_url}/api/v1/security/csrf_token", headers={"Authorization": f"Bearer {access_token}"}).json()["result"] - # print(f"csrf_token: {csrf_token}") - - headers = { - "accept": "application/json", - "Authorization": f"Bearer {access_token}", - "X-CSRFToken": csrf_token, - } - - # To retrieve all of the assets (datasources, datasets, charts and dashboards) run the following commands - # logging.info("Exporting all assets") - # result = session.get(f"{base_url}/api/v1/assets/export", headers=headers) - # assert result.status_code == 200 - # with open("superset-assets.zip", "wb") as f: - # f.write(result.content) - - - ######################### - # IMPORTANT - ######################### - # The exported zip file had to be modified, otherwise we get: - # - # {"errors": [{"message": "Error importing assets", "error_type": "GENERIC_COMMAND_ERROR", "level": "warning", "extra": {"databases/Trino.yaml": {"extra": {"disable_data_preview": ["Unknown field."]}}, "issue_codes": [{"code": 1010, "message": "Issue 1010 - Superset encountered an error while running a command."}]}}]} - # - # The file databases/Trino.yaml was modified and the attribute "extra.disable_data_preview" was removed - ######################### - logging.info("Importing all assets") - files = { - "bundle": ("superset-assets.zip", open("superset-assets.zip", "rb")), - } - data = { - "passwords": '{"databases/Trino.yaml": "' + trino_password + '"}' - } - result = session.post(f"{base_url}/api/v1/assets/import", headers=headers, files=files, data=data) - print(result) - print(result.text) - assert result.status_code == 200 - - logging.info("Finished setup of Superset") diff --git a/demos/trino-subsea-data/superset-assets.zip b/demos/trino-subsea-data/superset-assets.zip deleted file mode 100644 index 60a02552..00000000 Binary files a/demos/trino-subsea-data/superset-assets.zip and /dev/null differ diff --git a/stacks/trino-superset-s3/superset.yaml b/stacks/trino-superset-s3/superset.yaml index b8363fae..24a8cfd4 100644 --- a/stacks/trino-superset-s3/superset.yaml +++ b/stacks/trino-superset-s3/superset.yaml @@ -14,11 +14,6 @@ spec: roleGroups: default: replicas: 1 - configOverrides: - superset_config.py: - # Needed by trino-subsea-data demo - ROW_LIMIT: "200000" - SQL_MAX_ROW: "200000" --- apiVersion: v1 kind: Secret