Skip to content

Commit 988cf3a

Browse files
GuyEshdatharitamar
andauthored
Ele 4874 dremio (#838)
Added Dremio support Co-authored-by: Itamar Hartstein <[email protected]>
1 parent eda3219 commit 988cf3a

File tree

55 files changed

+513
-86
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+513
-86
lines changed

.github/workflows/test-all-warehouses.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ jobs:
4949
athena,
5050
trino,
5151
clickhouse,
52+
dremio,
5253
]
5354
include:
5455
# If we're not running on a specific dbt version, then always add postgres on 1.8.0

.github/workflows/test-warehouse.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ on:
1717
- athena
1818
- trino
1919
- clickhouse
20+
- dremio
2021
elementary-ref:
2122
type: string
2223
required: false
@@ -88,6 +89,11 @@ jobs:
8889
working-directory: ${{ env.TESTS_DIR }}
8990
run: docker compose up -d clickhouse
9091

92+
- name: Start Dremio
93+
if: inputs.warehouse-type == 'dremio'
94+
working-directory: ${{ env.TESTS_DIR }}
95+
run: docker compose -f docker-compose-dremio.yml up -d
96+
9197
- name: Setup Python
9298
uses: actions/setup-python@v4
9399
with:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
target/
22
dbt_packages/
3+
dbt_internal_packages/
34
logs/
45
scripts/
56

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,32 @@
11
{% macro generate_schema_name(custom_schema_name, node) -%}
2-
{%- set schema_name = target.schema -%}
3-
{% if custom_schema_name %}
4-
{% set schema_name = "{}_{}".format(schema_name, custom_schema_name) %}
5-
{% endif %}
6-
2+
{% set schema_name = get_default_schema_name(custom_schema_name, node) %}
73
{% set schema_name_suffix_by_var = var('schema_name_suffix', '') %}
84
{% if schema_name_suffix_by_var %}
95
{% set schema_name = schema_name + schema_name_suffix_by_var %}
106
{% endif %}
117

128
{% do return(schema_name) %}
139
{%- endmacro %}
10+
11+
{% macro get_default_schema_name(custom_schema_name, node) -%}
12+
{% do return(adapter.dispatch('get_default_schema_name', 'elementary_tests')(custom_schema_name, node)) %}
13+
{% endmacro %}
14+
15+
{% macro default__get_default_schema_name(custom_schema_name, node) -%}
16+
{%- set schema_name = target.schema -%}
17+
{% if custom_schema_name %}
18+
{% set schema_name = "{}_{}".format(schema_name, custom_schema_name) %}
19+
{% endif %}
20+
{% do return(schema_name) %}
21+
{%- endmacro %}
22+
23+
{% macro dremio__get_default_schema_name(custom_schema_name, node) -%}
24+
{%- set default_schema = target.schema if not is_datalake_node(node) else target.root_path -%}
25+
{%- if not custom_schema_name -%}
26+
{% do return(default_schema) %}
27+
{%- elif default_schema == 'no_schema' -%}
28+
{% do return(custom_schema_name) %}
29+
{%- else -%}
30+
{% do return("{}_{}".format(default_schema, custom_schema_name)) %}
31+
{%- endif -%}
32+
{%- endmacro %}

integration_tests/dbt_project/models/one.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
)
77
}}
88

9-
SELECT 1 AS one
9+
SELECT 1 AS {{ elementary.escape_reserved_keywords('one') }}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
services:
2+
# Nessie Catalog Server Using In-Memory Store
3+
nessie:
4+
image: projectnessie/nessie:latest
5+
container_name: catalog
6+
networks:
7+
- dremio-lakehouse
8+
ports:
9+
- 19120:19120
10+
11+
# Minio Storage Server
12+
minio:
13+
image: minio/minio:latest
14+
container_name: storage
15+
environment:
16+
- MINIO_ROOT_USER=admin
17+
- MINIO_ROOT_PASSWORD=password
18+
- MINIO_DOMAIN=storage
19+
- MINIO_REGION_NAME=us-east-1
20+
- MINIO_REGION=us-east-1
21+
networks:
22+
- dremio-lakehouse
23+
ports:
24+
- 9001:9001
25+
- 9000:9000
26+
command: ["server", "/data", "--console-address", ":9001"]
27+
volumes:
28+
- minio_data:/data
29+
30+
minio-setup:
31+
image: minio/mc
32+
container_name: minio-setup
33+
depends_on:
34+
- minio
35+
entrypoint: >
36+
/bin/sh -c "
37+
until (echo > /dev/tcp/minio/9000) >/dev/null 2>&1; do
38+
echo 'Waiting for MinIO...';
39+
sleep 2;
40+
done;
41+
mc alias set myminio http://minio:9000 admin password;
42+
mc mb myminio/datalake;
43+
mc ls myminio;
44+
"
45+
networks:
46+
- dremio-lakehouse
47+
48+
# Dremio
49+
dremio:
50+
image: dremio/dremio-oss:latest
51+
platform: linux/amd64
52+
ports:
53+
- 9047:9047
54+
- 31010:31010
55+
- 32010:32010
56+
- 45678:45678
57+
container_name: dremio
58+
environment:
59+
- DREMIO_JAVA_SERVER_EXTRA_OPTS=-Dpaths.dist=file:///opt/dremio/data/dist -Ddebug.addDefaultUser=true
60+
- SERVICES_COORDINATOR_ENABLED=true
61+
- SERVICES_EXECUTOR_ENABLED=true
62+
networks:
63+
- dremio-lakehouse
64+
volumes:
65+
- dremio_data:/opt/dremio/data:rw
66+
# Workaround for permission issues in podman
67+
user: "0"
68+
69+
dremio-setup:
70+
image: alpine:latest
71+
container_name: dremio-setup
72+
depends_on:
73+
- dremio
74+
volumes:
75+
- ./docker/dremio/dremio-setup.sh:/dremio-setup.sh
76+
command: sh /dremio-setup.sh
77+
networks:
78+
- dremio-lakehouse
79+
80+
networks:
81+
dremio-lakehouse:
82+
83+
volumes:
84+
dremio_data:
85+
minio_data:
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/sh
2+
3+
# Install required tools
4+
apk add --no-cache curl jq
5+
6+
# Wait for Dremio to be ready
7+
until curl -s http://dremio:9047; do
8+
echo "Waiting for Dremio..."
9+
sleep 5
10+
done
11+
12+
echo "Dremio is up. Proceeding with configuration..."
13+
14+
# Log in to Dremio to get the auth token
15+
AUTH_TOKEN=$(curl -s -X POST "http://dremio:9047/apiv2/login" \
16+
-H "Content-Type: application/json" \
17+
--data "{\"userName\":\"dremio\", \"password\":\"dremio123\"}" | jq -r .token)
18+
19+
# Check if AUTH_TOKEN is not empty
20+
if [ -z "$AUTH_TOKEN" ]; then
21+
echo "Failed to obtain Dremio auth token"
22+
exit 1
23+
fi
24+
25+
echo "Obtained Dremio auth token"
26+
27+
# Create the S3 source in Dremio
28+
curl -s -X PUT "http://dremio:9047/apiv2/source/S3Source" \
29+
-H "Content-Type: application/json" \
30+
-H "Authorization: _dremio$AUTH_TOKEN" \
31+
--data "{\"name\":\"S3Source\",\"config\":{\"credentialType\":\"ACCESS_KEY\",\"accessKey\":\"admin\",\"accessSecret\":\"password\",\"secure\":false,\"externalBucketList\":[],\"enableAsync\":true,\"enableFileStatusCheck\":true,\"rootPath\":\"/\",\"defaultCtasFormat\":\"ICEBERG\",\"propertyList\":[{\"name\":\"fs.s3a.path.style.access\",\"value\":\"true\"},{\"name\":\"fs.s3a.endpoint\",\"value\":\"minio:9000\"},{\"name\":\"dremio.s3.compat\",\"value\":\"true\"}],\"whitelistedBuckets\":[],\"isCachingEnabled\":false,\"maxCacheSpacePct\":100},\"type\":\"S3\",\"metadataPolicy\":{\"deleteUnavailableDatasets\":true,\"autoPromoteDatasets\":false,\"namesRefreshMillis\":3600000,\"datasetDefinitionRefreshAfterMillis\":3600000,\"datasetDefinitionExpireAfterMillis\":10800000,\"authTTLMillis\":86400000,\"updateMode\":\"PREFETCH_QUERIED\"}}"
32+
33+
echo "S3 Source created in Dremio"

integration_tests/tests/data_seeder.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import csv
2+
from contextlib import contextmanager
23
from pathlib import Path
3-
from typing import List
4+
from typing import Generator, List
45

56
from elementary.clients.dbt.base_dbt_runner import BaseDbtRunner
67
from logger import get_logger
@@ -18,7 +19,8 @@ def __init__(
1819
self.dbt_project_path = dbt_project_path
1920
self.seeds_dir_path = seeds_dir_path
2021

21-
def seed(self, data: List[dict], table_name: str):
22+
@contextmanager
23+
def seed(self, data: List[dict], table_name: str) -> Generator[None, None, None]:
2224
seed_path = self.seeds_dir_path.joinpath(f"{table_name}.csv")
2325
try:
2426
with seed_path.open("w") as seed_file:
@@ -28,5 +30,7 @@ def seed(self, data: List[dict], table_name: str):
2830
writer.writerows(data)
2931
seed_file.flush()
3032
self.dbt_runner.seed(select=str(relative_seed_path), full_refresh=True)
33+
34+
yield
3135
finally:
3236
seed_path.unlink()

integration_tests/tests/dbt_project.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
from contextlib import contextmanager, nullcontext
44
from pathlib import Path
55
from tempfile import NamedTemporaryFile
6-
from typing import Any, Dict, List, Literal, Optional, Union, overload
6+
from typing import Any, Dict, Generator, List, Literal, Optional, Union, overload
77
from uuid import uuid4
88

99
from data_seeder import DbtDataSeeder
10+
from dbt_utils import get_database_and_schema_properties
1011
from elementary.clients.dbt.base_dbt_runner import BaseDbtRunner
1112
from elementary.clients.dbt.factory import create_dbt_runner
1213
from logger import get_logger
@@ -42,7 +43,7 @@ def get_dbt_runner(target: str, project_dir: str) -> BaseDbtRunner:
4243
class DbtProject:
4344
def __init__(self, target: str, project_dir: str):
4445
self.dbt_runner = get_dbt_runner(target, project_dir)
45-
46+
self.target = target
4647
self.project_dir_path = Path(project_dir)
4748
self.models_dir_path = self.project_dir_path / "models"
4849
self.tmp_models_dir_path = self.models_dir_path / "tmp"
@@ -189,12 +190,16 @@ def test(
189190
test_id, materialization
190191
)
191192
else:
193+
database_property, schema_property = get_database_and_schema_properties(
194+
self.target
195+
)
192196
props_yaml = {
193197
"version": 2,
194198
"sources": [
195199
{
196200
"name": "test_data",
197-
"schema": f"{{{{ target.schema }}}}{SCHEMA_NAME_SUFFIX}",
201+
"schema": f"{{{{ target.{schema_property} }}}}{SCHEMA_NAME_SUFFIX}",
202+
"database": f"{{{{ target.{database_property} }}}}",
198203
"tables": [table_yaml],
199204
}
200205
],
@@ -232,9 +237,19 @@ def test(
232237
return [test_result] if multiple_results else test_result
233238

234239
def seed(self, data: List[dict], table_name: str):
235-
return DbtDataSeeder(
240+
with DbtDataSeeder(
241+
self.dbt_runner, self.project_dir_path, self.seeds_dir_path
242+
).seed(data, table_name):
243+
return
244+
245+
@contextmanager
246+
def seed_context(
247+
self, data: List[dict], table_name: str
248+
) -> Generator[None, None, None]:
249+
with DbtDataSeeder(
236250
self.dbt_runner, self.project_dir_path, self.seeds_dir_path
237-
).seed(data, table_name)
251+
).seed(data, table_name):
252+
yield
238253

239254
@contextmanager
240255
def create_temp_model_for_existing_table(
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
def get_database_and_schema_properties(target: str, is_view: bool = False):
2+
if target == "dremio" and not is_view:
3+
return "datalake", "root_path"
4+
elif target == "clickhouse":
5+
return "schema", "schema"
6+
return "database", "schema"

0 commit comments

Comments
 (0)