Commit 57ad292
Author: Ilyas Gasanov
[DOP-29429] Add Iceberg integration tests
1 parent: 916160c

18 files changed, +739 -26 lines

.env.docker.test

Lines changed: 8 additions & 0 deletions
@@ -51,6 +51,14 @@ TEST_HIVE_CLUSTER=test-hive
 TEST_HIVE_USER=syncmaster
 TEST_HIVE_PASSWORD=123UsedForTestOnly@!
 
+TEST_ICEBERG_METASTORE_URL_FOR_CONFTEST=http://test-iceberg-rest:8181
+TEST_ICEBERG_METASTORE_URL_FOR_WORKER=http://test-iceberg-rest:8181
+TEST_ICEBERG_METASTORE_USERNAME=syncmaster
+TEST_ICEBERG_METASTORE_PASSWORD=123UsedForTestOnly@!
+TEST_ICEBERG_S3_WAREHOUSE_PATH=/data
+TEST_ICEBERG_S3_REGION=us-east-1
+TEST_ICEBERG_S3_PATH_STYLE_ACCESS=True
+
 TEST_HDFS_HOST=test-hive
 TEST_HDFS_WEBHDFS_PORT=9870
 TEST_HDFS_IPC_PORT=9820

.env.local.test

Lines changed: 8 additions & 0 deletions
@@ -51,6 +51,14 @@ export TEST_HIVE_CLUSTER=test-hive
 export TEST_HIVE_USER=syncmaster
 export TEST_HIVE_PASSWORD=123UsedForTestOnly@!
 
+export TEST_ICEBERG_METASTORE_URL_FOR_CONFTEST=http://localhost:8181
+export TEST_ICEBERG_METASTORE_URL_FOR_WORKER=http://test-iceberg-rest:8181
+export TEST_ICEBERG_METASTORE_USERNAME=syncmaster
+export TEST_ICEBERG_METASTORE_PASSWORD=123UsedForTestOnly@!
+export TEST_ICEBERG_S3_WAREHOUSE_PATH=/data
+export TEST_ICEBERG_S3_REGION=us-east-1
+export TEST_ICEBERG_S3_PATH_STYLE_ACCESS=True
+
 export TEST_HDFS_HOST=test-hive
 export TEST_HDFS_WEBHDFS_PORT=9870
 export TEST_HDFS_IPC_PORT=9820
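
In the local variant the conftest URL points at the published port (localhost:8181), since pytest runs on the host, while the worker resolves the compose service name; in the docker variant both processes live inside the compose network, so both URLs use test-iceberg-rest:8181. A minimal sketch of how fixtures might consume these variables (the helper below is hypothetical; the actual conftest may be structured differently):

import os
from dataclasses import dataclass


@dataclass
class IcebergTestSettings:
    metastore_url: str
    username: str
    password: str
    warehouse_path: str
    region: str
    path_style_access: bool


def load_iceberg_settings(for_worker: bool = False) -> IcebergTestSettings:
    # conftest talks to the published port; the worker uses the in-network hostname
    url_var = (
        "TEST_ICEBERG_METASTORE_URL_FOR_WORKER"
        if for_worker
        else "TEST_ICEBERG_METASTORE_URL_FOR_CONFTEST"
    )
    return IcebergTestSettings(
        metastore_url=os.environ[url_var],
        username=os.environ["TEST_ICEBERG_METASTORE_USERNAME"],
        password=os.environ["TEST_ICEBERG_METASTORE_PASSWORD"],
        warehouse_path=os.environ["TEST_ICEBERG_S3_WAREHOUSE_PATH"],
        region=os.environ["TEST_ICEBERG_S3_REGION"],
        path_style_access=os.environ["TEST_ICEBERG_S3_PATH_STYLE_ACCESS"].lower() == "true",
    )
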
.github/workflows/iceberg-tests.yml

Lines changed: 80 additions & 0 deletions

@@ -0,0 +1,80 @@
+name: Iceberg Tests
+on:
+  workflow_call:
+
+env:
+  DEFAULT_PYTHON: '3.13'
+
+jobs:
+  tests:
+    name: Run Iceberg tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Cache jars
+        uses: actions/cache@v4
+        with:
+          path: ./cached_jars
+          key: ${{ runner.os }}-python-${{ env.DEFAULT_PYTHON }}-test-iceberg
+          restore-keys: |
+            ${{ runner.os }}-python-${{ env.DEFAULT_PYTHON }}-test-iceberg
+            ${{ runner.os }}-python-
+
+      - name: Build Worker Image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          tags: mtsrus/syncmaster-worker:${{ github.sha }}
+          target: test
+          file: docker/Dockerfile.worker
+          load: true
+          cache-from: type=gha,scope=test
+          cache-to: type=gha,scope=test,mode=max
+
+      - name: Docker compose up
+        run: |
+          docker compose -f docker-compose.test.yml --profile all down -v --remove-orphans
+          docker compose -f docker-compose.test.yml --profile iceberg up -d --wait --wait-timeout 200
+        env:
+          WORKER_IMAGE_TAG: ${{ github.sha }}
+
+      - name: Run Iceberg Tests
+        run: |
+          docker compose -f ./docker-compose.test.yml --profile iceberg exec -T worker coverage run -m pytest -vvv -s -m "worker and iceberg"
+
+      - name: Dump worker logs on failure
+        if: failure()
+        uses: jwalton/gh-docker-logs@v2
+        with:
+          images: mtsrus/hadoop,mtsrus/syncmaster-worker,mtsrus/horizon-backend,postgres,rabbitmq
+          dest: ./logs
+
+      # This is important, as coverage is exported after receiving SIGTERM
+      - name: Shutdown
+        if: always()
+        run: |
+          docker compose -f docker-compose.test.yml --profile all down -v --remove-orphans
+
+      - name: Upload worker logs
+        uses: actions/upload-artifact@v5
+        if: failure()
+        with:
+          name: worker-logs-iceberg
+          path: logs/*
+
+      - name: Upload coverage results
+        uses: actions/upload-artifact@v5
+        with:
+          name: coverage-iceberg
+          path: reports/*
+          # https://github.com/actions/upload-artifact/issues/602
+          include-hidden-files: true

.github/workflows/tests.yml

Lines changed: 5 additions & 0 deletions
@@ -46,6 +46,10 @@ jobs:
     name: Hive tests
     uses: ./.github/workflows/hive-tests.yml
 
+  iceberg_tests:
+    name: Iceberg tests
+    uses: ./.github/workflows/iceberg-tests.yml
+
   s3_tests:
     name: S3 tests
     uses: ./.github/workflows/s3-tests.yml
@@ -90,6 +94,7 @@ jobs:
       - ftps_tests
       - hdfs_tests
      - hive_tests
+      - iceberg_tests
       - mssql_tests
       - mysql_tests
       - oracle_tests

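The second hunk adds iceberg_tests to what the context lines suggest is the needs list of an aggregate job, so the overall workflow status now gates on the Iceberg suite as well.
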
Makefile

Lines changed: 4 additions & 0 deletions
@@ -89,6 +89,10 @@ test-integration-hive: test-db ##@Test Run integration tests for Hive
 	docker compose -f docker-compose.test.yml --profile hive up -d --wait $(DOCKER_COMPOSE_ARGS)
 	${POETRY} run pytest ./tests/test_integration -m hive $(PYTEST_ARGS)
 
+test-integration-iceberg: test-db ##@Test Run integration tests for Iceberg
+	docker compose -f docker-compose.test.yml --profile iceberg up -d --wait $(DOCKER_COMPOSE_ARGS)
+	${POETRY} run pytest ./tests/test_integration -m iceberg $(PYTEST_ARGS)
+
 test-integration-clickhouse: test-db ##@Test Run integration tests for Clickhouse
 	docker compose -f docker-compose.test.yml --profile clickhouse up -d --wait $(DOCKER_COMPOSE_ARGS)
 	${POETRY} run pytest ./tests/test_integration -m clickhouse $(PYTEST_ARGS)
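
As with the neighbouring targets, the suite can then be run locally with "make test-integration-iceberg", optionally passing PYTEST_ARGS to narrow the selection; the -m iceberg marker filtering is already built into the target.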

README.rst

Lines changed: 1 addition & 0 deletions
@@ -36,6 +36,7 @@ List of currently supported connections:
 * Apache Hive
 * Clickhouse
 * Postgres
+* Iceberg
 * Oracle
 * MSSQL
 * MySQL

docker-compose.test.yml

Lines changed: 24 additions & 5 deletions
@@ -124,7 +124,7 @@ services:
         condition: service_completed_successfully
       rabbitmq:
         condition: service_healthy
-    profiles: [worker, scheduler, s3, hdfs, hive, oracle, clickhouse, mysql, mssql, sftp, ftp, ftps, samba, webdav, all]
+    profiles: [worker, scheduler, s3, hdfs, hive, iceberg, oracle, clickhouse, mysql, mssql, sftp, ftp, ftps, samba, webdav, all]
 
   horizon:
     image: mtsrus/horizon-backend:develop
@@ -147,7 +147,7 @@ services:
     depends_on:
       horizon-db:
         condition: service_healthy
-    profiles: [horizon, s3, hdfs, hive, oracle, clickhouse, mysql, mssql, sftp, ftp, ftps, samba, webdav, all]
+    profiles: [horizon, s3, hdfs, hive, iceberg, oracle, clickhouse, mysql, mssql, sftp, ftp, ftps, samba, webdav, all]
 
   horizon-db:
     image: postgres:17
@@ -167,7 +167,7 @@ services:
       interval: 30s
      timeout: 5s
      retries: 3
-    profiles: [horizon, s3, hdfs, hive, oracle, clickhouse, mysql, mssql, sftp, ftp, ftps, samba, webdav, all]
+    profiles: [horizon, s3, hdfs, hive, iceberg, oracle, clickhouse, mysql, mssql, sftp, ftp, ftps, samba, webdav, all]
 
   test-postgres:
     image: postgres:17
@@ -185,7 +185,7 @@ services:
      interval: 30s
      timeout: 5s
      retries: 3
-    profiles: [s3, hdfs, hive, oracle, clickhouse, mysql, mssql, sftp, ftp, ftps, samba, webdav, all]
+    profiles: [s3, hdfs, hive, iceberg, oracle, clickhouse, mysql, mssql, sftp, ftp, ftps, samba, webdav, all]
 
   test-s3:
     image: bitnamilegacy/minio:latest
@@ -204,7 +204,26 @@ services:
      interval: 30s
      timeout: 5s
      retries: 3
-    profiles: [s3, all]
+    profiles: [s3, iceberg, all]
+
+  test-iceberg-rest:
+    image: tabulario/iceberg-rest:latest
+    container_name: test-iceberg-rest
+    restart: unless-stopped
+    environment:
+      CATALOG_WAREHOUSE: s3a://syncmaster/data/
+      CATALOG_IO__IMPL: org.apache.iceberg.aws.s3.S3FileIO
+      CATALOG_S3_ENDPOINT: http://test-s3:9000
+      CATALOG_S3_PATH__STYLE__ACCESS: true
+      AWS_ACCESS_KEY_ID: syncmaster
+      AWS_SECRET_ACCESS_KEY: 123UsedForTestOnly@!
+      AWS_REGION: us-east-1
+    ports:
+      - 8181:8181
+    depends_on:
+      test-s3:
+        condition: service_healthy
+    profiles: [iceberg, all]
 
   test-oracle:
     image: gvenzl/oracle-xe:slim-faststart

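For orientation, this is roughly how a Spark session would address the catalog this service exposes, assuming the Iceberg runtime jars (see the get_packages change below) are on the classpath. The spark.sql.catalog.* keys are standard Iceberg Spark options; the catalog name "test" and the table name are made up for illustration, and the real worker assembles its session through onetl instead:

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    # register an Iceberg catalog named "test", backed by the REST service
    .config("spark.sql.catalog.test", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.test.type", "rest")
    .config("spark.sql.catalog.test.uri", "http://test-iceberg-rest:8181")
    # table IO goes straight to the MinIO warehouse, matching the service env
    .config("spark.sql.catalog.test.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
    .config("spark.sql.catalog.test.s3.endpoint", "http://test-s3:9000")
    .config("spark.sql.catalog.test.s3.path-style-access", "true")
    .getOrCreate()
)

spark.sql("SELECT * FROM test.db.some_table").show()  # hypothetical table
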
poetry.lock

Lines changed: 1 addition & 1 deletion
(Generated file; diff not rendered.)

syncmaster/worker/handlers/db/iceberg.py

Lines changed: 2 additions & 0 deletions
@@ -53,6 +53,8 @@ def connect(self, spark: SparkSession):
 
     @slot
     def read(self) -> DataFrame:
+        table = f"{self.transfer_dto.catalog_name}.{self.transfer_dto.table_name}"
+        self.connection.spark.catalog.refreshTable(table)
         return super().read()
 
     @slot
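
Spark caches table metadata per session, so a table committed to by another session (for example, an earlier transfer run) can otherwise be read through a stale snapshot; refreshing before the read forces the catalog to re-resolve the current one. The same pattern in isolation, assuming an existing SparkSession named spark and an illustrative table name:

table = "catalog.db.orders"        # illustrative fully-qualified name
spark.catalog.refreshTable(table)  # drop cached metadata, so that...
df = spark.table(table)            # ...this read resolves the latest committed snapshot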

syncmaster/worker/spark.py

Lines changed: 24 additions & 1 deletion
@@ -49,7 +49,15 @@ def get_worker_spark_session(
 
 def get_packages(connection_types: set[str]) -> list[str]:  # noqa: WPS212
     import pyspark
-    from onetl.connection import MSSQL, Clickhouse, MySQL, Oracle, Postgres, SparkS3
+    from onetl.connection import (
+        MSSQL,
+        Clickhouse,
+        Iceberg,
+        MySQL,
+        Oracle,
+        Postgres,
+        SparkS3,
+    )
     from onetl.file.format import XML, Excel
 
     spark_version = pyspark.__version__
@@ -75,6 +83,14 @@ def get_packages(connection_types: set[str]) -> list[str]:  # noqa: WPS212
     if connection_types & {"s3", "all"}:
         result.extend(SparkS3.get_packages(spark_version=spark_version))
 
+    if connection_types & {"iceberg_rest_s3", "all"}:
+        result.extend(
+            [
+                *Iceberg.get_packages(package_version="1.10.0", spark_version=spark_version),
+                *Iceberg.S3Warehouse.get_packages(package_version="1.10.0"),
+            ],
+        )
+
     if connection_types & {"s3", "hdfs", "sftp", "ftp", "ftps", "samba", "webdav", "all"}:
         result.extend(file_formats_spark_packages)
 
@@ -128,4 +144,11 @@ def get_spark_session_conf(
         },
     )
 
+    if target.type == "iceberg_rest_s3" or source.type == "iceberg_rest_s3":
+        config.update(
+            {
+                "spark.driver.extraJavaOptions": "-Daws.region=us-east-1",
+            },
+        )
+
     return config
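
The coordinates returned by get_packages ultimately feed spark.jars.packages when the worker session is assembled. A rough sketch of that wiring follows; the example output is indicative only, since exact artifact names depend on the onetl and Iceberg versions:

from syncmaster.worker.spark import get_packages  # module shown in this diff

packages = get_packages({"iceberg_rest_s3"})
# indicative, unverified output:
# ["org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.0",
#  "org.apache.iceberg:iceberg-aws-bundle:1.10.0", ...]

conf = {
    "spark.jars.packages": ",".join(packages),
    # plus the region workaround applied by get_spark_session_conf
    "spark.driver.extraJavaOptions": "-Daws.region=us-east-1",
}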
