Skip to content

Commit 4605a04

Browse files
authored
Merge branch 'main' into refactor/consolidate-snapshot-expiration
2 parents b837f86 + 9c99f32 commit 4605a04

File tree

99 files changed

+8141
-3957
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

99 files changed

+8141
-3957
lines changed

.github/ISSUE_TEMPLATE/iceberg_bug_report.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ body:
2828
description: What Apache Iceberg version are you using?
2929
multiple: false
3030
options:
31-
- "0.9.0 (latest release)"
31+
- "0.9.1 (latest release)"
32+
- "0.9.0"
3233
- "0.8.1"
3334
- "0.8.0"
3435
- "0.7.1"

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v2.23.2
65+
uses: pypa/cibuildwheel@v3.0.0
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/python-ci.yml

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,53 @@ jobs:
5858
python-version: ${{ matrix.python }}
5959
cache: poetry
6060
cache-dependency-path: ./poetry.lock
61+
- name: Install system dependencies
62+
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
6163
- name: Install
6264
run: make install-dependencies
63-
- name: Linters
65+
- name: Run linters
6466
run: make lint
65-
- name: Tests
66-
run: make test-coverage
67+
- name: Run unit tests with coverage
68+
run: COVERAGE=1 make test
69+
- name: Generate coverage report (85%) # Coverage threshold should only increase over time — never decrease it!
70+
run: COVERAGE_FAIL_UNDER=85 make coverage-report
71+
72+
integration-test:
73+
runs-on: ubuntu-22.04
74+
strategy:
75+
matrix:
76+
python: ['3.9', '3.10', '3.11', '3.12']
77+
78+
steps:
79+
- uses: actions/checkout@v4
80+
- name: Install system dependencies
81+
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
82+
- name: Install
83+
run: make install
84+
85+
- name: Run integration tests with coverage
86+
run: COVERAGE=1 make test-integration
87+
- name: Show debug logs
88+
if: ${{ failure() }}
89+
run: docker compose -f dev/docker-compose-integration.yml logs  # the compose file that `make test-integration` starts its services from
90+
91+
- name: Run s3 integration tests with coverage
92+
run: COVERAGE=1 make test-s3
93+
- name: Show debug logs
94+
if: ${{ failure() }}
95+
run: docker compose -f dev/docker-compose.yml logs
96+
97+
- name: Run adls integration tests with coverage
98+
run: COVERAGE=1 make test-adls
99+
- name: Show debug logs
100+
if: ${{ failure() }}
101+
run: docker compose -f dev/docker-compose-azurite.yml logs
102+
103+
- name: Run gcs integration tests with coverage
104+
run: COVERAGE=1 make test-gcs
105+
- name: Show debug logs
106+
if: ${{ failure() }}
107+
run: docker compose -f dev/docker-compose-gcs-server.yml logs
108+
109+
- name: Generate coverage report (75%) # Coverage threshold should only increase over time — never decrease it!
110+
run: COVERAGE_FAIL_UNDER=75 make coverage-report

.github/workflows/python-integration.yml

Lines changed: 0 additions & 59 deletions
This file was deleted.

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v2.23.2
60+
uses: pypa/cibuildwheel@v3.0.0
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ coverage.xml
3535
.project
3636
.settings
3737
bin/
38+
.vscode/
3839

3940
# Hive/metastore files
4041
metastore_db/

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ repos:
2727
- id: check-yaml
2828
- id: check-ast
2929
- repo: https://github.com/astral-sh/ruff-pre-commit
30-
rev: v0.8.6
30+
rev: v0.11.13
3131
hooks:
3232
- id: ruff
3333
args: [ --fix, --exit-non-zero-on-fix ]
3434
- id: ruff-format
3535
- repo: https://github.com/pre-commit/mirrors-mypy
36-
rev: v1.14.1
36+
rev: v1.16.0
3737
hooks:
3838
- id: mypy
3939
args:

Makefile

Lines changed: 92 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -14,102 +14,143 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
# ========================
18+
# Configuration Variables
19+
# ========================
1720

21+
PYTEST_ARGS ?= -v # Override with e.g. PYTEST_ARGS="-vv --tb=short"
22+
COVERAGE ?= 0 # Set COVERAGE=1 to enable coverage: make test COVERAGE=1
23+
COVERAGE_FAIL_UNDER ?= 85 # Minimum coverage % to pass: make coverage-report COVERAGE_FAIL_UNDER=70
1824

19-
help: ## Display this help
20-
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
25+
# TEST_RUNNER is expanded lazily, at the moment a recipe runs, so that the
26+
# target-specific COVERAGE=1 set by `test-coverage` is honored. A parse-time
27+
# ifeq only sees the command-line/environment value of COVERAGE, so
28+
# `make test-coverage` would run every test without collecting coverage.
29+
TEST_RUNNER = poetry run $(if $(filter 1,$(COVERAGE)),coverage run --parallel-mode --source=pyiceberg -m)
2130

22-
POETRY_VERSION = 2.0.1
23-
install-poetry: ## Ensure Poetry is installed and the correct version is being used.
31+
POETRY_VERSION = 2.1.1
32+
33+
# ============
34+
# Help Section
35+
# ============
36+
37+
##@ General
38+
39+
help: ## Display this help message
40+
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-25s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
41+
42+
# ==================
43+
# Installation Tasks
44+
# ==================
45+
46+
##@ Setup
47+
48+
# Use POSIX redirection here: make runs recipes with /bin/sh by default, where the bash-only `&>` is parsed as `&` followed by `>`, so the check would not test whether poetry exists.
install-poetry: ## Ensure Poetry is installed at the specified version
2449
@if ! command -v poetry > /dev/null 2>&1; then \
25-
echo "Poetry could not be found. Installing..."; \
50+
echo "Poetry not found. Installing..."; \
2651
pip install --user poetry==$(POETRY_VERSION); \
2752
else \
2853
INSTALLED_VERSION=$$(pip show poetry | grep Version | awk '{print $$2}'); \
2954
if [ "$$INSTALLED_VERSION" != "$(POETRY_VERSION)" ]; then \
30-
echo "Poetry version $$INSTALLED_VERSION does not match required version $(POETRY_VERSION). Updating..."; \
55+
echo "Updating Poetry to version $(POETRY_VERSION)..."; \
3156
pip install --user --upgrade poetry==$(POETRY_VERSION); \
3257
else \
33-
echo "Poetry version $$INSTALLED_VERSION is already installed."; \
34-
fi \
58+
echo "Poetry version $(POETRY_VERSION) already installed."; \
59+
fi; \
3560
fi
3661

37-
install-dependencies: ## Install dependencies including dev, docs, and all extras
62+
install-dependencies: ## Install all dependencies including extras
3863
poetry install --all-extras
3964

40-
install: | install-poetry install-dependencies
65+
install: install-poetry install-dependencies ## Install Poetry and dependencies
66+
67+
# ===============
68+
# Code Validation
69+
# ===============
70+
71+
##@ Quality
4172

4273
check-license: ## Check license headers
4374
./dev/check-license
4475

45-
lint: ## lint
76+
lint: ## Run code linters via pre-commit
4677
poetry run pre-commit run --all-files
4778

48-
test: ## Run all unit tests, can add arguments with PYTEST_ARGS="-vv"
49-
poetry run pytest tests/ -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS}
79+
# ===============
80+
# Testing Section
81+
# ===============
5082

51-
test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv"
52-
sh ./dev/run-minio.sh
53-
poetry run pytest tests/ -m s3 ${PYTEST_ARGS}
83+
##@ Testing
84+
85+
test: ## Run all unit tests (excluding integration)
86+
$(TEST_RUNNER) pytest tests/ -m "(unmarked or parametrize) and not integration" $(PYTEST_ARGS)
5487

55-
test-integration: ## Run all integration tests, can add arguments with PYTEST_ARGS="-vv"
88+
test-integration: test-integration-setup test-integration-exec ## Run integration tests
89+
90+
test-integration-setup: ## Start Docker services for integration tests
5691
docker compose -f dev/docker-compose-integration.yml kill
5792
docker compose -f dev/docker-compose-integration.yml rm -f
5893
docker compose -f dev/docker-compose-integration.yml up -d
5994
sleep 10
6095
docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py
6196
docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py
62-
poetry run pytest tests/ -v -m integration ${PYTEST_ARGS}
6397

64-
test-integration-rebuild:
98+
test-integration-exec: ## Run integration tests (excluding provision)
99+
$(TEST_RUNNER) pytest tests/ -m integration $(PYTEST_ARGS)
100+
101+
test-integration-rebuild: ## Rebuild integration Docker services from scratch
65102
docker compose -f dev/docker-compose-integration.yml kill
66103
docker compose -f dev/docker-compose-integration.yml rm -f
67104
docker compose -f dev/docker-compose-integration.yml build --no-cache
68105

69-
test-adls: ## Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv"
106+
test-s3: ## Run tests marked with @pytest.mark.s3
107+
sh ./dev/run-minio.sh
108+
$(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS)
109+
110+
test-adls: ## Run tests marked with @pytest.mark.adls
70111
sh ./dev/run-azurite.sh
71-
poetry run pytest tests/ -m adls ${PYTEST_ARGS}
112+
$(TEST_RUNNER) pytest tests/ -m adls $(PYTEST_ARGS)
72113

73-
test-gcs: ## Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv"
114+
test-gcs: ## Run tests marked with @pytest.mark.gcs
74115
sh ./dev/run-gcs-server.sh
75-
poetry run pytest tests/ -m gcs ${PYTEST_ARGS}
76-
77-
test-coverage-unit: # Run test with coverage for unit tests, can add arguments with PYTEST_ARGS="-vv"
78-
poetry run coverage run --source=pyiceberg/ --data-file=.coverage.unit -m pytest tests/ -v -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS}
116+
$(TEST_RUNNER) pytest tests/ -m gcs $(PYTEST_ARGS)
79117

80-
test-coverage-integration: # Run test with coverage for integration tests, can add arguments with PYTEST_ARGS="-vv"
81-
docker compose -f dev/docker-compose-integration.yml kill
82-
docker compose -f dev/docker-compose-integration.yml rm -f
83-
docker compose -f dev/docker-compose-integration.yml up -d
84-
sh ./dev/run-azurite.sh
85-
sh ./dev/run-gcs-server.sh
86-
sleep 10
87-
docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py
88-
docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py
89-
poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -v -m integration ${PYTEST_ARGS}
118+
test-coverage: COVERAGE=1
119+
test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report
90120

91-
test-coverage: | test-coverage-unit test-coverage-integration ## Run all tests with coverage including unit and integration tests
92-
poetry run coverage combine .coverage.unit .coverage.integration
93-
poetry run coverage report -m --fail-under=90
121+
coverage-report: ## Combine and report coverage
122+
poetry run coverage combine
123+
poetry run coverage report -m --fail-under=$(COVERAGE_FAIL_UNDER)
94124
poetry run coverage html
95125
poetry run coverage xml
96126

127+
# ================
128+
# Documentation
129+
# ================
97130

98-
clean: ## Clean up the project Python working environment
99-
@echo "Cleaning up Cython and Python cached files"
100-
@rm -rf build dist *.egg-info
101-
@find . -name "*.so" -exec echo Deleting {} \; -delete
102-
@find . -name "*.pyc" -exec echo Deleting {} \; -delete
103-
@find . -name "__pycache__" -exec echo Deleting {} \; -exec rm -rf {} +
104-
@find . -name "*.pyd" -exec echo Deleting {} \; -delete
105-
@find . -name "*.pyo" -exec echo Deleting {} \; -delete
106-
@echo "Cleanup complete"
131+
##@ Documentation
107132

108-
docs-install:
133+
docs-install: ## Install docs dependencies
109134
poetry install --with docs
110135

111-
docs-serve:
136+
docs-serve: ## Serve local docs preview (hot reload)
112137
poetry run mkdocs serve -f mkdocs/mkdocs.yml
113138

114-
docs-build:
139+
docs-build: ## Build the static documentation site
115140
poetry run mkdocs build -f mkdocs/mkdocs.yml --strict
141+
142+
# ===================
143+
# Project Maintenance
144+
# ===================
145+
146+
##@ Maintenance
147+
148+
clean: ## Remove build artifacts and caches
149+
@echo "Cleaning up Cython and Python cached files..."
150+
@rm -rf build dist *.egg-info
151+
@find . -name "*.so" -exec echo Deleting {} \; -delete
152+
@find . -name "*.pyc" -exec echo Deleting {} \; -delete
153+
@find . -name "__pycache__" -exec echo Deleting {} \; -exec rm -rf {} +
154+
@find . -name "*.pyd" -exec echo Deleting {} \; -delete
155+
@find . -name "*.pyo" -exec echo Deleting {} \; -delete
156+
@echo "Cleanup complete."

dev/Dockerfile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,22 +37,22 @@ RUN mkdir -p ${HADOOP_HOME} && mkdir -p ${SPARK_HOME} && mkdir -p /home/iceberg/
3737
WORKDIR ${SPARK_HOME}
3838

3939
# Remember to also update `tests/conftest`'s spark setting
40-
ENV SPARK_VERSION=3.5.4
40+
ENV SPARK_VERSION=3.5.6
4141
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
42-
ENV ICEBERG_VERSION=1.9.0-SNAPSHOT
43-
ENV PYICEBERG_VERSION=0.9.0
42+
ENV ICEBERG_VERSION=1.9.1
43+
ENV PYICEBERG_VERSION=0.9.1
4444

4545
RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
4646
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
4747
&& rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz
4848

4949
# Download iceberg spark runtime
50-
RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.9.0-SNAPSHOT/iceberg-spark-runtime-3.5_2.12-1.9.0-20250409.001855-44.jar \
50+
# --fail: stop the build on an HTTP error (e.g. a 404 for a wrong version) instead of silently saving the error page as the jar.
RUN curl --retry 5 -s --fail https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \
5151
-Lo /opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar
5252

5353

5454
# Download AWS bundle
55-
RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-aws-bundle/1.9.0-SNAPSHOT/iceberg-aws-bundle-1.9.0-20250409.002731-88.jar \
55+
# --fail: stop the build on an HTTP error (e.g. a 404 for a wrong version) instead of silently saving the error page as the jar.
RUN curl --retry 5 -s --fail https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
5656
-Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar
5757

5858
COPY spark-defaults.conf /opt/spark/conf

0 commit comments

Comments
 (0)