Skip to content

Commit 1809774

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python
2 parents 6cc1735 + d9f3a07 commit 1809774

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+2906
-1368
lines changed

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v2.23.2
65+
uses: pypa/cibuildwheel@v2.23.3
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/python-ci.yml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,29 @@ jobs:
5858
python-version: ${{ matrix.python }}
5959
cache: poetry
6060
cache-dependency-path: ./poetry.lock
61+
- name: Install system dependencies
62+
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
6163
- name: Install
6264
run: make install-dependencies
6365
- name: Linters
6466
run: make lint
6567
- name: Tests
66-
run: make test-coverage
68+
run: make test-coverage-unit
69+
70+
integration-test:
71+
runs-on: ubuntu-22.04
72+
strategy:
73+
matrix:
74+
python: ['3.9', '3.10', '3.11', '3.12']
75+
76+
steps:
77+
- uses: actions/checkout@v4
78+
- name: Install system dependencies
79+
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
80+
- name: Install
81+
run: make install
82+
- name: Run integration tests
83+
run: make test-coverage-integration
84+
- name: Show debug logs
85+
if: ${{ failure() }}
86+
run: docker compose -f dev/docker-compose.yml logs

.github/workflows/python-integration.yml

Lines changed: 0 additions & 59 deletions
This file was deleted.

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v2.23.2
60+
uses: pypa/cibuildwheel@v2.23.3
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

Makefile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
help: ## Display this help
2020
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
2121

22-
POETRY_VERSION = 2.0.1
22+
POETRY_VERSION = 2.1.1
2323
install-poetry: ## Ensure Poetry is installed and the correct version is being used.
2424
@if ! command -v poetry &> /dev/null; then \
2525
echo "Poetry could not be found. Installing..."; \
@@ -52,13 +52,17 @@ test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv"
5252
sh ./dev/run-minio.sh
5353
poetry run pytest tests/ -m s3 ${PYTEST_ARGS}
5454

55-
test-integration: ## Run all integration tests, can add arguments with PYTEST_ARGS="-vv"
55+
test-integration: | test-integration-setup test-integration-exec ## Run all integration tests, can add arguments with PYTEST_ARGS="-vv"
56+
57+
test-integration-setup: # Prepare the environment for integration
5658
docker compose -f dev/docker-compose-integration.yml kill
5759
docker compose -f dev/docker-compose-integration.yml rm -f
5860
docker compose -f dev/docker-compose-integration.yml up -d
5961
sleep 10
6062
docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py
6163
docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py
64+
65+
test-integration-exec: # Execute integration tests, can add arguments with PYTEST_ARGS="-vv"
6266
poetry run pytest tests/ -v -m integration ${PYTEST_ARGS}
6367

6468
test-integration-rebuild:

dev/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,20 @@ WORKDIR ${SPARK_HOME}
3939
# Remember to also update `tests/conftest`'s spark setting
4040
ENV SPARK_VERSION=3.5.4
4141
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
42-
ENV ICEBERG_VERSION=1.9.0-SNAPSHOT
42+
ENV ICEBERG_VERSION=1.9.0
4343
ENV PYICEBERG_VERSION=0.9.0
4444

4545
RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
4646
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
4747
&& rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz
4848

4949
# Download iceberg spark runtime
50-
RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.9.0-SNAPSHOT/iceberg-spark-runtime-3.5_2.12-1.9.0-20250409.001855-44.jar \
50+
RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \
5151
-Lo /opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar
5252

5353

5454
# Download AWS bundle
55-
RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-aws-bundle/1.9.0-SNAPSHOT/iceberg-aws-bundle-1.9.0-20250409.002731-88.jar \
55+
RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
5656
-Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar
5757

5858
COPY spark-defaults.conf /opt/spark/conf

mkdocs/docs/api.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,17 @@ static_table = StaticTable.from_metadata(
215215

216216
The static-table is considered read-only.
217217

218+
Alternatively, if your table metadata directory contains a `version-hint.text` file, you can just specify
219+
the table root path, and the latest metadata file will be picked automatically.
220+
221+
```python
222+
from pyiceberg.table import StaticTable
223+
224+
static_table = StaticTable.from_metadata(
225+
"s3://warehouse/wh/nyc.db/taxis"
226+
)
227+
```
228+
218229
## Check if a table exists
219230

220231
To check whether the `bids` table exists:

mkdocs/docs/configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ PyIceberg uses [S3FileSystem](https://arrow.apache.org/docs/python/generated/pya
189189
| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. |
190190
| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. |
191191
| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. |
192-
| s3.force-virtual-addressing | True | Whether to use virtual addressing of buckets. This must be set to True as OSS can only be accessed with virtual hosted style address. |
192+
| s3.force-virtual-addressing | True | Whether to use virtual addressing of buckets. This is set to `True` by default as OSS can only be accessed with virtual hosted style address. |
193193

194194
<!-- markdown-link-check-enable-->
195195

mkdocs/docs/contributing.md

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,10 @@ The PyIceberg Project is hosted on GitHub at <https://github.com/apache/iceberg-
3737
For the development, Poetry is used for packaging and dependency management. You can install this using:
3838

3939
```bash
40-
pip install poetry
40+
make install-poetry
4141
```
4242

43-
Make sure you're using an up-to-date environment from venv
44-
45-
```bash
46-
pip install --upgrade virtualenv pip
47-
python -m venv ./venv
48-
source ./venv/bin/activate
49-
```
50-
51-
To get started, you can run `make install`, which installs Poetry and all the dependencies of the Iceberg library. This also installs the development dependencies. If you don't want to install the development dependencies, you need to install using `poetry install --no-dev`.
43+
To get started, you can run `make install`, which installs all the dependencies of the Iceberg library. This also installs the development dependencies. If you don't want to install the development dependencies, you need to install using `poetry install --without dev` instead of `make install`.
5244

5345
If you want to install the library on the host, you can simply run `pip3 install -e .`. If you wish to use a virtual environment, you can run `poetry shell`. Poetry will open up a virtual environment with all the dependencies set.
5446

0 commit comments

Comments
 (0)