
Commit 18392d1

Merge branch 'main' of github.com:apache/iceberg-python into fd-add-ability-to-delete-full-data-files

2 parents: 2ea157e + aa361d1

19 files changed: +659 -215 lines

mkdocs/requirements.txt

Lines changed: 4 additions & 4 deletions
@@ -17,12 +17,12 @@

 mkdocs==1.6.0
 griffe==0.44.0
-jinja2==3.1.3
-mkdocstrings==0.25.0
+jinja2==3.1.4
+mkdocstrings==0.25.1
 mkdocstrings-python==1.10.0
 mkdocs-literate-nav==0.6.1
 mkdocs-autorefs==1.0.1
 mkdocs-gen-files==0.5.0
-mkdocs-material==9.5.20
+mkdocs-material==9.5.21
 mkdocs-material-extensions==1.3.1
-mkdocs-section-index==0.3.8
+mkdocs-section-index==0.3.9

poetry.lock

Lines changed: 188 additions & 187 deletions
Some generated files are not rendered by default.

pyiceberg/catalog/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -779,7 +779,7 @@ def _get_updated_props_and_update_summary(
     def _resolve_table_location(self, location: Optional[str], database_name: str, table_name: str) -> str:
         if not location:
             return self._get_default_warehouse_location(database_name, table_name)
-        return location
+        return location.rstrip("/")

     def _get_default_warehouse_location(self, database_name: str, table_name: str) -> str:
         database_properties = self.load_namespace_properties(database_name)
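
A note on the change above: stripping a trailing slash means a location passed as "s3://bucket/path/" now resolves the same way as "s3://bucket/path", so derived metadata and data paths no longer contain a double slash. A minimal sketch of the effect, assuming a configured catalog; the catalog name, identifier, schema, and location below are made-up illustration values, not part of this commit:

    from pyiceberg.catalog import load_catalog
    from pyiceberg.schema import Schema
    from pyiceberg.types import LongType, NestedField

    # Hypothetical catalog, identifier, and location -- purely for illustration.
    catalog = load_catalog("default")
    schema = Schema(NestedField(field_id=1, name="id", field_type=LongType(), required=True))

    table = catalog.create_table(
        identifier="examples.events",
        schema=schema,
        location="s3://my-bucket/warehouse/events/",  # note the trailing slash
    )

    # The location is normalized, so metadata lands under .../events/metadata/...
    # rather than .../events//metadata/...
    assert table.location() == "s3://my-bucket/warehouse/events"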

pyiceberg/catalog/rest.py

Lines changed: 2 additions & 0 deletions
@@ -519,6 +519,8 @@ def _create_table(
         fresh_sort_order = assign_fresh_sort_order_ids(sort_order, iceberg_schema, fresh_schema)

         namespace_and_table = self._split_identifier_for_path(identifier)
+        if location:
+            location = location.rstrip("/")
         request = CreateTableRequest(
             name=namespace_and_table["table"],
             location=location,

pyiceberg/io/pyarrow.py

Lines changed: 10 additions & 0 deletions
@@ -731,6 +731,16 @@ def _(obj: pa.MapType, visitor: PyArrowSchemaVisitor[T]) -> T:
     return visitor.map(obj, key_result, value_result)


+@visit_pyarrow.register(pa.DictionaryType)
+def _(obj: pa.DictionaryType, visitor: PyArrowSchemaVisitor[T]) -> T:
+    # Parquet has no dictionary type. dictionary-encoding is handled
+    # as an encoding detail, not as a separate type.
+    # We will follow this approach in determining the Iceberg Type,
+    # as we only support parquet in PyIceberg for now.
+    logger.warning(f"Iceberg does not have a dictionary type. {type(obj)} will be inferred as {obj.value_type} on read.")
+    return visit_pyarrow(obj.value_type, visitor)
+
+
 @visit_pyarrow.register(pa.DataType)
 def _(obj: pa.DataType, visitor: PyArrowSchemaVisitor[T]) -> T:
     if pa.types.is_nested(obj):
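
To make the new DictionaryType handling concrete: a dictionary-encoded Arrow column carries an index type and a value type, and the visitor above maps the column to the Iceberg type of its values. A small PyArrow-only sketch of what obj.value_type refers to (the column contents are made-up example data):

    import pyarrow as pa

    # Dictionary-encode a string column: Arrow stores int32 indices plus a value
    # dictionary, but logically the column is still a string column.
    arr = pa.array(["red", "green", "red"]).dictionary_encode()

    print(arr.type)             # dictionary<values=string, indices=int32, ordered=0>
    print(arr.type.value_type)  # string -- the type the visitor falls back to,
                                # so the column is inferred as an Iceberg string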

pyiceberg/table/__init__.py

Lines changed: 7 additions & 3 deletions
@@ -539,7 +539,7 @@ def delete(self, delete_filter: Union[str, BooleanExpression], snapshot_properti
         if not delete_snapshot.files_affected and not delete_snapshot.rewrites_needed:
             warnings.warn("Delete operation did not match any records")

-    def add_files(self, file_paths: List[str]) -> None:
+    def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
         """
         Shorthand API for adding files as data files to the table transaction.

@@ -551,7 +551,7 @@ def add_files(self, file_paths: List[str]) -> None:
         """
         if self._table.name_mapping() is None:
             self.set_properties(**{TableProperties.DEFAULT_NAME_MAPPING: self._table.schema().name_mapping.model_dump_json()})
-        with self.update_snapshot().fast_append() as update_snapshot:
+        with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
             data_files = _parquet_files_to_data_files(
                 table_metadata=self._table.metadata, file_paths=file_paths, io=self._table.io
             )
@@ -1443,6 +1443,7 @@ def overwrite(
         with self.transaction() as tx:
             tx.overwrite(df=df, overwrite_filter=overwrite_filter, snapshot_properties=snapshot_properties)

+<<<<<<< HEAD
     def delete(
         self, delete_filter: Union[BooleanExpression, str] = ALWAYS_TRUE, snapshot_properties: Dict[str, str] = EMPTY_DICT
     ) -> None:
@@ -1457,6 +1458,9 @@ def delete(
         tx.delete(delete_filter=delete_filter, snapshot_properties=snapshot_properties)

     def add_files(self, file_paths: List[str]) -> None:
+=======
+    def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
+>>>>>>> aa361d1485f4a914bc0bbc2e574becaec9a773ac
         """
         Shorthand API for adding files as data files to the table.

@@ -1467,7 +1471,7 @@ def add_files(self, file_paths: List[str]) -> None:
             FileNotFoundError: If the file does not exist.
         """
         with self.transaction() as tx:
-            tx.add_files(file_paths=file_paths)
+            tx.add_files(file_paths=file_paths, snapshot_properties=snapshot_properties)

     def update_spec(self, case_sensitive: bool = True) -> UpdateSpec:
         return UpdateSpec(Transaction(self, autocommit=True), case_sensitive=case_sensitive)
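
For reference, the snapshot_properties parameter added to add_files works like the one already accepted by append, overwrite, and delete: the supplied key/value pairs are passed through update_snapshot and attached to the summary of the snapshot created by the fast append. A minimal usage sketch, assuming an existing table whose schema matches the Parquet files; the catalog name, identifier, path, and property key are made-up values:

    from pyiceberg.catalog import load_catalog

    # Hypothetical catalog and table -- not part of this commit.
    catalog = load_catalog("default")
    tbl = catalog.load_table("examples.events")

    tbl.add_files(
        file_paths=["s3://my-bucket/staging/events-00001.parquet"],
        snapshot_properties={"ingest-job-id": "job-42"},  # recorded on the new snapshot
    )

    # The properties appear in the summary of the snapshot produced by the fast append.
    print(tbl.current_snapshot().summary)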

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ moto = { version = "^5.0.2", extras = ["server"] }
8585
typing-extensions = "4.11.0"
8686
pytest-mock = "3.14.0"
8787
pyspark = "3.5.1"
88-
cython = "3.0.8"
88+
cython = "3.0.10"
8989
deptry = ">=0.14,<0.17"
9090
docutils = "!=0.21"
9191

tests/catalog/integration_test_dynamodb.py

Lines changed: 6 additions & 0 deletions
@@ -262,3 +262,9 @@ def test_update_namespace_properties(test_catalog: Catalog, database_name: str)
         else:
             assert k in update_report.removed
     assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"]
+
+
+def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_table((database_name, table_name), table_schema_nested)
+    assert test_catalog.table_exists((database_name, table_name)) is True

tests/catalog/integration_test_glue.py

Lines changed: 6 additions & 0 deletions
@@ -557,3 +557,9 @@ def test_create_table_transaction(
             ]
         },
     ]
+
+
+def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_table((database_name, table_name), table_schema_nested)
+    assert test_catalog.table_exists((database_name, table_name)) is True

tests/catalog/test_base.py

Lines changed: 14 additions & 0 deletions
@@ -105,6 +105,7 @@ def create_table(

         if not location:
             location = f'{self._warehouse_location}/{"/".join(identifier)}'
+        location = location.rstrip("/")

         metadata_location = self._get_metadata_location(location=location)
         metadata = new_table_metadata(
@@ -353,6 +354,19 @@ def test_create_table_location_override(catalog: InMemoryCatalog) -> None:
     assert table.location() == new_location


+def test_create_table_removes_trailing_slash_from_location(catalog: InMemoryCatalog) -> None:
+    new_location = f"{catalog._warehouse_location}/new_location"
+    table = catalog.create_table(
+        identifier=TEST_TABLE_IDENTIFIER,
+        schema=TEST_TABLE_SCHEMA,
+        location=f"{new_location}/",
+        partition_spec=TEST_TABLE_PARTITION_SPEC,
+        properties=TEST_TABLE_PROPERTIES,
+    )
+    assert catalog.load_table(TEST_TABLE_IDENTIFIER) == table
+    assert table.location() == new_location
+
+
 @pytest.mark.parametrize(
     "schema,expected",
     [
