Commit 74497fb

Merge branch 'main' of github.com:apache/iceberg-python into fd-add-ability-to-delete-full-data-files
2 parents a97c45a + 87656fb

File tree

11 files changed, +661 -284 lines

mkdocs/docs/api.md

Lines changed: 78 additions & 0 deletions
@@ -342,6 +342,18 @@ table.append(df)
 
 To explore the table metadata, tables can be inspected.
 
+<!-- prettier-ignore-start -->
+
+!!! tip "Time Travel"
+    To inspect a table's metadata with the time travel feature, call the inspect table method with the `snapshot_id` argument.
+    Time travel is supported on all metadata tables except `snapshots` and `refs`.
+
+    ```python
+    table.inspect.entries(snapshot_id=805611270568163028)
+    ```
+
+<!-- prettier-ignore-end -->
+
 ### Snapshots
 
 Inspect the snapshots of the table:
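The tip added in this hunk states that time travel works on every metadata table except `snapshots` and `refs`, so the same `snapshot_id` argument should apply to the other inspect methods as well. A minimal sketch, reusing the snapshot ID from the example above:

```python
# Hypothetical call: inspect the partitions metadata table as of an earlier snapshot.
table.inspect.partitions(snapshot_id=805611270568163028)
```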
@@ -370,6 +382,47 @@ manifest_list: [["s3://warehouse/default/table_metadata_snapshots/metadata/snap-
 summary: [[keys:["added-files-size","added-data-files","added-records","total-data-files","total-delete-files","total-records","total-files-size","total-position-deletes","total-equality-deletes"]values:["5459","1","3","1","0","3","5459","0","0"],keys:["added-files-size","added-data-files","added-records","total-data-files","total-records",...,"total-equality-deletes","total-files-size","deleted-data-files","deleted-records","removed-files-size"]values:["5459","1","3","1","3",...,"0","5459","1","3","5459"],keys:["added-files-size","added-data-files","added-records","total-data-files","total-delete-files","total-records","total-files-size","total-position-deletes","total-equality-deletes"]values:["5459","1","3","2","0","6","10918","0","0"]]]
 ```
 
+### Partitions
+
+Inspect the partitions of the table:
+
+```python
+table.inspect.partitions()
+```
+
+```
+pyarrow.Table
+partition: struct<dt_month: int32, dt_day: date32[day]> not null
+  child 0, dt_month: int32
+  child 1, dt_day: date32[day]
+spec_id: int32 not null
+record_count: int64 not null
+file_count: int32 not null
+total_data_file_size_in_bytes: int64 not null
+position_delete_record_count: int64 not null
+position_delete_file_count: int32 not null
+equality_delete_record_count: int64 not null
+equality_delete_file_count: int32 not null
+last_updated_at: timestamp[ms]
+last_updated_snapshot_id: int64
+----
+partition: [
+  -- is_valid: all not null
+  -- child 0 type: int32
+[null,null,612]
+  -- child 1 type: date32[day]
+[null,2021-02-01,null]]
+spec_id: [[2,1,0]]
+record_count: [[1,1,2]]
+file_count: [[1,1,2]]
+total_data_file_size_in_bytes: [[641,641,1260]]
+position_delete_record_count: [[0,0,0]]
+position_delete_file_count: [[0,0,0]]
+equality_delete_record_count: [[0,0,0]]
+equality_delete_file_count: [[0,0,0]]
+last_updated_at: [[2024-04-13 18:59:35.981,2024-04-13 18:59:35.465,2024-04-13 18:59:35.003]]
+```
+
 ### Entries
 
 To show all the table's current manifest entries for both data and delete files.
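Since `inspect.partitions()` returns a `pyarrow.Table`, the result from the hunk above can be handed straight to pandas for ad-hoc analysis. A small sketch using the column names shown in the output (assumes pandas is installed):

```python
# Convert the partitions metadata to pandas and rank partitions by data size.
df = table.inspect.partitions().to_pandas()
print(df.sort_values("total_data_file_size_in_bytes", ascending=False).head())
```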
@@ -528,6 +581,31 @@ readable_metrics: [
 [6.0989]]
 ```
 
+### References
+
+To show a table's known snapshot references:
+
+```python
+table.inspect.refs()
+```
+
+```
+pyarrow.Table
+name: string not null
+type: string not null
+snapshot_id: int64 not null
+max_reference_age_in_ms: int64
+min_snapshots_to_keep: int32
+max_snapshot_age_in_ms: int64
+----
+name: [["main","testTag"]]
+type: [["BRANCH","TAG"]]
+snapshot_id: [[2278002651076891950,2278002651076891950]]
+max_reference_age_in_ms: [[null,604800000]]
+min_snapshots_to_keep: [[null,10]]
+max_snapshot_age_in_ms: [[null,604800000]]
+```
+
 ## Add Files
 
 Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them.
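A minimal sketch of that workflow, assuming `add_files` accepts a list of Parquet file paths whose schema already matches the table (the paths below are illustrative):

```python
# Register existing Parquet files as data files without rewriting them.
table.add_files(
    file_paths=[
        "s3://warehouse/default/existing-1.parquet",
        "s3://warehouse/default/existing-2.parquet",
    ]
)
```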

pyiceberg/catalog/hive.py

Lines changed: 20 additions & 18 deletions
@@ -372,22 +372,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
         identifier_tuple = self.identifier_to_tuple_without_catalog(
             tuple(table_request.identifier.namespace.root + [table_request.identifier.name])
         )
-        current_table = self.load_table(identifier_tuple)
         database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
-        base_metadata = current_table.metadata
-        for requirement in table_request.requirements:
-            requirement.validate(base_metadata)
-
-        updated_metadata = update_table_metadata(base_metadata, table_request.updates)
-        if updated_metadata == base_metadata:
-            # no changes, do nothing
-            return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location)
-
-        # write new metadata
-        new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1
-        new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version)
-        self._write_metadata(updated_metadata, current_table.io, new_metadata_location)
-
         # commit to hive
         # https://github.com/apache/hive/blob/master/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift#L1232
         with self._client as open_client:
@@ -397,11 +382,28 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
                 if lock.state != LockState.ACQUIRED:
                     raise CommitFailedException(f"Failed to acquire lock for {table_request.identifier}, state: {lock.state}")
 
-                tbl = open_client.get_table(dbname=database_name, tbl_name=table_name)
-                tbl.parameters = _construct_parameters(
+                hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name)
+                io = load_file_io({**self.properties, **hive_table.parameters}, hive_table.sd.location)
+                current_table = self._convert_hive_into_iceberg(hive_table, io)
+
+                base_metadata = current_table.metadata
+                for requirement in table_request.requirements:
+                    requirement.validate(base_metadata)
+
+                updated_metadata = update_table_metadata(base_metadata, table_request.updates)
+                if updated_metadata == base_metadata:
+                    # no changes, do nothing
+                    return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location)
+
+                # write new metadata
+                new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1
+                new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version)
+                self._write_metadata(updated_metadata, current_table.io, new_metadata_location)
+
+                hive_table.parameters = _construct_parameters(
                     metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location
                 )
-                open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=tbl)
+                open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table)
             except NoSuchObjectException as e:
                 raise NoSuchTableError(f"Table does not exist: {table_name}") from e
             finally:
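The rewritten `_commit_table` now loads and validates the current table state only after the metastore lock has been acquired; reading it beforehand would let another writer commit in between, and the requirements would then be validated against stale metadata. A generic, runnable illustration of that check-then-act-under-lock rationale (not pyiceberg API, just the pattern):

```python
import threading

_lock = threading.Lock()
_state = {"version": 0}

def commit(increment: int) -> None:
    # Read the current state only after the lock is held; reading it before
    # acquiring the lock could build and validate updates against stale state.
    with _lock:
        current = _state["version"]
        _state["version"] = current + increment

commit(1)
print(_state)  # {'version': 1}
```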

pyiceberg/io/pyarrow.py

Lines changed: 17 additions & 14 deletions
@@ -966,20 +966,15 @@ def _task_to_table(
     with fs.open_input_file(path) as fin:
         fragment = arrow_format.make_fragment(fin)
         physical_schema = fragment.physical_schema
-        schema_raw = None
-        if metadata := physical_schema.metadata:
-            schema_raw = metadata.get(ICEBERG_SCHEMA)
-        file_schema = (
-            Schema.model_validate_json(schema_raw) if schema_raw is not None else pyarrow_to_schema(physical_schema, name_mapping)
-        )
+        file_schema = pyarrow_to_schema(physical_schema, name_mapping)
 
         pyarrow_filter = None
         if bound_row_filter is not AlwaysTrue():
             translated_row_filter = translate_column_names(bound_row_filter, file_schema, case_sensitive=case_sensitive)
             bound_file_filter = bind(file_schema, translated_row_filter, case_sensitive=case_sensitive)
             pyarrow_filter = expression_to_pyarrow(bound_file_filter)
 
-        file_project_schema = sanitize_column_names(prune_columns(file_schema, projected_field_ids, select_full_types=False))
+        file_project_schema = prune_columns(file_schema, projected_field_ids, select_full_types=False)
 
         if file_schema is None:
             raise ValueError(f"Missing Iceberg schema in Metadata for file: {path}")
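For context on the new `file_schema = pyarrow_to_schema(physical_schema, name_mapping)` line: the Iceberg schema is now derived from the Arrow physical schema, using the Parquet field IDs carried in the field metadata when present, with `name_mapping` as the fallback. A minimal sketch with a hypothetical two-column schema; it assumes field IDs under the `PARQUET:field_id` metadata key are enough, so no name mapping is passed:

```python
import pyarrow as pa

from pyiceberg.io.pyarrow import pyarrow_to_schema

# Hypothetical physical schema whose fields carry Parquet field IDs.
physical_schema = pa.schema(
    [
        pa.field("id", pa.int64(), nullable=False, metadata={"PARQUET:field_id": "1"}),
        pa.field("name", pa.string(), metadata={"PARQUET:field_id": "2"}),
    ]
)

print(pyarrow_to_schema(physical_schema))
```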
@@ -1022,7 +1017,6 @@ def _task_to_table(
 
         if len(arrow_table) < 1:
             return None
-
         return to_requested_schema(projected_schema, file_project_schema, arrow_table)
 
 
@@ -1783,25 +1777,34 @@ def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterable["Write
 
     schema = table_metadata.schema()
     arrow_file_schema = schema.as_arrow()
-    parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties)
 
+    parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties)
     row_group_size = PropertyUtil.property_as_int(
         properties=table_metadata.properties,
         property_name=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
         default=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT,
     )
 
     def write_parquet(task: WriteTask) -> DataFile:
+        table_schema = task.schema
+        arrow_table = pa.Table.from_batches(task.record_batches)
+        # if schema needs to be transformed, use the transformed schema and adjust the arrow table accordingly
+        # otherwise use the original schema
+        if (sanitized_schema := sanitize_column_names(table_schema)) != table_schema:
+            file_schema = sanitized_schema
+            arrow_table = to_requested_schema(requested_schema=file_schema, file_schema=table_schema, table=arrow_table)
+        else:
+            file_schema = table_schema
+
         file_path = f'{table_metadata.location}/data/{task.generate_data_file_path("parquet")}'
         fo = io.new_output(file_path)
         with fo.create(overwrite=True) as fos:
-            with pq.ParquetWriter(fos, schema=arrow_file_schema, **parquet_writer_kwargs) as writer:
-                writer.write(pa.Table.from_batches(task.record_batches), row_group_size=row_group_size)
-
+            with pq.ParquetWriter(fos, schema=file_schema.as_arrow(), **parquet_writer_kwargs) as writer:
+                writer.write(arrow_table, row_group_size=row_group_size)
         statistics = data_file_statistics_from_parquet_metadata(
             parquet_metadata=writer.writer.metadata,
-            stats_columns=compute_statistics_plan(schema, table_metadata.properties),
-            parquet_column_mapping=parquet_path_to_id_mapping(schema),
+            stats_columns=compute_statistics_plan(file_schema, table_metadata.properties),
+            parquet_column_mapping=parquet_path_to_id_mapping(file_schema),
         )
         data_file = DataFile(
             content=DataFileContent.DATA,