
Commit 3a942cf

Author: Yingjian Wu
Commit message: add test
Parent: 884eca9

2 files changed (+91, −24 lines)

pyiceberg/table/update/snapshot.py

Lines changed: 1 addition & 1 deletion

@@ -139,7 +139,7 @@ def __init__(
         )
 
     def _validate_target_branch(self, branch: Optional[str]) -> Optional[str]:
-        # Default is already set to MAIN_BRANCH. So branch name can't be None.
+        # If branch is None, the write is staged as a snapshot (not committed to any branch).
         if branch is not None:
            if branch in self._transaction.table_metadata.refs:
                ref = self._transaction.table_metadata.refs[branch]
tests/integration/test_writes/test_writes.py

Lines changed: 90 additions & 23 deletions
@@ -2293,20 +2293,36 @@ def test_stage_only_delete(
     # a new delete snapshot is added
     snapshots = tbl.snapshots()
     assert len(snapshots) == 2
+    # snapshot main ref has not changed
+    assert current_snapshot == tbl.metadata.current_snapshot_id
+    assert len(tbl.scan().to_arrow()) == original_count
+
+    # Write to main branch
+    with tbl.transaction() as txn:
+        with txn.update_snapshot().fast_append() as fast_append:
+            for data_file in _dataframe_to_data_files(
+                table_metadata=txn.table_metadata, df=arrow_table_with_null, io=txn._table.io
+            ):
+                fast_append.append_data_file(data_file=data_file)
+
+    # Main ref has changed
+    assert current_snapshot != tbl.metadata.current_snapshot_id
+    assert len(tbl.scan().to_arrow()) == 3
+    snapshots = tbl.snapshots()
+    assert len(snapshots) == 3

     rows = spark.sql(
         f"""
-        SELECT operation, summary
-        FROM {identifier}.snapshots
-        ORDER BY committed_at ASC
-        """
+        SELECT operation, parent_id
+        FROM {identifier}.snapshots
+        ORDER BY committed_at ASC
+        """
     ).collect()
     operations = [row.operation for row in rows]
-    assert operations == ["append", "delete"]
-
-    # snapshot main ref has not changed
-    assert current_snapshot == tbl.metadata.current_snapshot_id
-    assert len(tbl.scan().to_arrow()) == original_count
+    parent_snapshot_id = [row.parent_id for row in rows]
+    assert operations == ["append", "delete", "append"]
+    # both subsequent parent ids should be the first snapshot id
+    assert parent_snapshot_id == [None, current_snapshot, current_snapshot]


 @pytest.mark.integration
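
The parent_id assertion above is the crux of the new coverage: after the staged delete and the follow-up main-branch append, both newer snapshots have the first snapshot as their parent, i.e. the staged snapshot never becomes an ancestor of main. As a hedged aside (not part of the diff), the same check can be done straight from pyiceberg metadata without Spark, assuming `tbl` and `current_snapshot` as in the test:

# Sort snapshots by commit time and compare their parent ids directly.
snaps = sorted(tbl.snapshots(), key=lambda s: s.timestamp_ms)
assert [s.parent_snapshot_id for s in snaps] == [None, current_snapshot, current_snapshot]
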
@@ -2323,6 +2339,7 @@ def test_stage_only_fast_append(
     original_count = len(tbl.scan().to_arrow())
     assert original_count == 3

+    # Write to staging branch
     with tbl.transaction() as txn:
         with txn.update_snapshot(branch=None).fast_append() as fast_append:
             for data_file in _dataframe_to_data_files(
@@ -2333,20 +2350,37 @@ def test_stage_only_fast_append(
     # Main ref has not changed and data is not yet appended
     assert current_snapshot == tbl.metadata.current_snapshot_id
     assert len(tbl.scan().to_arrow()) == original_count
-
     # There should be a new staged snapshot
     snapshots = tbl.snapshots()
     assert len(snapshots) == 2

+    # Write to main branch
+    with tbl.transaction() as txn:
+        with txn.update_snapshot().fast_append() as fast_append:
+            for data_file in _dataframe_to_data_files(
+                table_metadata=txn.table_metadata, df=arrow_table_with_null, io=txn._table.io
+            ):
+                fast_append.append_data_file(data_file=data_file)
+
+    # Main ref has changed
+    assert current_snapshot != tbl.metadata.current_snapshot_id
+    assert len(tbl.scan().to_arrow()) == 6
+    snapshots = tbl.snapshots()
+    assert len(snapshots) == 3
+
     rows = spark.sql(
         f"""
-        SELECT operation, summary
+        SELECT operation, parent_id
         FROM {identifier}.snapshots
         ORDER BY committed_at ASC
         """
     ).collect()
     operations = [row.operation for row in rows]
-    assert operations == ["append", "append"]
+    parent_snapshot_id = [row.parent_id for row in rows]
+    assert operations == ["append", "append", "append"]
+    # both subsequent parent ids should be the first snapshot id
+    assert parent_snapshot_id == [None, current_snapshot, current_snapshot]
+


 @pytest.mark.integration
@@ -2378,15 +2412,32 @@ def test_stage_only_merge_append(
     snapshots = tbl.snapshots()
     assert len(snapshots) == 2

+    # Write to main branch
+    with tbl.transaction() as txn:
+        with txn.update_snapshot().fast_append() as fast_append:
+            for data_file in _dataframe_to_data_files(
+                table_metadata=txn.table_metadata, df=arrow_table_with_null, io=txn._table.io
+            ):
+                fast_append.append_data_file(data_file=data_file)
+
+    # Main ref has changed
+    assert current_snapshot != tbl.metadata.current_snapshot_id
+    assert len(tbl.scan().to_arrow()) == 6
+    snapshots = tbl.snapshots()
+    assert len(snapshots) == 3
+
     rows = spark.sql(
         f"""
-        SELECT operation, summary
-        FROM {identifier}.snapshots
-        ORDER BY committed_at ASC
-        """
+        SELECT operation, parent_id
+        FROM {identifier}.snapshots
+        ORDER BY committed_at ASC
+        """
     ).collect()
     operations = [row.operation for row in rows]
-    assert operations == ["append", "append"]
+    parent_snapshot_id = [row.parent_id for row in rows]
+    assert operations == ["append", "append", "append"]
+    # both subsequent parent ids should be the first snapshot id
+    assert parent_snapshot_id == [None, current_snapshot, current_snapshot]


 @pytest.mark.integration
@@ -2418,16 +2469,32 @@ def test_stage_only_overwrite_files(

     assert current_snapshot == tbl.metadata.current_snapshot_id
     assert len(tbl.scan().to_arrow()) == original_count
-
     snapshots = tbl.snapshots()
     assert len(snapshots) == 2

+    # Write to main branch
+    with tbl.transaction() as txn:
+        with txn.update_snapshot().fast_append() as fast_append:
+            for data_file in _dataframe_to_data_files(
+                table_metadata=txn.table_metadata, df=arrow_table_with_null, io=txn._table.io
+            ):
+                fast_append.append_data_file(data_file=data_file)
+
+    # Main ref has changed
+    assert current_snapshot != tbl.metadata.current_snapshot_id
+    assert len(tbl.scan().to_arrow()) == 6
+    snapshots = tbl.snapshots()
+    assert len(snapshots) == 3
+
     rows = spark.sql(
         f"""
-        SELECT operation, summary
-        FROM {identifier}.snapshots
-        ORDER BY committed_at ASC
-        """
+        SELECT operation, parent_id
+        FROM {identifier}.snapshots
+        ORDER BY committed_at ASC
+        """
     ).collect()
     operations = [row.operation for row in rows]
-    assert operations == ["append", "overwrite"]
+    parent_snapshot_id = [row.parent_id for row in rows]
+    assert operations == ["append", "overwrite", "append"]
+    # both subsequent parent ids should be the first snapshot id
+    assert parent_snapshot_id == [None, current_snapshot, current_snapshot]
