Skip to content

Commit b3e2e08

Browse files
authored
GH-48442: [Python] Remove workaround that excluded struct types from chunked_arrays (#48443)
### Rationale for this change

The `chunked_arrays` hypothesis strategy had a workaround that excluded struct types, on the assumption that field metadata is not preserved (added in d06c664). Testing confirms that field metadata is now correctly preserved in chunked arrays with struct types, so the workaround is no longer necessary. It was fixed by dd0988b: the code now explicitly calls `CChunkedArray::Make()` instead of constructing `CChunkedArray` manually.

### What changes are included in this PR?

Remove the assumption that field metadata is not preserved.

### Are these changes tested?

Manually tested the creation of field metadata with the following script (generated by ChatGPT):

```python
import sys
import pyarrow as pa

# Create a struct type with custom field metadata
struct_type = pa.struct([
    pa.field('a', pa.int32(), metadata={'custom_key': 'custom_value_a', 'description': 'field a'}),
    pa.field('b', pa.string(), metadata={'custom_key': 'custom_value_b', 'description': 'field b'})
])

print("=== Original struct type ===")
print(f"Type: {struct_type}")
print(f"Field 'a' metadata: {struct_type[0].metadata}")
print(f"Field 'b' metadata: {struct_type[1].metadata}")
print()

# Create arrays with this struct type
arr1 = pa.array([
    {'a': 1, 'b': 'foo'},
    {'a': 2, 'b': 'bar'}
], type=struct_type)

arr2 = pa.array([
    {'a': 3, 'b': 'baz'},
    {'a': 4, 'b': 'qux'}
], type=struct_type)

print("=== Individual arrays ===")
print(f"arr1.type: {arr1.type}")
print(f"arr1.type[0].metadata: {arr1.type[0].metadata}")
print(f"arr2.type: {arr2.type}")
print(f"arr2.type[0].metadata: {arr2.type[0].metadata}")
print()

# Create chunked array WITH explicit type parameter (preserves metadata)
chunked_with_type = pa.chunked_array([arr1, arr2], type=struct_type)

print("=== Chunked array (with explicit type) ===")
print(f"Type: {chunked_with_type.type}")
print(f"Field 'a' metadata: {chunked_with_type.type[0].metadata}")
print(f"Field 'b' metadata: {chunked_with_type.type[1].metadata}")
print()

# Verify metadata is preserved
if (chunked_with_type.type[0].metadata == struct_type[0].metadata and
        chunked_with_type.type[1].metadata == struct_type[1].metadata):
    print("✓ SUCCESS: Field metadata IS preserved!")
    print(f" Field 'a': {dict(chunked_with_type.type[0].metadata)}")
    print(f" Field 'b': {dict(chunked_with_type.type[1].metadata)}")
    exit_code = 0
else:
    print("✗ FAILED: Field metadata was lost")
    exit_code = 1

print()
print("=== Test without explicit type (for comparison) ===")
# What happens without explicit type? (inferred from first chunk)
chunked_without_type = pa.chunked_array([arr1, arr2])
print(f"Type: {chunked_without_type.type}")
print(f"Field 'a' metadata: {chunked_without_type.type[0].metadata}")
print(f"Field 'b' metadata: {chunked_without_type.type[1].metadata}")
if chunked_without_type.type[0].metadata == struct_type[0].metadata:
    print(" → Metadata preserved even without explicit type (from first chunk)")
else:
    print(" → Note: Even without explicit type, metadata is preserved from first chunk")
```

### Are there any user-facing changes?

No, test-only.

* GitHub Issue: #48442

Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Adam Reeve <[email protected]>
1 parent ce3812d commit b3e2e08

File tree

1 file changed

+0
-3
lines changed

1 file changed

+0
-3
lines changed

python/pyarrow/tests/strategies.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -386,9 +386,6 @@ def chunked_arrays(draw, type, min_chunks=0, max_chunks=None, chunk_size=None):
386 386       if isinstance(type, st.SearchStrategy):
387 387           type = draw(type)
388 388
389     -     # TODO(kszucs): remove it, field metadata is not kept
390     -     h.assume(not pa.types.is_struct(type))
391     -
392 389       chunk = arrays(type, size=chunk_size)
393 390       chunks = st.lists(chunk, min_size=min_chunks, max_size=max_chunks)
394 391

0 commit comments

Comments
 (0)