Skip to content

Commit b3e2e08

Browse files
authored
apacheGH-48442: [Python] Remove workaround that excluded struct types from chunked_arrays (apache#48443)
### Rationale for this change

The `chunked_arrays` hypothesis strategy had a workaround that excluded struct types with the assumption that field metadata is not preserved (added from apache@d06c664). Testing confirms that field metadata is now correctly preserved in chunked arrays with struct types, so the workaround is no longer necessary. The underlying issue was fixed by apache@dd0988b: it now explicitly calls `CChunkedArray::Make()` instead of manually constructing a `CChunkedArray`.

### What changes are included in this PR?

Remove the assumption that field metadata is not preserved.

### Are these changes tested?

Manually tested the creation of metadata (test script generated by ChatGPT):

```python
import sys
import pyarrow as pa

# Create a struct type with custom field metadata
struct_type = pa.struct([
    pa.field('a', pa.int32(), metadata={'custom_key': 'custom_value_a', 'description': 'field a'}),
    pa.field('b', pa.string(), metadata={'custom_key': 'custom_value_b', 'description': 'field b'})
])

print("=== Original struct type ===")
print(f"Type: {struct_type}")
print(f"Field 'a' metadata: {struct_type[0].metadata}")
print(f"Field 'b' metadata: {struct_type[1].metadata}")
print()

# Create arrays with this struct type
arr1 = pa.array([
    {'a': 1, 'b': 'foo'},
    {'a': 2, 'b': 'bar'}
], type=struct_type)

arr2 = pa.array([
    {'a': 3, 'b': 'baz'},
    {'a': 4, 'b': 'qux'}
], type=struct_type)

print("=== Individual arrays ===")
print(f"arr1.type: {arr1.type}")
print(f"arr1.type[0].metadata: {arr1.type[0].metadata}")
print(f"arr2.type: {arr2.type}")
print(f"arr2.type[0].metadata: {arr2.type[0].metadata}")
print()

# Create chunked array WITH explicit type parameter (preserves metadata)
chunked_with_type = pa.chunked_array([arr1, arr2], type=struct_type)

print("=== Chunked array (with explicit type) ===")
print(f"Type: {chunked_with_type.type}")
print(f"Field 'a' metadata: {chunked_with_type.type[0].metadata}")
print(f"Field 'b' metadata: {chunked_with_type.type[1].metadata}")
print()

# Verify metadata is preserved
if (chunked_with_type.type[0].metadata == struct_type[0].metadata and
        chunked_with_type.type[1].metadata == struct_type[1].metadata):
    print("✓ SUCCESS: Field metadata IS preserved!")
    print(f" Field 'a': {dict(chunked_with_type.type[0].metadata)}")
    print(f" Field 'b': {dict(chunked_with_type.type[1].metadata)}")
    exit_code = 0
else:
    print("✗ FAILED: Field metadata was lost")
    exit_code = 1

print()
print("=== Test without explicit type (for comparison) ===")
# What happens without explicit type? (inferred from first chunk)
chunked_without_type = pa.chunked_array([arr1, arr2])
print(f"Type: {chunked_without_type.type}")
print(f"Field 'a' metadata: {chunked_without_type.type[0].metadata}")
print(f"Field 'b' metadata: {chunked_without_type.type[1].metadata}")

if chunked_without_type.type[0].metadata == struct_type[0].metadata:
    print(" → Metadata preserved even without explicit type (from first chunk)")
else:
    print(" → Note: Even without explicit type, metadata is preserved from first chunk")
```

### Are there any user-facing changes?

No, test-only.

* GitHub Issue: apache#48442

Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Adam Reeve <[email protected]>
1 parent ce3812d commit b3e2e08

File tree

1 file changed

+0
-3
lines changed

1 file changed

+0
-3
lines changed

python/pyarrow/tests/strategies.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -386,9 +386,6 @@ def chunked_arrays(draw, type, min_chunks=0, max_chunks=None, chunk_size=None):
     if isinstance(type, st.SearchStrategy):
         type = draw(type)
 
-    # TODO(kszucs): remove it, field metadata is not kept
-    h.assume(not pa.types.is_struct(type))
-
     chunk = arrays(type, size=chunk_size)
     chunks = st.lists(chunk, min_size=min_chunks, max_size=max_chunks)
 

0 commit comments

Comments
 (0)