Commit b3e2e08
authored
apacheGH-48442: [Python] Remove workaround that excluded struct types from
### Rationale for this change
The `chunked_arrays` hypothesis strategy had a workaround that excluded struct types with the assumption that field metadata is not preserved (added from apache@d06c664).
Testing confirms that field metadata is now correctly preserved in chunked arrays with struct types, so the workaround is no longer necessary.
The underlying issue was fixed by apache@dd0988b: chunked array construction now explicitly calls `CChunkedArray::Make()` instead of constructing a `CChunkedArray` manually.
### What changes are included in this PR?
Remove the assumption that field metadata is not preserved.
### Are these changes tested?
Manually tested that field metadata is preserved, using the following script (generated by ChatGPT):
```python
# Manual check that field-level metadata on a struct type survives
# construction of a pyarrow ChunkedArray, both with and without an explicit
# ``type=`` argument. Exits with status 0 when the explicit-type case
# preserves the metadata and 1 otherwise.
import sys

import pyarrow as pa

# Create a struct type with custom field metadata
struct_type = pa.struct([
    pa.field(
        'a',
        pa.int32(),
        metadata={'custom_key': 'custom_value_a', 'description': 'field a'},
    ),
    pa.field(
        'b',
        pa.string(),
        metadata={'custom_key': 'custom_value_b', 'description': 'field b'},
    ),
])

print("=== Original struct type ===")
print(f"Type: {struct_type}")
print(f"Field 'a' metadata: {struct_type[0].metadata}")
print(f"Field 'b' metadata: {struct_type[1].metadata}")
print()

# Create arrays with this struct type
arr1 = pa.array([
    {'a': 1, 'b': 'foo'},
    {'a': 2, 'b': 'bar'},
], type=struct_type)
arr2 = pa.array([
    {'a': 3, 'b': 'baz'},
    {'a': 4, 'b': 'qux'},
], type=struct_type)

print("=== Individual arrays ===")
print(f"arr1.type: {arr1.type}")
print(f"arr1.type[0].metadata: {arr1.type[0].metadata}")
print(f"arr2.type: {arr2.type}")
print(f"arr2.type[0].metadata: {arr2.type[0].metadata}")
print()

# Create chunked array WITH explicit type parameter (preserves metadata)
chunked_with_type = pa.chunked_array([arr1, arr2], type=struct_type)

print("=== Chunked array (with explicit type) ===")
print(f"Type: {chunked_with_type.type}")
print(f"Field 'a' metadata: {chunked_with_type.type[0].metadata}")
print(f"Field 'b' metadata: {chunked_with_type.type[1].metadata}")
print()

# Verify metadata is preserved
if (chunked_with_type.type[0].metadata == struct_type[0].metadata and
        chunked_with_type.type[1].metadata == struct_type[1].metadata):
    print("✓ SUCCESS: Field metadata IS preserved!")
    print(f" Field 'a': {dict(chunked_with_type.type[0].metadata)}")
    print(f" Field 'b': {dict(chunked_with_type.type[1].metadata)}")
    exit_code = 0
else:
    print("✗ FAILED: Field metadata was lost")
    exit_code = 1
print()

print("=== Test without explicit type (for comparison) ===")
# What happens without explicit type? (inferred from first chunk)
chunked_without_type = pa.chunked_array([arr1, arr2])
print(f"Type: {chunked_without_type.type}")
print(f"Field 'a' metadata: {chunked_without_type.type[0].metadata}")
print(f"Field 'b' metadata: {chunked_without_type.type[1].metadata}")
if chunked_without_type.type[0].metadata == struct_type[0].metadata:
    print(" → Metadata preserved even without explicit type (from first chunk)")
else:
    # This branch is reached only when the metadata differs, so the message
    # must report a loss rather than claim preservation.
    print(" → Note: Without explicit type, metadata is NOT preserved from first chunk")

# Propagate the explicit-type result to the caller; without this the
# computed exit_code (and the sys import) would be dead code.
sys.exit(exit_code)
```
### Are there any user-facing changes?
No, test-only.
* GitHub Issue: apache#48442
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Adam Reeve <[email protected]>
chunked_arrays (apache#48443)
1 parent ce3812d commit b3e2e08
1 file changed
+0
-3
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
386 | 386 | | |
387 | 387 | | |
388 | 388 | | |
389 | | - | |
390 | | - | |
391 | | - | |
392 | 389 | | |
393 | 390 | | |
394 | 391 | | |
| |||
0 commit comments