Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions python/pyarrow/src/arrow/python/numpy_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -364,15 +364,33 @@ Status CastBuffer(const std::shared_ptr<DataType>& in_type,
return Status::OK();
}

// Downcast buffer from FromType to ToType with optional overflow checking.
// This function only supports narrowing casts (FromType wider than ToType).
// Do not use this function for widening casts (ToType wider than FromType).
template <typename FromType, typename ToType>
Status StaticCastBuffer(const Buffer& input, const int64_t length, MemoryPool* pool,
std::shared_ptr<Buffer>* out) {
Status StaticDowncastBuffer(const Buffer& input, int64_t length, MemoryPool* pool,
const uint8_t* null_bitmap,
const compute::CastOptions& cast_options,
std::shared_ptr<Buffer>* out) {
ARROW_ASSIGN_OR_RAISE(auto result, AllocateBuffer(sizeof(ToType) * length, pool));

auto in_values = reinterpret_cast<const FromType*>(input.data());
auto out_values = reinterpret_cast<ToType*>(result->mutable_data());

constexpr FromType kMin = std::numeric_limits<ToType>::min();
constexpr FromType kMax = std::numeric_limits<ToType>::max();

for (int64_t i = 0; i < length; ++i) {
*out_values++ = static_cast<ToType>(*in_values++);
FromType value = *in_values++;
// Check overflow only when cast_options.allow_int_overflow is false and value is not
// null
bool check_overflow = !cast_options.allow_int_overflow &&
((null_bitmap == nullptr) || bit_util::GetBit(null_bitmap, i));
if (check_overflow && (value < kMin || value > kMax)) {
return Status::Invalid("Integer value ", value, " out of bounds for int",
sizeof(ToType) * 8, " conversion at index ", i);
}
*out_values++ = static_cast<ToType>(value);
}
*out = std::move(result);
return Status::OK();
Expand Down Expand Up @@ -496,10 +514,10 @@ inline Status NumPyConverter::ConvertData<Date32Type>(std::shared_ptr<Buffer>* d
// separately here from int64_t to int32_t, because this data is not
// supported in compute::Cast
if (date_dtype->meta.base == NPY_FR_D) {
// TODO(wesm): How pedantic do we really want to be about checking for int32
// overflow here?
Status s = StaticCastBuffer<int64_t, int32_t>(**data, length_, pool_, data);
RETURN_NOT_OK(s);
// Downcast from int64 to int32 with overflow checking
const uint8_t* null_bitmap_ptr = null_bitmap_ ? null_bitmap_->data() : nullptr;
RETURN_NOT_OK((StaticDowncastBuffer<int64_t, int32_t>(
**data, length_, pool_, null_bitmap_ptr, cast_options_, data)));
} else {
ARROW_ASSIGN_OR_RAISE(input_type, NumPyDtypeToArrow(dtype_));
if (!input_type->Equals(*type_)) {
Expand Down
26 changes: 26 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2445,6 +2445,32 @@ def test_array_roundtrip_from_numpy_datetimeD():
assert result.dtype == arr.dtype


@pytest.mark.numpy
def test_array_from_numpy_datetime_overflow():
    """Check int32 bounds handling when building date32 from datetime64[D].

    The test expects ``pa.array(..., type=pa.date32())`` to raise
    ``pa.ArrowInvalid`` for inputs whose int64 value lies outside the int32
    range under the default ``safe`` setting, to accept the same input
    without raising when ``safe=False`` is passed, and to accept inputs at
    and inside the int32 limits.
    """
    day_dtype = 'datetime64[D]'
    too_large = np.int64(3000000000)
    too_small = np.int64(-3000000000)

    # Default (safe) conversion: both directions of out-of-range input
    # must be rejected with an "out of bounds" error.
    for bad_day in (too_large, too_small):
        days = np.array([bad_day], dtype=day_dtype)
        with pytest.raises(pa.ArrowInvalid, match='value .* out of bounds'):
            pa.array(days, type=pa.date32())

    # With safe=False the out-of-range input must not raise; only the
    # resulting length is checked here.
    unchecked = pa.array(
        np.array([too_large], dtype=day_dtype),
        type=pa.date32(),
        safe=False,
    )
    assert len(unchecked) == 1

    # Inputs inside the int32 range, including both extremes, convert
    # without error.
    in_range = np.array(
        [0, 100, -100, 2147483647, -2147483648], dtype=day_dtype
    )
    converted = pa.array(in_range, type=pa.date32())
    assert len(converted) == 5


def test_array_from_naive_datetimes():
arr = pa.array([
None,
Expand Down
Loading