@@ -1862,8 +1862,13 @@ class CategoricalWriter
18621862 }
18631863
18641864 Status WriteIndicesUniform (const ChunkedArray& data) {
1865- RETURN_NOT_OK (this ->AllocateNDArray (TRAITS::npy_type, 1 ));
1866- T* out_values = reinterpret_cast <T*>(this ->block_data_ );
1865+ // For unsigned types, convert to int32 since pandas uses -1 for nulls
1866+ const bool is_unsigned = std::is_unsigned<T>::value;
1867+ using OutputType = std::conditional_t <is_unsigned, int32_t , T>;
1868+ const int npy_output_type = is_unsigned ? NPY_INT32 : TRAITS::npy_type;
1869+
1870+ RETURN_NOT_OK (this ->AllocateNDArray (npy_output_type, 1 ));
1871+ auto out_values = reinterpret_cast <OutputType*>(this ->block_data_ );
18671872
18681873 for (int c = 0 ; c < data.num_chunks (); c++) {
18691874 const auto & arr = checked_cast<const DictionaryArray&>(*data.chunk (c));
@@ -1874,7 +1879,7 @@ class CategoricalWriter
18741879 // Null is -1 in CategoricalBlock
18751880 for (int i = 0 ; i < arr.length (); ++i) {
18761881 if (indices.IsValid (i)) {
1877- *out_values++ = values[i];
1882+ *out_values++ = static_cast <OutputType>( values[i]) ;
18781883 } else {
18791884 *out_values++ = -1 ;
18801885 }
@@ -1927,7 +1932,11 @@ class CategoricalWriter
19271932 const auto & arr_first = checked_cast<const DictionaryArray&>(*data.chunk (0 ));
19281933 const auto indices_first = std::static_pointer_cast<ArrayType>(arr_first.indices ());
19291934
1930- if (data.num_chunks () == 1 && indices_first->null_count () == 0 ) {
1935+ // For unsigned types, we need to convert to signed for pandas compatibility
1936+ // even when there are no nulls, so we skip the fast path
1937+ const bool is_unsigned = std::is_unsigned<T>::value;
1938+
1939+ if (data.num_chunks () == 1 && indices_first->null_count () == 0 && !is_unsigned) {
19311940 RETURN_NOT_OK (
19321941 CheckIndexBounds (*indices_first->data (), arr_first.dictionary ()->length ()));
19331942
@@ -2023,13 +2032,10 @@ Status MakeWriter(const PandasOptions& options, PandasWriter::type writer_type,
20232032 CATEGORICAL_CASE (Int16Type);
20242033 CATEGORICAL_CASE (Int32Type);
20252034 CATEGORICAL_CASE (Int64Type);
2026- case Type::UINT8:
2027- case Type::UINT16:
2028- case Type::UINT32:
2029- case Type::UINT64:
2030- return Status::TypeError (
2031- " Converting unsigned dictionary indices to pandas" ,
2032- " not yet supported, index type: " , index_type.ToString ());
2035+ CATEGORICAL_CASE (UInt8Type);
2036+ CATEGORICAL_CASE (UInt16Type);
2037+ CATEGORICAL_CASE (UInt32Type);
2038+ CATEGORICAL_CASE (UInt64Type);
20332039 default :
20342040 // Unreachable
20352041 ARROW_DCHECK (false );
0 commit comments