rapidsai
diff --git a/‎cpp/benchmarks/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎cpp/benchmarks/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cpp/benchmarks/common/generate_nested_types.cpp‎
Lines changed: 79 additions & 0 deletions b/‎cpp/benchmarks/common/generate_nested_types.cpp‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎cpp/benchmarks/common/generate_nested_types.hpp‎
Lines changed: 9 additions & 72 deletions b/‎cpp/benchmarks/common/generate_nested_types.hpp‎
Lines changed: 9 additions & 72 deletions
diff --git a/‎cpp/benchmarks/sort/sort_lists.cpp‎
Lines changed: 6 additions & 7 deletions b/‎cpp/benchmarks/sort/sort_lists.cpp‎
Lines changed: 6 additions & 7 deletions
diff --git a/‎cpp/benchmarks/sort/sort_structs.cpp‎
Lines changed: 3 additions & 4 deletions b/‎cpp/benchmarks/sort/sort_structs.cpp‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎python/cudf/cudf/core/column/categorical.py‎
Lines changed: 16 additions & 0 deletions b/‎python/cudf/cudf/core/column/categorical.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎python/cudf/cudf/core/column/column.py‎
Lines changed: 23 additions & 12 deletions b/‎python/cudf/cudf/core/column/column.py‎
Lines changed: 23 additions & 12 deletions
@@ -7,7 +7,7 @@
 
 find_package(Threads REQUIRED)
 
-add_library(cudf_datagen STATIC common/generate_input.cu)
+add_library(cudf_datagen STATIC common/generate_input.cu common/generate_nested_types.cpp)
 target_compile_features(cudf_datagen PUBLIC cxx_std_20 cuda_std_20)
 
 target_compile_options(
 
@@ -0,0 +1,79 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "generate_nested_types.hpp"
+
+#include "generate_input.hpp"
+
+#include <cudf_test/column_wrapper.hpp>
+
+#include <algorithm>
+#include <random>
+#include <vector>
+
+/**
+ * Builds a random table made only of LIST columns for the nested-type benchmarks.
+ *
+ * The target table size, the list nesting depth and the null probability are read from the
+ * "size_bytes", "depth" and "null_frequency" axes of `state`. `min_val` and `max_val` are passed
+ * as the bounds of the UNIFORM distribution configured for the LIST type.
+ */
+std::unique_ptr<cudf::table> create_lists_data(nvbench::state& state,
+                                               cudf::size_type const num_columns,
+                                               cudf::size_type const min_val,
+                                               cudf::size_type const max_val)
+{
+  // Benchmark axes that shape the generated data.
+  auto const target_bytes  = static_cast<size_t>(state.get_int64("size_bytes"));
+  auto const nesting_depth = static_cast<cudf::size_type>(state.get_int64("depth"));
+  auto const null_fraction = state.get_float64("null_frequency");
+
+  data_profile profile;
+  profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, min_val, max_val);
+  profile.set_list_depth(nesting_depth);
+  profile.set_null_probability(null_fraction);
+
+  // Every column of the table has the same LIST type.
+  return create_random_table(std::vector<cudf::type_id>(num_columns, cudf::type_id::LIST),
+                             table_size_bytes{target_bytes},
+                             profile);
+}
+
+/**
+ * Builds a table of nested STRUCT data for the nested-type benchmarks.
+ *
+ * The row count, nesting depth and leaf-null switch are read from the "NumRows", "Depth" and
+ * "Nulls" axes of `state`. `n_cols` integer leaf columns are created and then wrapped in a struct
+ * column once per nesting level, so the returned table holds a single struct column when
+ * "Depth" is positive and the bare leaf columns otherwise.
+ *
+ * The random engine is default-constructed (fixed default seed) and shared by every draw below,
+ * so the order of the draws determines the generated data.
+ */
+std::unique_ptr<cudf::table> create_structs_data(nvbench::state& state,
+                                                 cudf::size_type const n_cols)
+{
+  using Type           = int;
+  using column_wrapper = cudf::test::fixed_width_column_wrapper<Type>;
+  std::default_random_engine generator;
+  std::uniform_int_distribution<int> distribution(0, 100);
+
+  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.get_int64("NumRows"))};
+  cudf::size_type const depth{static_cast<cudf::size_type>(state.get_int64("Depth"))};
+  bool const nulls{static_cast<bool>(state.get_int64("Nulls"))};
+
+  // Leaf columns hold values drawn uniformly from the closed range [0, 100].
+  auto const make_leaf = [&]() {
+    auto const elements = cudf::detail::make_counting_transform_iterator(
+      0, [&](auto) { return distribution(generator); });
+    if (!nulls) { return column_wrapper(elements, elements + n_rows); }
+    // With nulls enabled, rows 0, 10, 20, ... are marked null.
+    auto const valids =
+      cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; });
+    return column_wrapper(elements, elements + n_rows, valids);
+  };
+
+  // Build each leaf and take ownership of its column right away; no intermediate wrapper list.
+  std::vector<std::unique_ptr<cudf::column>> child_cols;
+  child_cols.reserve(n_cols);
+  std::generate_n(std::back_inserter(child_cols), n_cols, [&]() { return make_leaf().release(); });
+
+  // Nest the child columns in a struct, then nest that struct column inside another
+  // struct column up to the desired depth
+  for (cudf::size_type level = 0; level < depth; ++level) {
+    // A struct row is null only when the draw is exactly 0, i.e. with probability
+    // 1 / (100 * (level + 1) + 1), so nulls get rarer at outer levels.
+    // NOTE(review): struct-level nulls are generated even when "Nulls" is 0 - confirm intended.
+    std::uniform_int_distribution<int> validity_distribution(0, 100 * (level + 1));
+    std::vector<bool> struct_validity;
+    struct_validity.reserve(n_rows);
+    std::generate_n(std::back_inserter(struct_validity), n_rows, [&]() {
+      return validity_distribution(generator) != 0;
+    });
+    cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity);
+    // Reset the moved-from vector before reusing it for the next level.
+    child_cols.clear();
+    child_cols.push_back(struct_col.release());
+  }
+
+  // Hand the remaining column(s) over to an owning table
+  return std::make_unique<cudf::table>(std::move(child_cols));
+}
@@ -1,82 +1,19 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
 #pragma once
 
-#include "generate_input.hpp"
+#include <cudf/table/table.hpp>
+#include <cudf/types.hpp>
 
-#include <cudf_test/column_wrapper.hpp>
-
-// This error appears in GCC 11.3 and may be a compiler bug or nvbench bug.
-#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #include <nvbench/nvbench.cuh>
-#pragma GCC diagnostic pop
-
-#include <random>
-
-inline std::unique_ptr<cudf::table> create_lists_data(nvbench::state& state,
-                                                      cudf::size_type const num_columns = 1,
-                                                      cudf::size_type const min_val     = 0,
-                                                      cudf::size_type const max_val     = 5)
-{
-  size_t const size_bytes(state.get_int64("size_bytes"));
-  cudf::size_type const depth{static_cast<cudf::size_type>(state.get_int64("depth"))};
-  auto const null_frequency{state.get_float64("null_frequency")};
-
-  data_profile table_profile;
-  table_profile.set_distribution_params(
-    cudf::type_id::LIST, distribution_id::UNIFORM, min_val, max_val);
-  table_profile.set_list_depth(depth);
-  table_profile.set_null_probability(null_frequency);
-  return create_random_table(std::vector<cudf::type_id>(num_columns, cudf::type_id::LIST),
-                             table_size_bytes{size_bytes},
-                             table_profile);
-}
-
-inline std::unique_ptr<cudf::table> create_structs_data(nvbench::state& state,
-                                                        cudf::size_type const n_cols = 1)
-{
-  using Type           = int;
-  using column_wrapper = cudf::test::fixed_width_column_wrapper<Type>;
-  std::default_random_engine generator;
-  std::uniform_int_distribution<int> distribution(0, 100);
-
-  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.get_int64("NumRows"))};
-  cudf::size_type const depth{static_cast<cudf::size_type>(state.get_int64("Depth"))};
-  bool const nulls{static_cast<bool>(state.get_int64("Nulls"))};
-
-  // Create columns with values in the range [0,100)
-  std::vector<column_wrapper> columns;
-  columns.reserve(n_cols);
-  std::generate_n(std::back_inserter(columns), n_cols, [&]() {
-    auto const elements = cudf::detail::make_counting_transform_iterator(
-      0, [&](auto row) { return distribution(generator); });
-    if (!nulls) return column_wrapper(elements, elements + n_rows);
-    auto valids =
-      cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; });
-    return column_wrapper(elements, elements + n_rows, valids);
-  });
-
-  std::vector<std::unique_ptr<cudf::column>> cols;
-  std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) {
-    return col.release();
-  });
 
-  std::vector<std::unique_ptr<cudf::column>> child_cols = std::move(cols);
-  // Nest the child columns in a struct, then nest that struct column inside another
-  // struct column up to the desired depth
-  for (int i = 0; i < depth; i++) {
-    std::vector<bool> struct_validity;
-    std::uniform_int_distribution<int> bool_distribution(0, 100 * (i + 1));
-    std::generate_n(
-      std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); });
-    cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity);
-    child_cols = std::vector<std::unique_ptr<cudf::column>>{};
-    child_cols.push_back(struct_col.release());
-  }
+/**
+ * @brief Creates a random table of LIST columns sized from the benchmark state.
+ *
+ * The definition reads the "size_bytes", "depth" and "null_frequency" axes from `state` to set
+ * the table size, the list nesting depth and the null probability.
+ *
+ * @param state nvbench state providing the axes listed above
+ * @param num_columns Number of LIST columns in the generated table
+ * @param min_val Lower bound passed to the UNIFORM distribution configured for the LIST type
+ * @param max_val Upper bound passed to the UNIFORM distribution configured for the LIST type
+ * @return The generated table
+ */
+std::unique_ptr<cudf::table> create_lists_data(nvbench::state& state,
+                                               cudf::size_type const num_columns = 1,
+                                               cudf::size_type const min_val     = 0,
+                                               cudf::size_type const max_val     = 5);
 
-  // Create table view
-  return std::make_unique<cudf::table>(std::move(child_cols));
-}
+/**
+ * @brief Creates a table of nested STRUCT data sized from the benchmark state.
+ *
+ * The definition reads the "NumRows", "Depth" and "Nulls" axes from `state`. It builds `n_cols`
+ * integer leaf columns and wraps them in a struct column once per nesting level.
+ *
+ * @param state nvbench state providing the axes listed above
+ * @param n_cols Number of integer leaf columns placed in the innermost struct
+ * @return A table holding the outermost struct column, or the leaf columns themselves when
+ *         "Depth" is not positive
+ */
+std::unique_ptr<cudf::table> create_structs_data(nvbench::state& state,
+                                                 cudf::size_type const n_cols = 1);
@@ -1,11 +1,12 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
 #include <benchmarks/common/generate_nested_types.hpp>
 
-#include <cudf/detail/sorting.hpp>
+#include <cudf/lists/lists_column_view.hpp>
+#include <cudf/sorting.hpp>
 #include <cudf/utilities/memory_resource.hpp>
 
 #include <nvbench/nvbench.cuh>
@@ -22,8 +23,7 @@ void sort_multiple_lists(nvbench::state& state)
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    cudf::detail::sorted_order(
-      *input_table, {}, {}, stream, cudf::get_current_device_resource_ref());
+    cudf::sorted_order(*input_table, {}, {}, stream, cudf::get_current_device_resource_ref());
   });
 }
 
@@ -66,8 +66,7 @@ void sort_lists_of_structs(nvbench::state& state)
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     rmm::cuda_stream_view stream_view{launch.get_stream()};
-    cudf::detail::sorted_order(
-      input_table, {}, {}, stream, cudf::get_current_device_resource_ref());
+    cudf::sorted_order(input_table, {}, {}, stream, cudf::get_current_device_resource_ref());
   });
 }
 
@@ -85,7 +84,7 @@ void nvbench_sort_lists(nvbench::state& state)
 
 NVBENCH_BENCH(nvbench_sort_lists)
   .set_name("sort_list")
-  .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28})
+  .add_int64_power_of_two_axis("size_bytes", {18, 24, 28})
   .add_int64_axis("depth", {1, 4})
   .add_int64_axis("num_columns", {1})
   .add_int64_axis("lists_of_structs", {0, 1})
 
@@ -1,11 +1,11 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
 #include <benchmarks/common/generate_nested_types.hpp>
 
-#include <cudf/detail/sorting.hpp>
+#include <cudf/sorting.hpp>
 #include <cudf/utilities/memory_resource.hpp>
 
 #include <nvbench/nvbench.cuh>
@@ -16,8 +16,7 @@ void nvbench_sort_struct(nvbench::state& state)
 
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     rmm::cuda_stream_view stream_view{launch.get_stream()};
-    cudf::detail::sorted_order(
-      *input, {}, {}, stream_view, cudf::get_current_device_resource_ref());
+    cudf::sorted_order(*input, {}, {}, stream_view, cudf::get_current_device_resource_ref());
   });
 }
 
 
@@ -79,6 +79,13 @@ class CategoricalColumn(column.ColumnBase):
         plc.TypeId.UINT64,
     }
 
+    @staticmethod
+    def _validate_dtype_to_plc_column(
+        plc_column: plc.Column, dtype: DtypeObj
+    ) -> None:
+        """Skip the dtype-versus-plc_column type check for categoricals.
+
+        This implementation performs no validation and always returns
+        ``None``.
+        NOTE(review): presumably the check is skipped because the
+        plc_column holds the codes rather than a categorical type; confirm.
+        """
+        return None
+
     @classmethod
     def _validate_args(  # type: ignore[override]
         cls, plc_column: plc.Column, dtype: CategoricalDtype
@@ -343,6 +350,15 @@ def _cast_self_and_other_for_where(
 
         return self.codes, other
 
+    def where(
+        self, cond: ColumnBase, other: ScalarLike | ColumnBase, inplace: bool
+    ) -> ColumnBase:
+        casted_col, casted_other = self._cast_self_and_other_for_where(
+            other, inplace
+        )
+        result = casted_col.copy_if_else(casted_other, cond)  # type: ignore[arg-type]
+        return column.ColumnBase.create(result.plc_column, self.dtype)
+
     def _encode(self, value: ScalarLike) -> ScalarLike:
         return self.categories.find_first_value(value)
 
 
@@ -330,6 +330,17 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
             "ColumnBase and its subclasses must be instantiated via from_pylibcudf."
         )
 
+    @staticmethod
+    def _validate_dtype_to_plc_column(
+        plc_column: plc.Column, dtype: DtypeObj
+    ) -> None:
+        """Validate that the dtype matches the equivalent type of the plc_column"""
+        if dtype_to_pylibcudf_type(dtype) != plc_column.type():
+            # TODO: Override ListColumn, StructColumn, IntervalColumn to also validate children
+            raise ValueError(
+                f"dtype {dtype} does not match the type of the plc_column {plc_column.type().id()}"
+            )
+
     @classmethod
     def _validate_args(
         cls, plc_column: plc.Column, dtype: DtypeObj
@@ -342,6 +353,7 @@ def _validate_args(
             raise ValueError(
                 f"plc_column must be a pylibcudf.Column with a TypeId in {cls._VALID_PLC_TYPES}"
             )
+        cls._validate_dtype_to_plc_column(plc_column, dtype)
         return plc_column, dtype
 
     @property
@@ -1147,9 +1159,9 @@ def from_arrow(cls, array: pa.Array | pa.ChunkedArray) -> ColumnBase:
                 )
             )
         else:
-            result = cls.from_pylibcudf(plc.Column.from_arrow(array))
-            return result._with_type_metadata(
-                cudf_dtype_from_pa_type(array.type)
+            return cls.create(
+                plc.Column.from_arrow(array),
+                cudf_dtype_from_pa_type(array.type),
             )
 
     def _get_mask_as_column(self) -> ColumnBase:
@@ -2606,24 +2618,23 @@ def _reduce(
                 aggregation.make_aggregation(op, kwargs).plc_obj,
                 dtype_to_pylibcudf_type(col_dtype),
             )
+            # Hook for subclasses (e.g., DecimalBaseColumn adjusts precision)
+            col_dtype = col._adjust_reduce_result_dtype(
+                op, col_dtype, plc_scalar
+            )
             result_col = ColumnBase.create(
                 plc.Column.from_scalar(plc_scalar, 1), col_dtype
             )
-            # Hook for subclasses (e.g., DecimalBaseColumn adjusts precision)
-            result_col = col._adjust_reduce_result(
-                result_col, op, col_dtype, plc_scalar
-            )
         return result_col.element_indexing(0)
 
-    def _adjust_reduce_result(
+    def _adjust_reduce_result_dtype(
         self,
-        result_col: ColumnBase,
         op: str,
         col_dtype: DtypeObj,
         plc_scalar: plc.Scalar,
-    ) -> ColumnBase:
+    ) -> DtypeObj:
-        """Hook for subclasses to adjust reduction result."""
+        """Hook for subclasses to adjust the dtype of the reduction result.
+
+        The base implementation returns ``col_dtype`` unchanged.
+        """
-        return result_col
+        return col_dtype
 
     def minmax(self) -> tuple[ScalarLike, ScalarLike]:
         with self.access(mode="read", scope="internal"):
@@ -2788,7 +2799,7 @@ def where(
             other, inplace
         )
         result = casted_col.copy_if_else(casted_other, cond)  # type: ignore[arg-type]
-        return ColumnBase.create(result.plc_column, self.dtype)
+        return ColumnBase.create(result.plc_column, casted_col.dtype)
 
 
 def column_empty(