@@ -1470,6 +1470,25 @@ static void check_incompletes_index_ranges_dont_overlap(
1470
1470
}
1471
1471
}
1472
1472
1473
+ void init_sparse_dst_column_before_copy (
1474
+ Column& dst_column, size_t offset, size_t num_rows, size_t dst_rawtype_size, OutputFormat output_format,
1475
+ const std::optional<util::BitSet>& src_sparse_map, const std::optional<Value>& default_value
1476
+ ) {
1477
+ if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1478
+ auto total_size = dst_rawtype_size * num_rows;
1479
+ auto dst_ptr = dst_column.bytes_at (offset, total_size);
1480
+ dst_column.type ().visit_tag ([&](auto dst_desc_tag) {
1481
+ util::initialize<decltype (dst_desc_tag)>(dst_ptr, total_size, default_value);
1482
+ });
1483
+ } else {
1484
+ if (src_sparse_map.has_value ()) {
1485
+ create_dense_bitmap (offset, src_sparse_map.value (), dst_column, AllocationType::DETACHABLE);
1486
+ } else {
1487
+ create_dense_bitmap_all_zeros (offset, num_rows, dst_column, AllocationType::DETACHABLE);
1488
+ }
1489
+ }
1490
+ }
1491
+
1473
1492
void copy_frame_data_to_buffer (
1474
1493
SegmentInMemory& destination, size_t target_index, SegmentInMemory& source, size_t source_index,
1475
1494
const RowRange& row_range, DecodePathData shared_data, std::any& handler_data, OutputFormat output_format,
@@ -1510,13 +1529,9 @@ void copy_frame_data_to_buffer(
1510
1529
};
1511
1530
handler->convert_type (src_column, dst_column, mapping, shared_data, handler_data, source.string_pool_ptr ());
1512
1531
} else if (is_empty_type (src_column.type ().data_type ())) {
1513
- if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1514
- dst_column.type ().visit_tag ([&](auto dst_desc_tag) {
1515
- util::initialize<decltype (dst_desc_tag)>(dst_ptr, total_size, default_value);
1516
- });
1517
- } else {
1518
- create_dense_bitmap_all_zeros (offset, num_rows, dst_column, AllocationType::DETACHABLE);
1519
- }
1532
+ init_sparse_dst_column_before_copy (
1533
+ dst_column, offset, num_rows, dst_rawtype_size, output_format, std::nullopt , default_value
1534
+ );
1520
1535
// Do not use src_column.is_sparse() here, as that misses columns that are dense, but have fewer than num_rows
1521
1536
// values
1522
1537
} else if (src_column.opt_sparse_map ().has_value () &&
@@ -1527,12 +1542,15 @@ void copy_frame_data_to_buffer(
1527
1542
using dst_type_info = ScalarTypeInfo<decltype (dst_tag)>;
1528
1543
typename dst_type_info::RawType* typed_dst_ptr =
1529
1544
reinterpret_cast <typename dst_type_info::RawType*>(dst_ptr);
1530
- // TODO: Extract this as common method to be used both in null value reducer and here
1531
- if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1532
- util::initialize<typename dst_type_info::TDT>(dst_ptr, num_rows * dst_rawtype_size, default_value);
1533
- } else {
1534
- create_dense_bitmap (offset, src_column.sparse_map (), dst_column, AllocationType::DETACHABLE);
1535
- }
1545
+ init_sparse_dst_column_before_copy (
1546
+ dst_column,
1547
+ offset,
1548
+ num_rows,
1549
+ dst_rawtype_size,
1550
+ output_format,
1551
+ src_column.opt_sparse_map (),
1552
+ default_value
1553
+ );
1536
1554
details::visit_type (src_column.type ().data_type (), [&](auto src_tag) {
1537
1555
using src_type_info = ScalarTypeInfo<decltype (src_tag)>;
1538
1556
Column::for_each_enumerated<typename src_type_info::TDT>(
@@ -1555,11 +1573,15 @@ void copy_frame_data_to_buffer(
1555
1573
dst_ptr += row_count * sizeof (SourceType);
1556
1574
}
1557
1575
} else {
1558
- if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1559
- util::initialize<SourceTDT>(dst_ptr, num_rows * dst_rawtype_size, default_value);
1560
- } else {
1561
- create_dense_bitmap (offset, src_column.sparse_map (), dst_column, AllocationType::DETACHABLE);
1562
- }
1576
+ init_sparse_dst_column_before_copy (
1577
+ dst_column,
1578
+ offset,
1579
+ num_rows,
1580
+ dst_rawtype_size,
1581
+ output_format,
1582
+ src_column.opt_sparse_map (),
1583
+ default_value
1584
+ );
1563
1585
SourceType* typed_dst_ptr = reinterpret_cast <SourceType*>(dst_ptr);
1564
1586
Column::for_each_enumerated<SourceTDT>(src_column, [&](const auto & row) {
1565
1587
typed_dst_ptr[row.idx ()] = row.value ();
@@ -1590,23 +1612,22 @@ void copy_frame_data_to_buffer(
1590
1612
// one with float32 dtype and one with dtype:
1591
1613
// common_type(common_type(uint16, int8), float32) = common_type(int32, float32) = float64
1592
1614
details::visit_type (dst_column.type ().data_type (), [&](auto dest_desc_tag) {
1593
- using dst_type_info = ScalarTypeInfo<decltype (dest_desc_tag)>;
1594
1615
using DestinationRawType = typename decltype (dest_desc_tag)::DataTypeTag::raw_type;
1595
1616
auto typed_dst_ptr = reinterpret_cast <DestinationRawType*>(dst_ptr);
1596
1617
details::visit_type (src_column.type ().data_type (), [&](auto src_desc_tag) {
1597
1618
using source_type_info = ScalarTypeInfo<decltype (src_desc_tag)>;
1598
1619
if constexpr (std::is_arithmetic_v<typename source_type_info::RawType> &&
1599
1620
std::is_arithmetic_v<DestinationRawType>) {
1600
1621
if (src_column.is_sparse ()) {
1601
- if (output_format != OutputFormat::ARROW || default_value. has_value ()) {
1602
- util::initialize< typename dst_type_info::TDT>(
1603
- dst_ptr, num_rows * dst_rawtype_size, default_value
1604
- );
1605
- } else {
1606
- create_dense_bitmap (
1607
- offset, src_column.sparse_map (), dst_column, AllocationType::DETACHABLE
1608
- );
1609
- }
1622
+ init_sparse_dst_column_before_copy (
1623
+ dst_column,
1624
+ offset,
1625
+ num_rows,
1626
+ dst_rawtype_size,
1627
+ output_format,
1628
+ src_column.opt_sparse_map (),
1629
+ default_value
1630
+ );
1610
1631
Column::for_each_enumerated<typename source_type_info::TDT>(src_column, [&](const auto & row) {
1611
1632
typed_dst_ptr[row.idx ()] = row.value ();
1612
1633
});
0 commit comments