@@ -1350,6 +1350,29 @@ static void check_incompletes_index_ranges_dont_overlap(const std::shared_ptr<Pi
1350
1350
}
1351
1351
}
1352
1352
1353
+ void init_sparse_dst_column_before_copy (
1354
+ Column& dst_column,
1355
+ size_t offset,
1356
+ size_t num_rows,
1357
+ size_t dst_rawtype_size,
1358
+ OutputFormat output_format,
1359
+ const std::optional<util::BitSet>& src_sparse_map,
1360
+ const std::optional<Value>& default_value) {
1361
+ if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1362
+ auto total_size = dst_rawtype_size * num_rows;
1363
+ auto dst_ptr = dst_column.bytes_at (offset, total_size);
1364
+ dst_column.type ().visit_tag ([&](auto dst_desc_tag) {
1365
+ util::initialize<decltype (dst_desc_tag)>(dst_ptr, total_size, default_value);
1366
+ });
1367
+ } else {
1368
+ if (src_sparse_map.has_value ()) {
1369
+ create_dense_bitmap (offset, src_sparse_map.value (), dst_column, AllocationType::DETACHABLE);
1370
+ } else {
1371
+ create_dense_bitmap_all_zeros (offset, num_rows, dst_column, AllocationType::DETACHABLE);
1372
+ }
1373
+ }
1374
+ }
1375
+
1353
1376
void copy_frame_data_to_buffer (
1354
1377
SegmentInMemory& destination,
1355
1378
size_t target_index,
@@ -1381,24 +1404,13 @@ void copy_frame_data_to_buffer(
1381
1404
const ColumnMapping mapping{src_column.type (), dst_column.type (), destination.field (target_index), type_size, num_rows, row_range.first , offset, total_size, target_index};
1382
1405
handler->convert_type (src_column, dst_column, mapping, shared_data, handler_data, source.string_pool_ptr ());
1383
1406
} else if (is_empty_type (src_column.type ().data_type ())) {
1384
- if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1385
- dst_column.type ().visit_tag ([&](auto dst_desc_tag) {
1386
- util::initialize<decltype (dst_desc_tag)>(dst_ptr, total_size, default_value);
1387
- });
1388
- } else {
1389
- create_dense_bitmap_all_zeros (offset, num_rows, dst_column, AllocationType::DETACHABLE);
1390
- }
1407
+ init_sparse_dst_column_before_copy (dst_column, offset, num_rows, dst_rawtype_size, output_format, std::nullopt , default_value);
1391
1408
// Do not use src_column.is_sparse() here, as that misses columns that are dense, but have fewer than num_rows values
1392
1409
} else if (src_column.opt_sparse_map ().has_value () && is_valid_type_promotion_to_target (src_column.type (), dst_column.type (), IntToFloatConversion::PERMISSIVE)) {
1393
1410
details::visit_type (dst_column.type ().data_type (), [&](auto dst_tag) {
1394
1411
using dst_type_info = ScalarTypeInfo<decltype (dst_tag)>;
1395
1412
typename dst_type_info::RawType* typed_dst_ptr = reinterpret_cast <typename dst_type_info::RawType*>(dst_ptr);
1396
- // TODO: Extract this as common method to be used both in null value reducer and here
1397
- if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1398
- util::initialize<typename dst_type_info::TDT>(dst_ptr, num_rows * dst_rawtype_size, default_value);
1399
- } else {
1400
- create_dense_bitmap (offset, src_column.sparse_map (), dst_column, AllocationType::DETACHABLE);
1401
- }
1413
+ init_sparse_dst_column_before_copy (dst_column, offset, num_rows, dst_rawtype_size, output_format, src_column.opt_sparse_map (), default_value);
1402
1414
details::visit_type (src_column.type ().data_type (), [&](auto src_tag) {
1403
1415
using src_type_info = ScalarTypeInfo<decltype (src_tag)>;
1404
1416
Column::for_each_enumerated<typename src_type_info::TDT>(src_column, [typed_dst_ptr](auto enumerating_it) {
@@ -1417,11 +1429,7 @@ void copy_frame_data_to_buffer(
1417
1429
dst_ptr += row_count * sizeof (SourceType);
1418
1430
}
1419
1431
} else {
1420
- if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1421
- util::initialize<SourceTDT>(dst_ptr, num_rows * dst_rawtype_size, default_value);
1422
- } else {
1423
- create_dense_bitmap (offset, src_column.sparse_map (), dst_column, AllocationType::DETACHABLE);
1424
- }
1432
+ init_sparse_dst_column_before_copy (dst_column, offset, num_rows, dst_rawtype_size, output_format, src_column.opt_sparse_map (), default_value);
1425
1433
SourceType* typed_dst_ptr = reinterpret_cast <SourceType*>(dst_ptr);
1426
1434
Column::for_each_enumerated<SourceTDT>(src_column, [&](const auto & row) {
1427
1435
typed_dst_ptr[row.idx ()] = row.value ();
@@ -1449,18 +1457,13 @@ void copy_frame_data_to_buffer(
1449
1457
// one with float32 dtype and one with dtype:
1450
1458
// common_type(common_type(uint16, int8), float32) = common_type(int32, float32) = float64
1451
1459
details::visit_type (dst_column.type ().data_type () ,[&] (auto dest_desc_tag) {
1452
- using dst_type_info = ScalarTypeInfo<decltype (dest_desc_tag)>;
1453
1460
using DestinationRawType = typename decltype (dest_desc_tag)::DataTypeTag::raw_type;
1454
1461
auto typed_dst_ptr = reinterpret_cast <DestinationRawType*>(dst_ptr);
1455
1462
details::visit_type (src_column.type ().data_type () ,[&] (auto src_desc_tag) {
1456
1463
using source_type_info = ScalarTypeInfo<decltype (src_desc_tag)>;
1457
1464
if constexpr (std::is_arithmetic_v<typename source_type_info::RawType> && std::is_arithmetic_v<DestinationRawType>) {
1458
1465
if (src_column.is_sparse ()) {
1459
- if (output_format != OutputFormat::ARROW || default_value.has_value ()) {
1460
- util::initialize<typename dst_type_info::TDT>(dst_ptr, num_rows * dst_rawtype_size, default_value);
1461
- } else {
1462
- create_dense_bitmap (offset, src_column.sparse_map (), dst_column, AllocationType::DETACHABLE);
1463
- }
1466
+ init_sparse_dst_column_before_copy (dst_column, offset, num_rows, dst_rawtype_size, output_format, src_column.opt_sparse_map (), default_value);
1464
1467
Column::for_each_enumerated<typename source_type_info::TDT>(src_column, [&](const auto & row) {
1465
1468
typed_dst_ptr[row.idx ()] = row.value ();
1466
1469
});
0 commit comments