Skip to content

Commit 2baaf66

Browse files
committed
Fix for alter table delete
1 parent 56b0d59 commit 2baaf66

File tree

7 files changed

+42
-45
lines changed

7 files changed

+42
-45
lines changed

src/Core/Range.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,13 @@
22
#include <Core/Range.h>
33
#include <IO/Operators.h>
44
#include <IO/WriteBufferFromString.h>
5-
#include <IO/ReadBufferFromString.h>
65
#include <Common/FieldVisitorToString.h>
76
#include <Common/FieldAccurateComparison.h>
87

98

109
namespace DB
1110
{
1211

13-
1412
FieldRef::FieldRef(ColumnsWithTypeAndName * columns_, size_t row_idx_, size_t column_idx_)
1513
: Field((*(*columns_)[column_idx_].column)[row_idx_]), columns(columns_), row_idx(row_idx_), column_idx(column_idx_)
1614
{

src/Core/SettingsChangesHistory.cpp

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
3939
/// controls new feature and it's 'true' by default, use 'false' as previous_value).
4040
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
4141
/// Note: please check if the key already exists to prevent duplicate entries.
42+
addSettingsChanges(settings_changes_history, "25.8.1.20384",
43+
{
44+
{"allow_experimental_iceberg_read_optimization", true, true, "New setting."},
45+
});
4246
addSettingsChanges(settings_changes_history, "25.8",
4347
{
4448
{"output_format_json_quote_64bit_integers", true, false, "Disable quoting of the 64 bit integers in JSON by default"},
@@ -132,18 +136,6 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
132136
{"distributed_plan_force_shuffle_aggregation", 0, 0, "New experimental setting"},
133137
{"allow_experimental_insert_into_iceberg", false, false, "New setting."},
134138
/// RELEASE CLOSED
135-
{"allow_experimental_database_iceberg", false, true, "Turned ON by default for Antalya"},
136-
{"allow_experimental_database_unity_catalog", false, true, "Turned ON by default for Antalya"},
137-
{"allow_experimental_database_glue_catalog", false, true, "Turned ON by default for Antalya"},
138-
{"output_format_parquet_enum_as_byte_array", true, true, "Enable writing Enum as byte array in Parquet by default"},
139-
{"lock_object_storage_task_distribution_ms", 0, 0, "New setting."},
140-
{"object_storage_cluster", "", "", "New setting"},
141-
{"object_storage_max_nodes", 0, 0, "New setting"},
142-
{"object_storage_cluster_join_mode", "allow", "allow", "New setting"},
143-
{"object_storage_remote_initiator", false, false, "New setting."},
144-
{"allow_experimental_export_merge_tree_part", false, false, "New setting."},
145-
{"allow_experimental_iceberg_read_optimization", true, true, "New setting."},
146-
{"export_merge_tree_part_overwrite_file_if_exists", false, false, "New setting."},
147139
});
148140
addSettingsChanges(settings_changes_history, "25.6",
149141
{

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
#include <Poco/Timestamp.h>
1111
#include <Poco/Util/AbstractConfiguration.h>
12-
#include <Poco/JSON/Object.h>
1312
#include <Core/Defines.h>
1413
#include <IO/ReadSettings.h>
1514
#include <IO/WriteSettings.h>

src/Processors/Sources/ConstChunkGenerator.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <Processors/ISource.h>
4+
#include <Processors/Formats/IInputFormat.h>
45

56

67
namespace DB
@@ -13,7 +14,7 @@ class ConstChunkGenerator : public ISource
1314
public:
1415
ConstChunkGenerator(SharedHeader header, size_t total_num_rows, size_t max_block_size_)
1516
: ISource(std::move(header))
16-
, remaining_rows(total_num_rows), max_block_size(max_block_size_)
17+
, generated_rows(0), remaining_rows(total_num_rows), max_block_size(max_block_size_)
1718
{
1819
}
1920

@@ -27,10 +28,14 @@ class ConstChunkGenerator : public ISource
2728

2829
size_t num_rows = std::min(max_block_size, remaining_rows);
2930
remaining_rows -= num_rows;
30-
return cloneConstWithDefault(Chunk{getPort().getHeader().getColumns(), 0}, num_rows);
31+
auto chunk = cloneConstWithDefault(Chunk{getPort().getHeader().getColumns(), 0}, num_rows);
32+
chunk.getChunkInfos().add(std::make_shared<ChunkInfoRowNumOffset>(generated_rows));
33+
generated_rows += num_rows;
34+
return chunk;
3135
}
3236

3337
private:
38+
size_t generated_rows;
3439
size_t remaining_rows;
3540
size_t max_block_size;
3641
};

src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ extern const SettingsBool use_roaring_bitmap_iceberg_positional_deletes;
108108
extern const SettingsString iceberg_metadata_compression_method;
109109
extern const SettingsBool allow_experimental_insert_into_iceberg;
110110
extern const SettingsBool allow_experimental_iceberg_compaction;
111-
extern const SettingsBool allow_experimental_iceberg_read_optimization;
112111
}
113112

114113
namespace
@@ -820,12 +819,14 @@ void IcebergMetadata::addDeleteTransformers(
820819

821820
if (!iceberg_object_info->position_deletes_objects.empty())
822821
{
822+
LOG_DEBUG(log, "Constructing filter transform for position delete, there are {} delete objects", iceberg_object_info->position_deletes_objects.size());
823823
builder.addSimpleTransform(
824824
[&](const SharedHeader & header)
825825
{ return iceberg_object_info->getPositionDeleteTransformer(object_storage, header, format_settings, local_context); });
826826
}
827827
const auto & delete_files = iceberg_object_info->equality_deletes_objects;
828-
LOG_DEBUG(log, "Constructing filter transform for equality delete, there are {} delete files", delete_files.size());
828+
if (!delete_files.empty())
829+
LOG_DEBUG(log, "Constructing filter transform for equality delete, there are {} delete files", delete_files.size());
829830
for (const ManifestFileEntry & delete_file : delete_files)
830831
{
831832
auto simple_transform_adder = [&](const SharedHeader & header)

src/Storages/ObjectStorage/StorageObjectStorageSource.cpp

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,6 @@ Chunk StorageObjectStorageSource::generate()
273273
{
274274
lazyInitialize();
275275

276-
bool use_iceberg_read_optimization = read_context->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization];
277-
278276
while (true)
279277
{
280278
if (isCancelled() || !reader)
@@ -330,14 +328,14 @@ Chunk StorageObjectStorageSource::generate()
330328
.data_lake_snapshot_version = file_iterator->getSnapshotVersion()},
331329
read_context);
332330

333-
if (use_iceberg_read_optimization)
331+
/// Not empty when allow_experimental_iceberg_read_optimization=true
332+
/// and some columns were removed from read list as columns with constant values.
333+
/// Restore data for these columns.
334+
for (const auto & constant_column : reader.constant_columns_with_values)
334335
{
335-
for (const auto & constant_column : reader.constant_columns_with_values)
336-
{
337-
chunk.addColumn(constant_column.first,
338-
constant_column.second.name_and_type.type->createColumnConst(
339-
chunk.getNumRows(), constant_column.second.value)->convertToFullColumnIfConst());
340-
}
336+
chunk.addColumn(constant_column.first,
337+
constant_column.second.name_and_type.type->createColumnConst(
338+
chunk.getNumRows(), constant_column.second.value));
341339
}
342340

343341
#if USE_PARQUET && USE_AWS_S3
@@ -555,18 +553,19 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
555553
else
556554
rows_count_from_metadata = column.second.rows_count;
557555
}
556+
558557
if (column.second.hyperrectangle.has_value())
559558
{
559+
auto column_name = column.first;
560+
561+
auto i_column = requested_columns_list.find(column_name);
562+
if (i_column == requested_columns_list.end())
563+
continue;
564+
560565
if (column.second.hyperrectangle.value().isPoint() &&
561566
(!column.second.nulls_count.has_value() || column.second.nulls_count.value() <= 0))
562567
{
563-
auto column_name = column.first;
564-
565-
auto i_column = requested_columns_list.find(column_name);
566-
if (i_column == requested_columns_list.end())
567-
continue;
568-
569-
/// isPoint() method checks that left==right
568+
/// isPoint() method checks before that left==right
570569
constant_columns_with_values[i_column->second.first] =
571570
ConstColumnWithValue{
572571
i_column->second.second,
@@ -581,17 +580,9 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
581580
column.second.hyperrectangle.value().left.dump());
582581
}
583582
else if (column.second.rows_count.has_value() && column.second.nulls_count.has_value()
584-
&& column.second.rows_count.value() == column.second.nulls_count.value())
583+
&& column.second.rows_count.value() == column.second.nulls_count.value()
584+
&& i_column->second.second.type->isNullable())
585585
{
586-
auto column_name = column.first;
587-
588-
auto i_column = requested_columns_list.find(column_name);
589-
if (i_column == requested_columns_list.end())
590-
continue;
591-
592-
if (!i_column->second.second.type->isNullable())
593-
continue;
594-
595586
constant_columns_with_values[i_column->second.first] =
596587
ConstColumnWithValue{
597588
i_column->second.second,
@@ -660,6 +651,8 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
660651
columns.emplace_back(type->createColumn(), type, name);
661652
builder.init(Pipe(std::make_shared<ConstChunkGenerator>(
662653
std::make_shared<const Block>(columns), *num_rows_from_cache, max_block_size)));
654+
if (!constant_columns.empty())
655+
configuration->addDeleteTransformers(object_info, builder, format_settings, context_);
663656
}
664657
else
665658
{

tests/integration/test_storage_iceberg/test.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3292,3 +3292,12 @@ def check_events(query_id, event, expected):
32923292
check_events(const_partial_query_id, event, 4) # 1-2025, 6-2025 and 2-2025 must not be read
32933293
check_events(const_partial2_query_id, event, 3) # 6-2025 must not be read, 1-2024, 1-2025, 2-2025 don't have new column 'name'
32943294
check_events(count_query_id, event, 0) # All must not be read
3295+
3296+
def compare_selects(query):
3297+
result_expected = instance.query(f"{query} SETTINGS allow_experimental_iceberg_read_optimization=0")
3298+
result_optimized = instance.query(f"{query} SETTINGS allow_experimental_iceberg_read_optimization=1")
3299+
assert result_expected == result_optimized
3300+
3301+
compare_selects(f"SELECT _path,* FROM {creation_expression} ORDER BY ALL")
3302+
compare_selects(f"SELECT _path,* FROM {creation_expression} WHERE name_old='vasily' ORDER BY ALL")
3303+
compare_selects(f"SELECT _path,* FROM {creation_expression} WHERE ((tag + length(name_old)) % 2 = 1) ORDER BY ALL")

0 commit comments

Comments
 (0)