Skip to content

Commit 4521ac4

Browse files
Backport ClickHouse#86832 to 25.8: Return fallback for schema resolution for a manifest file entry
1 parent 8950b79 commit 4521ac4

File tree

2 files changed

+22
-8
lines changed

2 files changed

+22
-8
lines changed

src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
#include <IO/ReadBufferFromString.h>
2323
#include <IO/ReadHelpers.h>
2424

25+
#include <Common/logger_useful.h>
26+
27+
2528
namespace DB::ErrorCodes
2629
{
2730
extern const int ICEBERG_SPECIFICATION_VIOLATION;
@@ -143,7 +146,7 @@ ManifestFileContent::ManifestFileContent(
143146
const String & manifest_file_name,
144147
Int32 format_version_,
145148
const String & common_path,
146-
const IcebergSchemaProcessor & schema_processor,
149+
IcebergSchemaProcessor & schema_processor,
147150
Int64 inherited_sequence_number,
148151
Int64 inherited_snapshot_id,
149152
const String & table_location,
@@ -196,6 +199,8 @@ ManifestFileContent::ManifestFileContent(
196199
const Poco::JSON::Object::Ptr & schema_object = json.extract<Poco::JSON::Object::Ptr>();
197200
Int32 manifest_schema_id = schema_object->getValue<int>(f_schema_id);
198201

202+
schema_processor.addIcebergTableSchema(schema_object);
203+
199204
for (size_t i = 0; i != partition_specification->size(); ++i)
200205
{
201206
auto partition_specification_field = partition_specification->getObject(static_cast<UInt32>(i));
@@ -262,13 +267,22 @@ ManifestFileContent::ManifestFileContent(
262267
const auto schema_id_opt = schema_processor.tryGetSchemaIdForSnapshot(snapshot_id);
263268
if (!schema_id_opt.has_value())
264269
{
265-
throw Exception(
266-
ErrorCodes::ICEBERG_SPECIFICATION_VIOLATION,
267-
"Cannot read Iceberg table: manifest file '{}' has entry with snapshot_id '{}' for which write file schema is unknown",
268-
manifest_file_name,
269-
snapshot_id);
270+
/// The error is logged rather than thrown, so that the whole query does not fail (kept for backward compatibility).
271+
/// This is still a real error, because it can lead to incorrect query results, so we create an exception in order to record it in system.error_log.
272+
try
273+
{
274+
throw Exception(
275+
ErrorCodes::ICEBERG_SPECIFICATION_VIOLATION,
276+
"Cannot read Iceberg table: manifest file '{}' has entry with snapshot_id '{}' for which write file schema is unknown",
277+
manifest_file_name,
278+
snapshot_id);
279+
}
280+
catch (const Exception &)
281+
{
282+
tryLogCurrentException("ICEBERG_SPECIFICATION_VIOLATION", "", LogsLevel::error);
283+
}
270284
}
271-
const auto schema_id = schema_id_opt.value();
285+
const auto schema_id = schema_id_opt.has_value() ? schema_id_opt.value() : manifest_schema_id;
272286

273287
const auto file_path_key
274288
= manifest_file_deserializer.getValueFromRowByName(i, c_data_file_file_path, TypeIndex::String).safeGet<String>();

src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ class ManifestFileContent : public boost::noncopyable
111111
const String & manifest_file_name,
112112
Int32 format_version_,
113113
const String & common_path,
114-
const IcebergSchemaProcessor & schema_processor,
114+
IcebergSchemaProcessor & schema_processor,
115115
Int64 inherited_sequence_number,
116116
Int64 inherited_snapshot_id,
117117
const std::string & table_location,

0 commit comments

Comments
 (0)