Skip to content

Commit 2e093bb

Browse files
d-becker and Impala Public Jenkins
authored and committed
IMPALA-13085: Add warning and NULL out DECIMAL values in Iceberg metadata tables
DECIMAL values are not supported in Iceberg metadata tables, and Impala hits a DCHECK and crashes if it encounters one. Until this issue is properly fixed (see IMPALA-13080), this commit introduces a temporary solution: DECIMAL values coming from Iceberg metadata tables are NULLed out and a warning is issued.

Testing:
- Added a DECIMAL column to the 'iceberg_metadata_alltypes' test table, so querying the `files` metadata table will include a DECIMAL in the 'readable_metrics' struct.

Change-Id: I0c8791805bc4fa2112e092e65366ca2815f3fa22
Reviewed-on: http://gerrit.cloudera.org:8080/21429
Reviewed-by: Daniel Becker <daniel.becker@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
1 parent d0237fb commit 2e093bb

File tree

4 files changed

+31
-5
lines changed

4 files changed

+31
-5
lines changed

be/src/exec/iceberg-metadata/iceberg-row-reader.cc

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ namespace impala {
3333
IcebergRowReader::IcebergRowReader(ScanNode* scan_node,
3434
IcebergMetadataScanner* metadata_scanner)
3535
: scan_node_(scan_node),
36-
metadata_scanner_(metadata_scanner) {}
36+
metadata_scanner_(metadata_scanner),
37+
unsupported_decimal_warning_emitted_(false) {}
3738

3839
Status IcebergRowReader::InitJNI() {
3940
DCHECK(list_cl_ == nullptr) << "InitJNI() already called!";
@@ -120,7 +121,10 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const jobject* struct_like_row,
120121
} case TYPE_DOUBLE: { // java.lang.Double
121122
RETURN_IF_ERROR(WriteDoubleSlot(env, accessed_value, slot));
122123
break;
123-
} case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
124+
} case TYPE_DECIMAL: {
125+
RETURN_IF_ERROR(WriteDecimalSlot(slot_desc, tuple, state));
126+
break;
127+
}case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
124128
RETURN_IF_ERROR(WriteTimeStampSlot(env, accessed_value, slot));
125129
break;
126130
} case TYPE_STRING: {
@@ -220,6 +224,20 @@ Status IcebergRowReader::WriteDoubleSlot(JNIEnv* env, const jobject &accessed_va
220224
return Status::OK();
221225
}
222226

227+
Status IcebergRowReader::WriteDecimalSlot(const SlotDescriptor* slot_desc, Tuple* tuple,
228+
RuntimeState* state) {
229+
// TODO IMPALA-13080: Handle DECIMALs without NULLing them out.
230+
constexpr const char* warning = "DECIMAL values from Iceberg metadata tables "
231+
"are displayed as NULL. See IMPALA-13080.";
232+
if (!unsupported_decimal_warning_emitted_) {
233+
unsupported_decimal_warning_emitted_ = true;
234+
LOG(WARNING) << warning;
235+
state->LogError(ErrorMsg(TErrorCode::NOT_IMPLEMENTED_ERROR, warning));
236+
}
237+
tuple->SetNull(slot_desc->null_indicator_offset());
238+
return Status::OK();
239+
}
240+
223241
Status IcebergRowReader::WriteTimeStampSlot(JNIEnv* env, const jobject &accessed_value,
224242
void* slot) {
225243
DCHECK(accessed_value != nullptr);

be/src/exec/iceberg-metadata/iceberg-row-reader.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ class IcebergRowReader {
7878
/// IcebergMetadataScanner class, used to get and access values inside java objects.
7979
IcebergMetadataScanner* metadata_scanner_;
8080

81+
/// We want to emit a warning about DECIMAL values being NULLed out at most once. This
82+
/// member keeps track of whether the warning has already been emitted.
83+
bool unsupported_decimal_warning_emitted_;
84+
8185
// Writes a Java value into the target tuple. 'struct_like_row' is only used for struct
8286
// types. It is needed because struct children reside directly in the parent tuple of
8387
// the struct.
@@ -99,6 +103,8 @@ class IcebergRowReader {
99103
WARN_UNUSED_RESULT;
100104
Status WriteDoubleSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
101105
WARN_UNUSED_RESULT;
106+
Status WriteDecimalSlot(const SlotDescriptor* slot_desc, Tuple* tuple,
107+
RuntimeState* state) WARN_UNUSED_RESULT;
102108
/// Iceberg TimeStamp is parsed into TimestampValue.
103109
Status WriteTimeStampSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
104110
WARN_UNUSED_RESULT;

testdata/datasets/functional/functional_schema_template.sql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3905,7 +3905,7 @@ CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
39053905
dt date,
39063906
s string,
39073907
bn binary,
3908-
-- TODO IMPALA-13080: Add decimal.
3908+
dc decimal,
39093909
strct struct<i: int>,
39103910
arr array<double>,
39113911
mp map<int, float>
@@ -3924,6 +3924,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
39243924
to_date("2024-05-14"),
39253925
"Some string",
39263926
"bin1",
3927+
15.48,
39273928
named_struct("i", 10),
39283929
array(cast(10.0 as double), cast(20.0 as double)),
39293930
map(10, cast(10.0 as float), 100, cast(100.0 as float))
@@ -3938,6 +3939,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
39383939
to_date("2025-06-15"),
39393940
"A string",
39403941
NULL,
3942+
5.8,
39413943
named_struct("i", -150),
39423944
array(cast(-10.0 as double), cast(-2e100 as double)),
39433945
map(10, cast(0.5 as float), 101, cast(1e3 as float))
@@ -3952,6 +3954,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
39523954
NULL,
39533955
NULL,
39543956
"bin2",
3957+
NULL,
39553958
named_struct("i", -150),
39563959
array(cast(-12.0 as double), cast(-2e100 as double)),
39573960
map(10, cast(0.5 as float), 101, cast(1e3 as float))

testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -834,12 +834,11 @@ STRING,DATE,DATE
834834
# Query the `files` metadata table of a table that contains all types - because of lower
835835
# and upper bounds, the 'readable_metrics' struct of the metadata table will also contain
836836
# all types.
837-
# TODO IMPALA-13080: Add DECIMAL.
838837
####
839838
---- QUERY
840839
select readable_metrics from functional_parquet.iceberg_metadata_alltypes.`files`;
841840
---- RESULTS
842-
regex:'{"arr.element":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":-2e\+100,"upper_bound":20},"b":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":false,"upper_bound":true},"bn":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"YmluMQ==","upper_bound":"YmluMg=="},"d":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":-2e-100,"upper_bound":2e\+100},"dt":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"2024-05-14","upper_bound":"2025-06-15"},"f":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":2.000000026702864e-10,"upper_bound":1999999973982208},"i":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":5},"l":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":-10,"upper_bound":150},"mp.key":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"mp.value":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":0.5,"upper_bound":1000},"s":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"A string","upper_bound":"Some string"},"strct.i":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":-150,"upper_bound":10},"ts":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"2024-05-14 14:51:12","upper_bound":"2025-06-15 18:51:12"}}'
841+
regex:'{"arr.element":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":-2e\+100,"upper_bound":20},"b":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":false,"upper_bound":true},"bn":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"YmluMQ==","upper_bound":"YmluMg=="},"d":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":-2e-100,"upper_bound":2e\+100},"dc":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"dt":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"2024-05-14","upper_bound":"2025-06-15"},"f":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":2.000000026702864e-10,"upper_bound":1999999973982208},"i":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":5},"l":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":-10,"upper_bound":150},"mp.key":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"mp.value":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":0.5,"upper_bound":1000},"s":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"A string","upper_bound":"Some string"},"strct.i":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":-150,"upper_bound":10},"ts":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"2024-05-14 14:51:12","upper_bound":"2025-06-15 18:51:12"}}'
843842
---- TYPES
844843
STRING
845844
====

0 commit comments

Comments
 (0)