Skip to content

Commit 113eaa2

Browse files
committed
Add reproducer for query metadata error
1 parent 924037e commit 113eaa2

File tree

2 files changed

+51
-12
lines changed

2 files changed

+51
-12
lines changed

datafusion/expr/src/expr_schema.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use crate::type_coercion::functions::fields_with_udf;
2525
use crate::udf::ReturnFieldArgs;
2626
use crate::{LogicalPlan, Projection, Subquery, WindowFunctionDefinition, utils};
2727
use arrow::compute::can_cast_types;
28-
use arrow::datatypes::{DataType, Field};
28+
use arrow::datatypes::{DataType, Field, FieldRef};
2929
use datafusion_common::datatype::FieldExt;
3030
use datafusion_common::metadata::FieldMetadata;
3131
use datafusion_common::{
@@ -458,7 +458,7 @@ impl ExprSchemable for Expr {
458458
/// with the default implementation returning empty field metadata
459459
/// - **Aggregate functions**: Generate metadata via function's [`return_field`] method,
460460
/// with the default implementation returning empty field metadata
461-
/// - **Window functions**: field metadata is empty
461+
/// - **Window functions**: field metadata is taken from the function's return field (`return_field` for aggregate functions used as window functions, `field` for window UDFs)
462462
///
463463
/// ## Table Reference Scoping
464464
/// - Establishes proper qualified field references when columns belong to specific tables
@@ -534,11 +534,7 @@ impl ExprSchemable for Expr {
534534
)))
535535
}
536536
Expr::WindowFunction(window_function) => {
537-
let (dt, nullable) = self.data_type_and_nullable_with_window_function(
538-
schema,
539-
window_function,
540-
)?;
541-
Ok(Arc::new(Field::new(&schema_name, dt, nullable)))
537+
self.window_function_field(schema, window_function)
542538
}
543539
Expr::AggregateFunction(aggregate_function) => {
544540
let AggregateFunction {
@@ -703,6 +699,15 @@ impl Expr {
703699
schema: &dyn ExprSchema,
704700
window_function: &WindowFunction,
705701
) -> Result<(DataType, bool)> {
702+
let return_field = self.window_function_field(schema, window_function)?;
703+
Ok((return_field.data_type().clone(), return_field.is_nullable()))
704+
}
705+
706+
fn window_function_field(
707+
&self,
708+
schema: &dyn ExprSchema,
709+
window_function: &WindowFunction,
710+
) -> Result<FieldRef> {
706711
let WindowFunction {
707712
fun,
708713
params: WindowFunctionParams { args, .. },
@@ -738,9 +743,7 @@ impl Expr {
738743
.into_iter()
739744
.collect::<Vec<_>>();
740745

741-
let return_field = udaf.return_field(&new_fields)?;
742-
743-
Ok((return_field.data_type().clone(), return_field.is_nullable()))
746+
udaf.return_field(&new_fields)
744747
}
745748
WindowFunctionDefinition::WindowUDF(udwf) => {
746749
let data_types = fields
@@ -769,7 +772,6 @@ impl Expr {
769772
let field_args = WindowUDFFieldArgs::new(&new_fields, &function_name);
770773

771774
udwf.field(field_args)
772-
.map(|field| (field.data_type().clone(), field.is_nullable()))
773775
}
774776
}
775777
}

datafusion/sqllogictest/test_files/metadata.slt

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@
2424
## in the test harness as there is no way to define schema
2525
## with metadata in SQL.
2626

27+
query ITTPT
28+
select * from table_with_metadata;
29+
----
30+
1 NULL NULL 2020-09-08T13:42:29.190855123 no_foo
31+
NULL bar l_bar 2020-09-08T13:42:29.190855123 no_bar
32+
3 baz l_baz 2020-09-08T13:42:29.190855123 no_baz
33+
34+
query TTT
35+
describe table_with_metadata;
36+
----
37+
id Int32 YES
38+
name Utf8 YES
39+
l_name Utf8 YES
40+
ts Timestamp(ns) NO
41+
nonnull_name Utf8 NO
42+
2743
query IT
2844
select id, name from table_with_metadata;
2945
----
@@ -235,7 +251,28 @@ order by 1 asc nulls last;
235251
3 1
236252
NULL 1
237253

238-
# Regression test: first_value should preserve metadata
254+
# Reproducer for https://github.com/apache/datafusion/issues/18337
255+
# This query should succeed rather than fail with an internal error
256+
query TI
257+
SELECT
258+
'foo' AS name,
259+
COUNT(
260+
CASE
261+
WHEN prev_value = 'no_bar' AND value = 'no_baz' THEN 1
262+
ELSE NULL
263+
END
264+
) AS count_rises
265+
FROM
266+
(
267+
SELECT
268+
nonnull_name as value,
269+
LAG(nonnull_name) OVER (ORDER BY ts) AS prev_value
270+
FROM
271+
table_with_metadata
272+
);
273+
----
274+
foo 1
275+
239276
query IT
240277
select first_value(id order by id asc nulls last), arrow_metadata(first_value(id order by id asc nulls last), 'metadata_key')
241278
from table_with_metadata;

0 commit comments

Comments
 (0)