Skip to content

Commit a723a44

Browse files
authored
fix(query): fix incorrect domain folder (#17549)
* fix(query): fix incorrect domain folder * fix(query): fix incorrect domain folder * fix(query): fix incorrect domain folder * fix(query): fix incorrect domain folder * fix(query): fix incorrect domain folder * update * update * update * update
1 parent 1263e53 commit a723a44

File tree

22 files changed

+1352
-1187
lines changed

22 files changed

+1352
-1187
lines changed

Cargo.lock

Lines changed: 23 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ jaq-core = "1.5.1"
339339
jaq-interpret = "1.5.0"
340340
jaq-parse = "1.0.3"
341341
jaq-std = "1.6.0"
342-
jiff = { version = "0.1.26", features = ["serde", "tzdb-bundle-always"] }
342+
jiff = { version = "0.2.1", features = ["serde", "tzdb-bundle-always"] }
343343
jsonb = "0.4.4"
344344
jwt-simple = { version = "0.12.10", default-features = false, features = ["pure-rust"] }
345345
lenient_semver = "0.4.2"

src/query/expression/src/evaluator.rs

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,8 +2124,9 @@ impl<'a, Index: ColumnIndex> ConstantFolder<'a, Index> {
21242124
}
21252125
};
21262126

2127-
let func_domain = args_domain.and_then(|domains| {
2128-
match ((calc_domain)(self.func_ctx, &domains), is_monotonicity) {
2127+
let func_domain = args_domain.and_then(|domains: Vec<Domain>| {
2128+
let res = (calc_domain)(self.func_ctx, &domains);
2129+
match (res, is_monotonicity) {
21292130
(FunctionDomain::MayThrow | FunctionDomain::Full, true) => {
21302131
let (min, max) = domains.iter().map(Domain::to_minmax).next().unwrap();
21312132

@@ -2142,7 +2143,6 @@ impl<'a, Index: ColumnIndex> ConstantFolder<'a, Index> {
21422143
func_ctx: self.func_ctx,
21432144
suppress_error: false,
21442145
};
2145-
21462146
let mut builder =
21472147
ColumnBuilder::with_capacity(args[0].data_type(), 2);
21482148
builder.push(min.as_ref());
@@ -2154,21 +2154,35 @@ impl<'a, Index: ColumnIndex> ConstantFolder<'a, Index> {
21542154
if result.is_scalar() {
21552155
None
21562156
} else {
2157-
let d = result.as_column().unwrap().domain();
2158-
if !ctx.has_error(0) && !ctx.has_error(1) {
2159-
Some(d)
2160-
} else {
2161-
let (mut min, mut max) = d.to_minmax();
2157+
// if an error happens, the domain may be incorrect
2158+
// min, max: String("2024-09-02 00:00") String("2024-09-02 00:0�")
2159+
// to_date(s) > to_date('2024-01-1')
2160+
let col = result.as_column().unwrap();
2161+
let d = if ctx.has_error(0) || ctx.has_error(1) {
21622162
let (full_min, full_max) =
21632163
Domain::full(return_type).to_minmax();
2164-
if ctx.has_error(0) {
2165-
min = full_min;
2164+
if full_min.is_null() || full_max.is_null() {
2165+
return None;
21662166
}
2167-
if ctx.has_error(1) {
2168-
max = full_max;
2167+
2168+
let mut builder =
2169+
ColumnBuilder::with_capacity(return_type, 2);
2170+
2171+
for (i, (v, f)) in
2172+
col.iter().zip([full_min, full_max].iter()).enumerate()
2173+
{
2174+
if ctx.has_error(i) {
2175+
builder.push(f.as_ref());
2176+
} else {
2177+
builder.push(v);
2178+
}
21692179
}
2170-
Some(Domain::from_min_max(min, max, return_type))
2171-
}
2180+
builder.build().domain()
2181+
} else {
2182+
result.as_column().unwrap().domain()
2183+
};
2184+
let (min, max) = d.to_minmax();
2185+
Some(Domain::from_min_max(min, max, return_type))
21722186
}
21732187
}
21742188
}

src/query/expression/src/types/date.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,10 @@ use crate::ScalarRef;
4242

4343
pub const DATE_FORMAT: &str = "%Y-%m-%d";
4444
/// Minimum valid date, represented by the day offset from 1970-01-01.
45-
pub const DATE_MIN: i32 = -354285;
45+
/// 0001-01-01
46+
pub const DATE_MIN: i32 = -719162;
4647
/// Maximum valid date, represented by the day offset from 1970-01-01.
48+
/// 9999-12-31
4749
pub const DATE_MAX: i32 = 2932896;
4850

4951
/// Check if date is within range.

src/query/expression/src/types/timestamp.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ use crate::ColumnBuilder;
4242
use crate::ScalarRef;
4343

4444
pub const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.6f";
45-
/// Minimum valid timestamp `1000-01-01 00:00:00.000000`, represented by the microsecs offset from 1970-01-01.
46-
pub const TIMESTAMP_MIN: i64 = -30610224000000000;
45+
/// Minimum valid timestamp `0001-01-01 00:00:00.000000`, represented by the microsecs offset from 1970-01-01.
46+
pub const TIMESTAMP_MIN: i64 = -62135596800000000;
4747
/// Maximum valid timestamp `9999-12-31 23:59:59.999999`, represented by the microsecs offset from 1970-01-01.
4848
pub const TIMESTAMP_MAX: i64 = 253402300799999999;
4949

src/query/expression/src/values.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,16 +1093,24 @@ impl Column {
10931093
}
10941094

10951095
pub fn domain(&self) -> Domain {
1096-
if !matches!(self, Column::Array(_) | Column::Map(_)) {
1097-
assert!(self.len() > 0);
1096+
if self.len() == 0 {
1097+
if matches!(self, Column::Array(_)) {
1098+
return Domain::Array(None);
1099+
}
1100+
if matches!(self, Column::Map(_)) {
1101+
return Domain::Map(None);
1102+
}
1103+
return Domain::full(&self.data_type());
10981104
}
1105+
10991106
match self {
11001107
Column::Null { .. } => Domain::Nullable(NullableDomain {
11011108
has_null: true,
11021109
value: None,
11031110
}),
11041111
Column::EmptyArray { .. } => Domain::Array(None),
11051112
Column::EmptyMap { .. } => Domain::Map(None),
1113+
11061114
Column::Number(col) => Domain::Number(col.domain()),
11071115
Column::Decimal(col) => Domain::Decimal(col.domain()),
11081116
Column::Boolean(col) => Domain::Boolean(BooleanDomain {
@@ -1154,7 +1162,13 @@ impl Column {
11541162
}
11551163
}
11561164
Column::Nullable(col) => {
1157-
let inner_domain = col.column.domain();
1165+
let inner_domain = if col.validity.null_count() > 0 {
1166+
// slower path: create a new column without nulls
1167+
let inner = col.column.clone().filter(&col.validity);
1168+
inner.domain()
1169+
} else {
1170+
col.column.domain()
1171+
};
11581172
Domain::Nullable(NullableDomain {
11591173
has_null: col.validity.null_count() > 0,
11601174
value: Some(Box::new(inner_domain)),

src/query/functions/src/scalars/timestamp/src/datetime.rs

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ use databend_common_expression::types::timestamp::string_to_timestamp;
3939
use databend_common_expression::types::timestamp::timestamp_to_string;
4040
use databend_common_expression::types::timestamp::MICROS_PER_MILLI;
4141
use databend_common_expression::types::timestamp::MICROS_PER_SEC;
42+
use databend_common_expression::types::timestamp::TIMESTAMP_MAX;
43+
use databend_common_expression::types::timestamp::TIMESTAMP_MIN;
4244
use databend_common_expression::types::Bitmap;
4345
use databend_common_expression::types::DataType;
4446
use databend_common_expression::types::DateType;
@@ -221,7 +223,42 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) {
221223

222224
registry.register_passthrough_nullable_1_arg::<StringType, TimestampType, _, _>(
223225
"to_timestamp",
224-
|_, _| FunctionDomain::MayThrow,
226+
|ctx, d| {
227+
let max = d.max.clone().unwrap_or_default();
228+
let mut res = Vec::with_capacity(2);
229+
for (i, v) in [&d.min, &max].iter().enumerate() {
230+
let mut extend_num = 0;
231+
if i == 1 && d.max.is_none() {
232+
// the max domain is unbounded
233+
res.push(TIMESTAMP_MAX);
234+
break;
235+
}
236+
let mut d = string_to_timestamp(v, &ctx.tz);
237+
// the string max domain may be truncated into `"2024-09-02 00:0�"`
238+
const MAX_LEN: usize = "1000-01-01".len();
239+
if d.is_err() && v.len() > MAX_LEN {
240+
d = string_to_timestamp(&v[0..MAX_LEN], &ctx.tz);
241+
if i == 0 {
242+
extend_num = -1;
243+
} else {
244+
extend_num = 1;
245+
}
246+
}
247+
248+
if let Ok(ts) = d {
249+
res.push(
250+
ts.timestamp().as_microsecond()
251+
+ extend_num * (24 * 60 * 60 * MICROS_PER_SEC - 1),
252+
);
253+
} else {
254+
return FunctionDomain::MayThrow;
255+
}
256+
}
257+
FunctionDomain::Domain(SimpleDomain {
258+
min: res[0].clamp(TIMESTAMP_MIN, TIMESTAMP_MAX),
259+
max: res[1].clamp(TIMESTAMP_MIN, TIMESTAMP_MAX),
260+
})
261+
},
225262
eval_string_to_timestamp,
226263
);
227264
registry.register_combine_nullable_1_arg::<StringType, TimestampType, _, _>(
@@ -502,7 +539,43 @@ fn register_number_to_timestamp(registry: &mut FunctionRegistry) {
502539
fn register_string_to_date(registry: &mut FunctionRegistry) {
503540
registry.register_passthrough_nullable_1_arg::<StringType, DateType, _, _>(
504541
"to_date",
505-
|_, _| FunctionDomain::MayThrow,
542+
|ctx, d| {
543+
let max = d.max.clone().unwrap_or_default();
544+
let mut res = Vec::with_capacity(2);
545+
for (i, v) in [&d.min, &max].iter().enumerate() {
546+
if i == 1 && d.max.is_none() {
547+
// the max domain is unbounded
548+
res.push(DATE_MAX);
549+
break;
550+
}
551+
552+
let mut extend_num = 0;
553+
let mut d = string_to_date(v, &ctx.tz);
554+
if d.is_err() && v.len() > 10 {
555+
d = string_to_date(&v[0..10], &ctx.tz);
556+
if i == 0 {
557+
extend_num = -1;
558+
} else {
559+
extend_num = 1;
560+
}
561+
}
562+
563+
if d.is_err() {
564+
return FunctionDomain::MayThrow;
565+
}
566+
let days = d
567+
.unwrap()
568+
.since((Unit::Day, date(1970, 1, 1)))
569+
.unwrap()
570+
.get_days();
571+
res.push(days + extend_num);
572+
}
573+
574+
FunctionDomain::Domain(SimpleDomain {
575+
min: res[0].clamp(DATE_MIN, DATE_MAX),
576+
max: res[1].clamp(DATE_MIN, DATE_MAX),
577+
})
578+
},
506579
eval_string_to_date,
507580
);
508581
registry.register_combine_nullable_1_arg::<StringType, DateType, _, _>(

0 commit comments

Comments
 (0)