Skip to content

Commit 07034df

Browse files
authored
fix: column not found in analyze (#17321)
* fix: Column not found in analyze
* fix
1 parent 3a32c18 commit 07034df

File tree

2 files changed

+77
-37
lines changed

2 files changed

+77
-37
lines changed

src/query/service/src/interpreters/interpreter_table_analyze.rs

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ impl Interpreter for AnalyzeTableInterpreter {
117117

118118
if let Some(snapshot) = snapshot_opt {
119119
// plan sql
120-
let schema = table.schema();
121120
let _table_info = table.get_table_info();
122121

123122
let table_statistics = table
@@ -165,22 +164,20 @@ impl Interpreter for AnalyzeTableInterpreter {
165164
.get_settings()
166165
.get_sql_dialect()?
167166
.default_ident_quote();
168-
let index_cols: Vec<(u32, String)> = schema
169-
.fields()
170-
.iter()
171-
.filter(|f| RangeIndex::supported_type(&f.data_type().into()))
172-
.map(|f| (f.column_id(), format!("{quote}{}{quote}", f.name)))
173-
.collect();
174167

175168
// 0.01625 --> 12 buckets --> 4K size per column
176169
// 1.04 / math.sqrt(1<<12) --> 0.01625
177170
const DISTINCT_ERROR_RATE: f64 = 0.01625;
178-
let ndv_select_expr = index_cols
171+
let ndv_select_expr = snapshot
172+
.schema
173+
.fields()
179174
.iter()
180-
.map(|c| {
175+
.filter(|f| RangeIndex::supported_type(&f.data_type().into()))
176+
.map(|f| {
181177
format!(
182-
"approx_count_distinct_state({DISTINCT_ERROR_RATE})({}) as ndv_{}",
183-
c.1, c.0
178+
"approx_count_distinct_state({DISTINCT_ERROR_RATE})({quote}{}{quote}) as ndv_{}",
179+
f.name,
180+
f.column_id()
184181
)
185182
})
186183
.join(", ");
@@ -190,7 +187,7 @@ impl Interpreter for AnalyzeTableInterpreter {
190187
plan.database, plan.table,
191188
);
192189

193-
info!("Analyze via sql {:?}", sql);
190+
info!("Analyze via sql: {sql}");
194191

195192
let (physical_plan, bind_context) = self.plan_sql(sql).await?;
196193
let mut build_res =
@@ -200,34 +197,33 @@ impl Interpreter for AnalyzeTableInterpreter {
200197
// We add a setting `enable_analyze_histogram` to control whether to compute histogram(default is closed).
201198
let mut histogram_info_receivers = HashMap::new();
202199
if self.ctx.get_settings().get_enable_analyze_histogram()? {
203-
let histogram_sqls = index_cols
200+
let histogram_sqls = table
201+
.schema()
202+
.fields()
204203
.iter()
205-
.map(|c| {
206-
format!(
207-
"SELECT quantile,
208-
COUNT(DISTINCT {}) AS ndv,
209-
MAX({}) AS max_value,
210-
MIN({}) AS min_value,
211-
COUNT() as count
212-
FROM (
213-
SELECT {}, NTILE({}) OVER (ORDER BY {}) AS quantile
214-
FROM {}.{} WHERE {} IS DISTINCT FROM NULL
215-
)
216-
GROUP BY quantile ORDER BY quantile \n",
217-
c.1,
218-
c.1,
219-
c.1,
220-
c.1,
221-
DEFAULT_HISTOGRAM_BUCKETS,
222-
c.1,
223-
plan.database,
224-
plan.table,
225-
c.1,
204+
.filter(|f| RangeIndex::supported_type(&f.data_type().into()))
205+
.map(|f| {
206+
let col_name = format!("{quote}{}{quote}", f.name);
207+
(
208+
format!(
209+
"SELECT quantile, \
210+
COUNT(DISTINCT {col_name}) AS ndv, \
211+
MAX({col_name}) AS max_value, \
212+
MIN({col_name}) AS min_value, \
213+
COUNT() as count \
214+
FROM ( \
215+
SELECT {col_name}, NTILE({}) OVER (ORDER BY {col_name}) AS quantile \
216+
FROM {}.{} WHERE {col_name} IS DISTINCT FROM NULL \
217+
) \
218+
GROUP BY quantile ORDER BY quantile",
219+
DEFAULT_HISTOGRAM_BUCKETS, plan.database, plan.table,
220+
),
221+
f.column_id(),
226222
)
227223
})
228224
.collect::<Vec<_>>();
229-
for (sql, (col_id, _)) in histogram_sqls.into_iter().zip(index_cols.iter()) {
230-
info!("Analyze histogram via sql {:?}", sql);
225+
for (sql, col_id) in histogram_sqls.into_iter() {
226+
info!("Analyze histogram via sql: {sql}");
231227
let (mut histogram_plan, bind_context) = self.plan_sql(sql).await?;
232228
if !self.ctx.get_cluster().is_empty() {
233229
histogram_plan = remove_exchange(histogram_plan);
@@ -253,7 +249,7 @@ impl Interpreter for AnalyzeTableInterpreter {
253249
build_res
254250
.sources_pipelines
255251
.extend(histogram_build_res.sources_pipelines);
256-
histogram_info_receivers.insert(*col_id, rx);
252+
histogram_info_receivers.insert(col_id, rx);
257253
}
258254
}
259255
FuseTable::do_analyze(
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
statement ok
2+
create or replace database issue_17314;
3+
4+
statement ok
5+
use issue_17314
6+
7+
statement ok
8+
set enable_analyze_histogram=1;
9+
10+
statement ok
11+
create or replace table t1(a string, biz_date1 string);
12+
13+
statement ok
14+
insert into t1 values('1', '11');
15+
16+
statement ok
17+
alter table t1 rename BIZ_date1 to BIZ_DATE;
18+
19+
statement ok
20+
analyze table t1;
21+
22+
statement ok
23+
insert into t1 values('2', '22');
24+
25+
statement ok
26+
insert into t1 values('3', '33');
27+
28+
statement ok
29+
alter table t1 rename BIZ_DATE to b;
30+
31+
statement ok
32+
analyze table t1;
33+
34+
query IIT
35+
select * from fuse_statistic('issue_17314', 't1') order by column_name;
36+
----
37+
a 3 [bucket id: 0, min: "1", max: "1", ndv: 1.0, count: 1.0], [bucket id: 1, min: "2", max: "2", ndv: 1.0, count: 1.0], [bucket id: 2, min: "3", max: "3", ndv: 1.0, count: 1.0]
38+
b 3 [bucket id: 0, min: "11", max: "11", ndv: 1.0, count: 1.0], [bucket id: 1, min: "22", max: "22", ndv: 1.0, count: 1.0], [bucket id: 2, min: "33", max: "33", ndv: 1.0, count: 1.0]
39+
40+
statement ok
41+
drop table t1 all;
42+
43+
statement ok
44+
drop database issue_17314;

0 commit comments

Comments (0)