|
12 | 12 |
|
13 | 13 | //! Logic for executing a planned SQL query. |
14 | 14 |
|
15 | | -use std::collections::BTreeSet; |
| 15 | +use std::collections::{BTreeMap, BTreeSet}; |
16 | 16 | use std::str::FromStr; |
17 | 17 | use std::sync::Arc; |
18 | 18 |
|
@@ -40,17 +40,21 @@ use mz_sql::plan::{ |
40 | 40 | }; |
41 | 41 | use mz_sql::rbac; |
42 | 42 | use mz_sql::session::metadata::SessionMetadata; |
| 43 | +use mz_sql::session::vars; |
43 | 44 | use mz_sql::session::vars::SessionVars; |
44 | 45 | use mz_sql_parser::ast::{Raw, Statement}; |
45 | 46 | use mz_storage_client::client::TableData; |
46 | 47 | use mz_storage_client::storage_collections::StorageCollections; |
47 | 48 | use mz_storage_types::connections::inline::IntoInlineConnection; |
| 49 | +use mz_storage_types::controller::StorageError; |
48 | 50 | use mz_storage_types::stats::RelationPartStats; |
49 | 51 | use mz_transform::dataflow::DataflowMetainfo; |
50 | 52 | use mz_transform::notice::{OptimizerNoticeApi, OptimizerNoticeKind, RawOptimizerNotice}; |
| 53 | +use mz_transform::{EmptyStatisticsOracle, StatisticsOracle}; |
51 | 54 | use timely::progress::Antichain; |
| 55 | +use timely::progress::Timestamp as TimelyTimestamp; |
52 | 56 | use tokio::sync::oneshot; |
53 | | -use tracing::{Instrument, Level, Span, event}; |
| 57 | +use tracing::{Instrument, Level, Span, event, warn}; |
54 | 58 |
|
55 | 59 | use crate::ExecuteContext; |
56 | 60 | use crate::catalog::{Catalog, CatalogState}; |
@@ -183,14 +187,14 @@ impl Coordinator { |
183 | 187 |
|
184 | 188 | if let Err(e) = rbac::check_plan( |
185 | 189 | &session_catalog, |
186 | | - |id| { |
| 190 | + Some(|id| { |
187 | 191 | // We use linear search through active connections if needed, which is fine |
188 | 192 | // because the RBAC check will call the closure at most once. |
189 | 193 | self.active_conns() |
190 | 194 | .into_iter() |
191 | 195 | .find(|(conn_id, _)| conn_id.unhandled() == id) |
192 | 196 | .map(|(_, conn_meta)| *conn_meta.authenticated_role_id()) |
193 | | - }, |
| 197 | + }), |
194 | 198 | ctx.session(), |
195 | 199 | &plan, |
196 | 200 | target_cluster_id, |
@@ -1205,3 +1209,88 @@ pub(crate) async fn explain_plan_inner( |
1205 | 1209 |
|
1206 | 1210 | Ok(rows) |
1207 | 1211 | } |
| 1212 | + |
| 1213 | +/// Creates a statistics oracle for query optimization. |
| 1214 | +/// |
| 1215 | +/// This is a free-standing function that can be called from both the old peek sequencing |
| 1216 | +/// and the new frontend peek sequencing. |
| 1217 | +pub(crate) async fn statistics_oracle( |
| 1218 | + session: &Session, |
| 1219 | + source_ids: &BTreeSet<GlobalId>, |
| 1220 | + query_as_of: &Antichain<Timestamp>, |
| 1221 | + is_oneshot: bool, |
| 1222 | + system_config: &vars::SystemVars, |
| 1223 | + storage_collections: &dyn StorageCollections<Timestamp = Timestamp>, |
| 1224 | +) -> Result<Box<dyn StatisticsOracle>, AdapterError> { |
| 1225 | + if !session.vars().enable_session_cardinality_estimates() { |
| 1226 | + return Ok(Box::new(EmptyStatisticsOracle)); |
| 1227 | + } |
| 1228 | + |
| 1229 | + let timeout = if is_oneshot { |
| 1230 | + // TODO(mgree): ideally, we would shorten the timeout even more if we think the query could take the fast path |
| 1231 | + system_config.optimizer_oneshot_stats_timeout() |
| 1232 | + } else { |
| 1233 | + system_config.optimizer_stats_timeout() |
| 1234 | + }; |
| 1235 | + |
| 1236 | + let cached_stats = mz_ore::future::timeout( |
| 1237 | + timeout, |
| 1238 | + CachedStatisticsOracle::new(source_ids, query_as_of, storage_collections), |
| 1239 | + ) |
| 1240 | + .await; |
| 1241 | + |
| 1242 | + match cached_stats { |
| 1243 | + Ok(stats) => Ok(Box::new(stats)), |
| 1244 | + Err(mz_ore::future::TimeoutError::DeadlineElapsed) => { |
| 1245 | + warn!( |
| 1246 | + is_oneshot = is_oneshot, |
| 1247 | + "optimizer statistics collection timed out after {}ms", |
| 1248 | + timeout.as_millis() |
| 1249 | + ); |
| 1250 | + |
| 1251 | + Ok(Box::new(EmptyStatisticsOracle)) |
| 1252 | + } |
| 1253 | + Err(mz_ore::future::TimeoutError::Inner(e)) => Err(AdapterError::Storage(e)), |
| 1254 | + } |
| 1255 | +} |
| 1256 | + |
/// A [`StatisticsOracle`] backed by a point-in-time snapshot of
/// per-collection statistics fetched from the storage layer.
#[derive(Debug)]
struct CachedStatisticsOracle {
    // Per-collection cardinality estimates (`num_updates` from snapshot
    // stats), keyed by the collection's `GlobalId`. Populated once in
    // `new`; read-only afterwards.
    cache: BTreeMap<GlobalId, usize>,
}
| 1261 | + |
| 1262 | +impl CachedStatisticsOracle { |
| 1263 | + pub async fn new<T: TimelyTimestamp>( |
| 1264 | + ids: &BTreeSet<GlobalId>, |
| 1265 | + as_of: &Antichain<T>, |
| 1266 | + storage_collections: &dyn mz_storage_client::storage_collections::StorageCollections<Timestamp = T>, |
| 1267 | + ) -> Result<Self, StorageError<T>> { |
| 1268 | + let mut cache = BTreeMap::new(); |
| 1269 | + |
| 1270 | + for id in ids { |
| 1271 | + let stats = storage_collections.snapshot_stats(*id, as_of.clone()).await; |
| 1272 | + |
| 1273 | + match stats { |
| 1274 | + Ok(stats) => { |
| 1275 | + cache.insert(*id, stats.num_updates); |
| 1276 | + } |
| 1277 | + Err(StorageError::IdentifierMissing(id)) => { |
| 1278 | + ::tracing::debug!("no statistics for {id}") |
| 1279 | + } |
| 1280 | + Err(e) => return Err(e), |
| 1281 | + } |
| 1282 | + } |
| 1283 | + |
| 1284 | + Ok(Self { cache }) |
| 1285 | + } |
| 1286 | +} |
| 1287 | + |
| 1288 | +impl StatisticsOracle for CachedStatisticsOracle { |
| 1289 | + fn cardinality_estimate(&self, id: GlobalId) -> Option<usize> { |
| 1290 | + self.cache.get(&id).map(|estimate| *estimate) |
| 1291 | + } |
| 1292 | + |
| 1293 | + fn as_map(&self) -> BTreeMap<GlobalId, usize> { |
| 1294 | + self.cache.clone() |
| 1295 | + } |
| 1296 | +} |
0 commit comments