diff --git a/packages/cubejs-api-gateway/openspec.yml b/packages/cubejs-api-gateway/openspec.yml index ce0051b83ab5f..a895bdb3cbab5 100644 --- a/packages/cubejs-api-gateway/openspec.yml +++ b/packages/cubejs-api-gateway/openspec.yml @@ -329,6 +329,24 @@ components: - $ref: "#/components/schemas/V1LoadRequestQueryFilterBase" - $ref: "#/components/schemas/V1LoadRequestQueryFilterLogicalOr" - $ref: "#/components/schemas/V1LoadRequestQueryFilterLogicalAnd" + V1LoadRequestQueryJoinSubquery: + type: "object" + properties: + sql: + type: "string" + # TODO This is _always_ a member expression, maybe pass as parsed, without intermediate string? + "on": + type: "string" + # TODO why string? it's enum + joinType: + type: "string" + alias: + type: "string" + required: + - sql + - "on" + - joinType + - alias V1LoadRequestQuery: type: "object" properties: @@ -366,6 +384,12 @@ components: $ref: "#/components/schemas/V1LoadRequestQueryFilterItem" ungrouped: type: "boolean" + # vector of (subquery sql: string, join condition: member expression, join type: enum) + # they will be added to end of joinQuery in BaseQuery, in same order as here + subqueryJoins: + type: "array" + items: + $ref: "#/components/schemas/V1LoadRequestQueryJoinSubquery" V1LoadRequest: type: "object" properties: diff --git a/packages/cubejs-api-gateway/src/gateway.ts b/packages/cubejs-api-gateway/src/gateway.ts index f88b1c675dd59..006db77b91d3b 100644 --- a/packages/cubejs-api-gateway/src/gateway.ts +++ b/packages/cubejs-api-gateway/src/gateway.ts @@ -1312,7 +1312,12 @@ class ApiGateway { } private hasExpressionsInQuery(query: Query): boolean { - const arraysToCheck = [query.measures, query.dimensions, query.segments]; + const arraysToCheck = [ + query.measures, + query.dimensions, + query.segments, + (query.subqueryJoins ?? 
[]).map(join => join.on), + ]; return arraysToCheck.some(array => array?.some(item => typeof item === 'string' && item.startsWith('{'))); } @@ -1323,6 +1328,10 @@ class ApiGateway { measures: (query.measures || []).map(m => (typeof m === 'string' ? this.parseMemberExpression(m) : m)), dimensions: (query.dimensions || []).map(m => (typeof m === 'string' ? this.parseMemberExpression(m) : m)), segments: (query.segments || []).map(m => (typeof m === 'string' ? this.parseMemberExpression(m) : m)), + subqueryJoins: (query.subqueryJoins ?? []).map(join => (typeof join.on === 'string' ? { + ...join, + on: this.parseMemberExpression(join.on), + } : join)), }; } @@ -1361,6 +1370,10 @@ class ApiGateway { measures: (query.measures || []).map(m => (typeof m !== 'string' ? this.evalMemberExpression(m as ParsedMemberExpression) : m)), dimensions: (query.dimensions || []).map(m => (typeof m !== 'string' ? this.evalMemberExpression(m as ParsedMemberExpression) : m)), segments: (query.segments || []).map(m => (typeof m !== 'string' ? this.evalMemberExpression(m as ParsedMemberExpression) : m)), + subqueryJoins: (query.subqueryJoins ?? []).map(join => (typeof join.on !== 'string' ? { + ...join, + on: this.evalMemberExpression(join.on as ParsedMemberExpression) + } : join)), }; } diff --git a/packages/cubejs-api-gateway/src/query.js b/packages/cubejs-api-gateway/src/query.js index b83148c80c668..18735bbcc7d93 100644 --- a/packages/cubejs-api-gateway/src/query.js +++ b/packages/cubejs-api-gateway/src/query.js @@ -96,6 +96,15 @@ const oneCondition = Joi.object().keys({ and: Joi.array().items(oneFilter, Joi.link('...').description('oneCondition schema')), }).xor('or', 'and'); +const subqueryJoin = Joi.object().keys({ + sql: Joi.string(), + // TODO This is _always_ a member expression, maybe pass as parsed, without intermediate string? 
+ // TODO there are three different types instead of alternatives for this actually + on: Joi.alternatives(Joi.string(), memberExpression, parsedMemberExpression), + joinType: Joi.string().valid('LEFT', 'INNER'), + alias: Joi.string(), +}); + const querySchema = Joi.object().keys({ // TODO add member expression alternatives only for SQL API queries? measures: Joi.array().items(Joi.alternatives(id, memberExpression, parsedMemberExpression)), @@ -122,6 +131,7 @@ const querySchema = Joi.object().keys({ renewQuery: Joi.boolean(), ungrouped: Joi.boolean(), responseFormat: Joi.valid('default', 'compact'), + subqueryJoins: Joi.array().items(subqueryJoin), }); const normalizeQueryOrder = order => { diff --git a/packages/cubejs-api-gateway/src/types/query.ts b/packages/cubejs-api-gateway/src/types/query.ts index e93dc16158334..8c1f1c03c5cf7 100644 --- a/packages/cubejs-api-gateway/src/types/query.ts +++ b/packages/cubejs-api-gateway/src/types/query.ts @@ -67,6 +67,15 @@ interface QueryTimeDimension { granularity?: QueryTimeDimensionGranularity; } +type SubqueryJoins = { + sql: string, + // TODO This is _always_ a member expression, maybe pass as parsed, without intermediate string? + // TODO there are three different types instead of alternatives for this actually + on: string | ParsedMemberExpression | MemberExpression, + joinType: 'LEFT' | 'INNER', + alias: string, +}; + /** * Incoming network query data type. 
*/ @@ -85,6 +94,9 @@ interface Query { renewQuery?: boolean; ungrouped?: boolean; responseFormat?: ResultType; + + // TODO incoming query, query with parsed exprs and query with evaluated exprs are all different types + subqueryJoins?: Array, } /** diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 775299299f3f4..002586ce92307 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -60,6 +60,23 @@ const SecondsDurations = { * @property {*} headCommitId */ +/** + * @typedef {Object} JoinRoot + * @property {string} sql + * @property {string} alias + */ + +/** + * @typedef {Object} JoinItem + * @property {string} sql + * @property {string} alias + * @property {string} on + */ + +/** + * @typedef {[JoinRoot, ...JoinItem]} JoinChain + */ + /** * BaseQuery class. BaseQuery object encapsulates the logic of * transforming an incoming to a specific cube request to the @@ -224,6 +241,7 @@ export class BaseQuery { multiStageQuery: this.options.multiStageQuery, multiStageDimensions: this.options.multiStageDimensions, multiStageTimeDimensions: this.options.multiStageTimeDimensions, + subqueryJoins: this.options.subqueryJoins, }); this.from = this.options.from; this.multiStageQuery = this.options.multiStageQuery; @@ -269,6 +287,11 @@ export class BaseQuery { this.preAggregationsSchemaOption = this.options.preAggregationsSchema ?? DEFAULT_PREAGGREGATIONS_SCHEMA; this.externalQueryClass = this.options.externalQueryClass; + /** + * @type {Array<{sql: string, on: {expression: Function}, joinType: 'LEFT' | 'INNER', alias: string}>} + */ + this.customSubQueryJoins = this.options.subqueryJoins ?? []; + // Set the default order only when options.order is not provided at all // if options.order is set (empty array [] or with data) - use it as is this.order = this.options.order ?? 
this.defaultOrder(); @@ -1604,19 +1627,44 @@ export class BaseQuery { return this.joinSql([ { sql: cubeSql, alias: cubeAlias }, ...(subQueryDimensionsByCube[join.root] || []).map(d => this.subQueryJoin(d)), - ...joins + ...joins, + ...this.customSubQueryJoins.map((customJoin) => this.customSubQueryJoin(customJoin)), ]); } joinSql(toJoin) { const [root, ...rest] = toJoin; const joins = rest.map( - j => `LEFT JOIN ${j.sql} ${this.asSyntaxJoin} ${j.alias} ON ${j.on}` + j => { + const joinType = j.joinType ?? 'LEFT'; + return `${joinType} JOIN ${j.sql} ${this.asSyntaxJoin} ${j.alias} ON ${j.on}`; + } ); return [`${root.sql} ${this.asSyntaxJoin} ${root.alias}`, ...joins].join('\n'); } + /** + * + * @param {{sql: string, on: {cubeName: string, expression: Function}, joinType: 'LEFT' | 'INNER', alias: string}} customJoin + * @returns {JoinItem} + */ + customSubQueryJoin(customJoin) { + const on = this.evaluateSql(customJoin.on.cubeName, customJoin.on.expression); + + return { + sql: `(${customJoin.sql})`, + alias: customJoin.alias, + on, + joinType: customJoin.joinType, + }; + } + + /** + * + * @param {string} dimension + * @returns {JoinItem} + */ subQueryJoin(dimension) { const { prefix, subQuery, cubeName } = this.subQueryDescription(dimension); const primaryKeys = this.cubeEvaluator.primaryKeys[cubeName]; diff --git a/packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap b/packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap index f027d7904b85a..fe01e2340efa8 100644 --- a/packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap +++ b/packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap @@ -57,6 +57,32 @@ Array [ ] `; +exports[`SQL API Postgres (Data) join with filtered grouped query: join grouped with filter 1`] = ` +Array [ + Object { + "count": "2", + "status": "processed", + }, + Object { + "count": "2", + "status": "new", + }, +] +`; + +exports[`SQL API Postgres (Data) join with grouped query: join 
grouped 1`] = ` +Array [ + Object { + "count": "2", + "status": "processed", + }, + Object { + "count": "1", + "status": "shipped", + }, +] +`; + exports[`SQL API Postgres (Data) metabase max number: metabase max number 1`] = ` Array [ Object { diff --git a/packages/cubejs-testing/test/smoke-cubesql.test.ts b/packages/cubejs-testing/test/smoke-cubesql.test.ts index ab816da2ecf28..0d1595956bdf0 100644 --- a/packages/cubejs-testing/test/smoke-cubesql.test.ts +++ b/packages/cubejs-testing/test/smoke-cubesql.test.ts @@ -476,6 +476,65 @@ filter_subq AS ( expect(res.rows).toMatchSnapshot('select __user and literal in wrapper'); }); + test('join with grouped query', async () => { + const query = ` + SELECT + "Orders".status AS status, + COUNT(*) AS count + FROM + "Orders" + INNER JOIN + ( + SELECT + status, + SUM(totalAmount) + FROM + "Orders" + GROUP BY 1 + ORDER BY 2 DESC + LIMIT 2 + ) top_orders + ON + "Orders".status = top_orders.status + GROUP BY 1 + ORDER BY 1 + `; + + const res = await connection.query(query); + // Expect only the top 2 statuses by total amount: processed and shipped + expect(res.rows).toMatchSnapshot('join grouped'); + }); + + test('join with filtered grouped query', async () => { + const query = ` + SELECT + "Orders".status AS status, + COUNT(*) AS count + FROM + "Orders" + INNER JOIN + ( + SELECT + status, + SUM(totalAmount) + FROM + "Orders" + WHERE + status NOT IN ('shipped') + GROUP BY 1 + ORDER BY 2 DESC + LIMIT 2 + ) top_orders + ON + "Orders".status = top_orders.status + GROUP BY 1 + `; + + const res = await connection.query(query); + // Expect only the top 2 statuses by total amount, with shipped filtered out: processed and new + expect(res.rows).toMatchSnapshot('join grouped with filter'); + }); + test('where segment is false', async () => { const query = 'SELECT value AS val, * FROM "SegmentTest" WHERE segment_eq_1 IS FALSE ORDER BY value;'; diff --git a/rust/cubesql/cubeclient/.openapi-generator/FILES 
b/rust/cubesql/cubeclient/.openapi-generator/FILES index 3b7c992366f32..2ac921858ba3f 100644 --- a/rust/cubesql/cubeclient/.openapi-generator/FILES +++ b/rust/cubesql/cubeclient/.openapi-generator/FILES @@ -16,6 +16,7 @@ src/models/v1_load_request_query_filter_base.rs src/models/v1_load_request_query_filter_item.rs src/models/v1_load_request_query_filter_logical_and.rs src/models/v1_load_request_query_filter_logical_or.rs +src/models/v1_load_request_query_join_subquery.rs src/models/v1_load_request_query_time_dimension.rs src/models/v1_load_response.rs src/models/v1_load_result.rs diff --git a/rust/cubesql/cubeclient/src/models/mod.rs b/rust/cubesql/cubeclient/src/models/mod.rs index 409b2b4865628..361e96528dbc3 100644 --- a/rust/cubesql/cubeclient/src/models/mod.rs +++ b/rust/cubesql/cubeclient/src/models/mod.rs @@ -28,6 +28,8 @@ pub mod v1_load_request_query_filter_logical_and; pub use self::v1_load_request_query_filter_logical_and::V1LoadRequestQueryFilterLogicalAnd; pub mod v1_load_request_query_filter_logical_or; pub use self::v1_load_request_query_filter_logical_or::V1LoadRequestQueryFilterLogicalOr; +pub mod v1_load_request_query_join_subquery; +pub use self::v1_load_request_query_join_subquery::V1LoadRequestQueryJoinSubquery; pub mod v1_load_request_query_time_dimension; pub use self::v1_load_request_query_time_dimension::V1LoadRequestQueryTimeDimension; pub mod v1_load_response; diff --git a/rust/cubesql/cubeclient/src/models/v1_load_request_query.rs b/rust/cubesql/cubeclient/src/models/v1_load_request_query.rs index 25332f8a94cd3..2ce959d038699 100644 --- a/rust/cubesql/cubeclient/src/models/v1_load_request_query.rs +++ b/rust/cubesql/cubeclient/src/models/v1_load_request_query.rs @@ -28,6 +28,8 @@ pub struct V1LoadRequestQuery { pub filters: Option>, #[serde(rename = "ungrouped", skip_serializing_if = "Option::is_none")] pub ungrouped: Option, + #[serde(rename = "subqueryJoins", skip_serializing_if = "Option::is_none")] + pub subquery_joins: Option>, } 
impl V1LoadRequestQuery { @@ -42,6 +44,7 @@ impl V1LoadRequestQuery { offset: None, filters: None, ungrouped: None, + subquery_joins: None, } } } diff --git a/rust/cubesql/cubeclient/src/models/v1_load_request_query_join_subquery.rs b/rust/cubesql/cubeclient/src/models/v1_load_request_query_join_subquery.rs new file mode 100644 index 0000000000000..8a5db06e44899 --- /dev/null +++ b/rust/cubesql/cubeclient/src/models/v1_load_request_query_join_subquery.rs @@ -0,0 +1,37 @@ +/* + * Cube.js + * + * Cube.js Swagger Schema + * + * The version of the OpenAPI document: 1.0.0 + * + * Generated by: https://openapi-generator.tech + */ + +#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)] +pub struct V1LoadRequestQueryJoinSubquery { + #[serde(rename = "sql")] + pub sql: String, + #[serde(rename = "on")] + pub on: String, + #[serde(rename = "joinType")] + pub join_type: String, + #[serde(rename = "alias")] + pub alias: String, +} + +impl V1LoadRequestQueryJoinSubquery { + pub fn new( + sql: String, + on: String, + join_type: String, + alias: String, + ) -> V1LoadRequestQueryJoinSubquery { + V1LoadRequestQueryJoinSubquery { + sql, + on, + join_type, + alias, + } + } +} diff --git a/rust/cubesql/cubesql/src/compile/builder.rs b/rust/cubesql/cubesql/src/compile/builder.rs index b1788863edab4..f265191bc85c4 100644 --- a/rust/cubesql/cubesql/src/compile/builder.rs +++ b/rust/cubesql/cubesql/src/compile/builder.rs @@ -151,6 +151,7 @@ impl QueryBuilder { None }, ungrouped: None, + subquery_joins: None, }, meta: self.meta, } diff --git a/rust/cubesql/cubesql/src/compile/engine/df/scan.rs b/rust/cubesql/cubesql/src/compile/engine/df/scan.rs index 11d7f03695d5f..92095825369e5 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/scan.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/scan.rs @@ -30,7 +30,7 @@ use std::{ use crate::{ compile::{ - engine::df::wrapper::{CubeScanWrapperNode, SqlQuery}, + engine::df::wrapper::{CubeScanWrappedSqlNode, CubeScanWrapperNode, 
SqlQuery}, rewrite::WrappedSelectType, test::find_cube_scans_deep_search, }, @@ -394,35 +394,32 @@ impl ExtensionPlanner for CubeScanExtensionPlanner { config_obj: self.config_obj.clone(), })) } else if let Some(wrapper_node) = node.as_any().downcast_ref::() { + return Err(DataFusionError::Internal(format!( + "CubeScanWrapperNode is not executable, SQL should be generated first with QueryEngine::evaluate_wrapped_sql: {:?}", + wrapper_node + ))); + } else if let Some(wrapped_sql_node) = + node.as_any().downcast_ref::() + { // TODO // assert_eq!(logical_inputs.len(), 0, "Inconsistent number of inputs"); // assert_eq!(physical_inputs.len(), 0, "Inconsistent number of inputs"); let scan_node = - find_cube_scans_deep_search(wrapper_node.wrapped_plan.clone(), false) + find_cube_scans_deep_search(wrapped_sql_node.wrapped_plan.clone(), false) .into_iter() .next() .ok_or(DataFusionError::Internal(format!( "No cube scans found in wrapper node: {:?}", - wrapper_node + wrapped_sql_node )))?; - let schema = SchemaRef::new(wrapper_node.schema().as_ref().into()); + let schema = SchemaRef::new(wrapped_sql_node.schema().as_ref().into()); Some(Arc::new(CubeScanExecutionPlan { schema, - member_fields: wrapper_node.member_fields.as_ref().ok_or_else(|| { - DataFusionError::Internal(format!( - "Member fields are not set for wrapper node. Optimization wasn't performed: {:?}", - wrapper_node - )) - })?.clone(), + member_fields: wrapped_sql_node.member_fields.clone(), transport: self.transport.clone(), - request: wrapper_node.request.clone().unwrap_or(scan_node.request.clone()), - wrapped_sql: Some(wrapper_node.wrapped_sql.as_ref().ok_or_else(|| { - DataFusionError::Internal(format!( - "Wrapped SQL is not set for wrapper node. 
Optimization wasn't performed: {:?}", - wrapper_node - )) - })?.clone()), + request: wrapped_sql_node.request.clone(), + wrapped_sql: Some(wrapped_sql_node.wrapped_sql.clone()), auth_context: scan_node.auth_context.clone(), options: scan_node.options.clone(), meta: self.meta.clone(), diff --git a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs index 71e934dcffa26..8ec14fe5e0a61 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs @@ -19,10 +19,11 @@ use crate::{ CubeError, }; use chrono::{Days, NaiveDate, SecondsFormat, TimeZone, Utc}; +use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryJoinSubquery}; use datafusion::{ error::{DataFusionError, Result}, logical_plan::{ - plan::Extension, replace_col, Column, DFSchema, DFSchemaRef, Expr, GroupingSet, + plan::Extension, replace_col, Column, DFSchema, DFSchemaRef, Expr, GroupingSet, JoinType, LogicalPlan, UserDefinedLogicalNode, }, physical_plan::{aggregates::AggregateFunction, functions::BuiltinScalarFunction}, @@ -44,6 +45,20 @@ use std::{ sync::{Arc, LazyLock}, }; +pub struct JoinSubquery { + alias: String, + sql: String, + condition: Expr, + join_type: JoinType, +} + +pub struct PushToCubeContext<'l> { + ungrouped_scan_node: &'l CubeScanNode, + // Known join subquery qualifiers, to generate proper column expressions + known_join_subqueries: HashSet, + join_subqueries: Vec, +} + #[derive(Debug, Clone, Deserialize)] pub struct SqlQuery { pub sql: String, @@ -130,8 +145,8 @@ impl SqlQuery { index } - pub fn extend_values(&mut self, values: &Vec>) { - self.values.extend(values.iter().cloned()); + pub fn extend_values(&mut self, values: impl IntoIterator>) { + self.values.extend(values.into_iter()); } pub fn replace_sql(&mut self, sql: String) { @@ -201,14 +216,75 @@ impl SqlQuery { } } +#[derive(Clone, Debug)] +pub struct CubeScanWrappedSqlNode { + // TODO maybe replace 
wrapped plan with schema + scan_node + pub wrapped_plan: Arc, + pub wrapped_sql: SqlQuery, + pub request: TransportLoadRequestQuery, + pub member_fields: Vec, +} + +impl CubeScanWrappedSqlNode { + pub fn new( + wrapped_plan: Arc, + wrapped_sql: SqlQuery, + request: TransportLoadRequestQuery, + member_fields: Vec, + ) -> Self { + Self { + wrapped_plan, + wrapped_sql, + request, + member_fields, + } + } +} + +impl UserDefinedLogicalNode for CubeScanWrappedSqlNode { + fn as_any(&self) -> &dyn Any { + self + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] + } + + fn schema(&self) -> &DFSchemaRef { + self.wrapped_plan.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + // TODO figure out nice plan for wrapped plan + write!(f, "CubeScanWrappedSql") + } + + fn from_template( + &self, + exprs: &[datafusion::logical_plan::Expr], + inputs: &[datafusion::logical_plan::LogicalPlan], + ) -> std::sync::Arc { + assert_eq!(inputs.len(), 0, "input size inconsistent"); + assert_eq!(exprs.len(), 0, "expression size inconsistent"); + + Arc::new(CubeScanWrappedSqlNode { + wrapped_plan: self.wrapped_plan.clone(), + wrapped_sql: self.wrapped_sql.clone(), + request: self.request.clone(), + member_fields: self.member_fields.clone(), + }) + } +} + #[derive(Debug, Clone)] pub struct CubeScanWrapperNode { pub wrapped_plan: Arc, pub meta: Arc, pub auth_context: AuthContextRef, - pub wrapped_sql: Option, - pub request: Option, - pub member_fields: Option>, pub span_id: Option>, pub config_obj: Arc, } @@ -225,31 +301,10 @@ impl CubeScanWrapperNode { wrapped_plan, meta, auth_context, - wrapped_sql: None, - request: None, - member_fields: None, span_id, config_obj, } } - - pub fn with_sql_and_request( - &self, - sql: SqlQuery, - request: TransportLoadRequestQuery, - member_fields: Vec, - ) -> Self { - Self { - wrapped_plan: self.wrapped_plan.clone(), - meta: self.meta.clone(), - auth_context: 
self.auth_context.clone(), - wrapped_sql: Some(sql), - request: Some(request), - member_fields: Some(member_fields), - span_id: self.span_id.clone(), - config_obj: self.config_obj.clone(), - } - } } fn expr_name(e: &Expr, schema: &DFSchema) -> Result { @@ -297,6 +352,10 @@ impl ColumnRemapping { ) .map_err(|_| CubeError::internal(format!("Can't rename columns for expr: {expr:?}",))) } + + pub fn extend(&mut self, other: ColumnRemapping) { + self.column_remapping.extend(other.column_remapping); + } } /// Builds new column mapping @@ -501,7 +560,7 @@ impl CubeScanWrapperNode { &self, transport: Arc, load_request_meta: Arc, - ) -> result::Result { + ) -> result::Result { let schema = self.schema(); let wrapped_plan = self.wrapped_plan.clone(); let (sql, request, member_fields) = Self::generate_sql_for_node( @@ -541,7 +600,12 @@ impl CubeScanWrapperNode { sql.finalize_query(sql_templates).map_err(|e| CubeError::internal(e.to_string()))?; Ok((sql, request, member_fields)) })?; - Ok(self.with_sql_and_request(sql, request, member_fields)) + Ok(CubeScanWrappedSqlNode::new( + self.wrapped_plan.clone(), + sql, + request, + member_fields, + )) } pub fn set_max_limit_for_node(self, node: Arc) -> Arc { @@ -795,7 +859,7 @@ impl CubeScanWrapperNode { aggr_expr, window_expr, from, - joins: _joins, + joins, filter_expr, having_expr: _having_expr, limit, @@ -806,7 +870,7 @@ impl CubeScanWrapperNode { push_to_cube, }) = wrapped_select_node { - // TODO support joins + // TODO support ungrouped joins let ungrouped_scan_node = if push_to_cube { if let LogicalPlan::Extension(Extension { node }) = from.as_ref() { if let Some(cube_scan_node) = @@ -817,7 +881,7 @@ impl CubeScanWrapperNode { "Expected ungrouped CubeScan node but found: {cube_scan_node:?}" ))); } - Some(Arc::new(cube_scan_node.clone())) + Some(cube_scan_node) } else { return Err(CubeError::internal(format!( "Expected CubeScan node but found: {:?}", @@ -837,10 +901,10 @@ impl CubeScanWrapperNode { let SqlGenerationResult { 
data_source, from_alias, - column_remapping, + mut column_remapping, mut sql, request, - } = if let Some(ungrouped_scan_node) = ungrouped_scan_node.clone() { + } = if let Some(ungrouped_scan_node) = &ungrouped_scan_node { let data_sources = ungrouped_scan_node .used_cubes .iter() @@ -885,8 +949,6 @@ impl CubeScanWrapperNode { .await? }; - let column_remapping = column_remapping.as_ref(); - let mut subqueries_sql = HashMap::new(); for subquery in subqueries.iter() { let SqlGenerationResult { @@ -907,13 +969,124 @@ impl CubeScanWrapperNode { .await?; let (sql_string, new_values) = subquery_sql.unpack(); - sql.extend_values(&new_values); + sql.extend_values(new_values); + // TODO why only field 0 is a key? let field = subquery.schema().field(0); subqueries_sql.insert(field.qualified_name(), sql_string); } let subqueries_sql = Arc::new(subqueries_sql); let alias = alias.or(from_alias.clone()); let mut next_remapper = Remapper::new(alias.clone(), can_rename_columns); + + let push_to_cube_context = if let Some(ungrouped_scan_node) = + ungrouped_scan_node + { + let mut join_subqueries = vec![]; + let mut known_join_subqueries = HashSet::new(); + for (lp, cond, join_type) in joins { + match lp.as_ref() { + LogicalPlan::Extension(Extension { node }) => { + if let Some(join_cube_scan) = + node.as_any().downcast_ref::() + { + if join_cube_scan.request.ungrouped == Some(true) { + return Err(CubeError::internal(format!( + "Unsupported ungrouped CubeScan as join subquery: {join_cube_scan:?}" + ))); + } + } else { + // TODO support more grouped cases here + return Err(CubeError::internal(format!( + "Unsupported unknown extension as join subquery: {node:?}" + ))); + } + } + _ => { + // TODO support more grouped cases here + return Err(CubeError::internal(format!( + "Unsupported logical plan node as join subquery: {lp:?}" + ))); + } + } + + match join_type { + JoinType::Inner | JoinType::Left => { + // Do nothing + } + _ => { + return Err(CubeError::internal(format!( + "Unsupported 
join type for join subquery: {join_type:?}" + ))); + } + } + + // TODO avoid using direct alias from schema, implement remapping for qualifiers instead + let alias = lp + .schema() + .fields() + .iter() + .filter_map(|f| f.qualifier()) + .next() + .ok_or_else(|| { + CubeError::internal(format!( + "Alias not found for join subquery {lp:?}" + )) + })?; + + let subq_sql = Self::generate_sql_for_node( + plan.clone(), + transport.clone(), + load_request_meta.clone(), + lp.clone(), + true, + sql.values.clone(), + data_source.clone(), + ) + .await?; + let (subq_sql_string, new_values) = subq_sql.sql.unpack(); + sql.extend_values(new_values); + let subq_alias = subq_sql.from_alias; + // Expect that subq_sql.column_remapping already incorporates subq_alias. + // TODO does it? + + // TODO expect returned from_alias to be fine, but still need to remap it from original alias somewhere in generate_sql_for_node + + // grouped join subquery can have its columns remapped, and expressions in the current node can reference original columns + column_remapping = { + match (column_remapping, subq_sql.column_remapping) { + (None, None) => None, + (None, Some(remapping)) | (Some(remapping), None) => { + Some(remapping) + } + (Some(mut left), Some(right)) => { + left.extend(right); + Some(left) + } + } + }; + + join_subqueries.push(JoinSubquery { + // TODO what alias to actually use here? two more-or-less valid options: returned from generate_sql_for_node or re-aliased from `alias`. 
Plain `alias` is incorrect here + alias: subq_alias.unwrap_or_else(|| alias.clone()), + sql: subq_sql_string, + condition: cond.clone(), + join_type: join_type.clone(), + }); + known_join_subqueries.insert(alias.clone()); + } + + Some(PushToCubeContext { + ungrouped_scan_node, + join_subqueries, + known_join_subqueries, + }) + } else { + None + }; + // Drop mut, turn to ref + let column_remapping = column_remapping.as_ref(); + // Turn to ref + let push_to_cube_context = push_to_cube_context.as_ref(); if let Some(data_source) = data_source { let generator = plan .meta @@ -935,7 +1108,7 @@ impl CubeScanWrapperNode { column_remapping, &mut next_remapper, can_rename_columns, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries_sql.clone(), ) .await?; @@ -949,7 +1122,7 @@ impl CubeScanWrapperNode { column_remapping, &mut next_remapper, can_rename_columns, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries_sql.clone(), ) .await?; @@ -963,7 +1136,7 @@ impl CubeScanWrapperNode { column_remapping, &mut next_remapper, can_rename_columns, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries_sql.clone(), ) .await?; @@ -977,7 +1150,7 @@ impl CubeScanWrapperNode { column_remapping, &mut next_remapper, can_rename_columns, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries_sql.clone(), ) .await?; @@ -991,7 +1164,7 @@ impl CubeScanWrapperNode { column_remapping, &mut next_remapper, can_rename_columns, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries_sql.clone(), ) .await?; @@ -1005,79 +1178,148 @@ impl CubeScanWrapperNode { column_remapping, &mut next_remapper, can_rename_columns, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries_sql.clone(), ) .await?; - if let Some(ungrouped_scan_node) = ungrouped_scan_node.clone() { - let mut load_request = ungrouped_scan_node.request.clone(); - load_request.measures = Some( - aggregate - .iter() - .map(|m| { - Self::ungrouped_member_def( - m, - 
&ungrouped_scan_node.used_cubes, - ) - }) - .chain( - // TODO understand type of projections - projection.iter().map(|m| { + if let Some(PushToCubeContext { + ungrouped_scan_node, + join_subqueries, + known_join_subqueries: _, + }) = push_to_cube_context + { + let mut prepared_join_subqueries = vec![]; + for JoinSubquery { + alias: subq_alias, + sql: subq_sql, + condition, + join_type, + } in join_subqueries + { + // Need to call generate_column_expr to apply column_remapping + let (join_condition, new_sql) = Self::generate_column_expr( + plan.clone(), + schema.clone(), + [condition.clone()], + sql, + generator.clone(), + column_remapping, + &mut next_remapper, + true, + push_to_cube_context, + subqueries_sql.clone(), + ) + .await?; + let join_condition = join_condition[0].expr.clone(); + sql = new_sql; + + let join_sql_expression = { + // TODO this is NOT a proper way to generate member expr here + // TODO Do we even want a full-blown member expression here? or arguments + expr will be enough? 
+ let res = Self::make_member_def( + &AliasedColumn { + expr: join_condition, + alias: "__join__alias__unused".to_string(), + }, + &ungrouped_scan_node.used_cubes, + )?; + serde_json::json!(res).to_string() + }; + + let join_type = match join_type { + JoinType::Left => generator + .get_sql_templates() + .left_join()?, + JoinType::Inner => generator + .get_sql_templates() + .inner_join()?, + _ => { + return Err(CubeError::internal(format!( + "Unsupported join type for join subquery: {join_type:?}" + ))) + } + }; + + // for simple ungrouped-grouped joins everything should already be present in from + // so we can just attach this join to the end, no need to look for a proper spot + prepared_join_subqueries.push(V1LoadRequestQueryJoinSubquery { + sql: subq_sql.clone(), + on: join_sql_expression, + join_type, + alias: subq_alias.clone(), + }); + } + + let load_request = &ungrouped_scan_node.request; + + let load_request = V1LoadRequestQuery { + measures: Some( + aggregate + .iter() + .map(|m| { Self::ungrouped_member_def( m, &ungrouped_scan_node.used_cubes, ) - }), - ) - .chain(window.iter().map(|m| { - Self::ungrouped_member_def( - m, - &ungrouped_scan_node.used_cubes, + }) + .chain( + // TODO understand type of projections + projection.iter().map(|m| { + Self::ungrouped_member_def( + m, + &ungrouped_scan_node.used_cubes, + ) + }), ) - })) - .collect::>()?, - ); - load_request.dimensions = Some( - group_by - .iter() - .zip(group_descs.iter()) - .map(|(m, t)| { - Self::dimension_member_def( - m, - &ungrouped_scan_node.used_cubes, - t, - ) - }) - .collect::>()?, - ); - load_request.segments = Some( - filter - .iter() - .map(|m| { - Self::ungrouped_member_def( - m, - &ungrouped_scan_node.used_cubes, - ) - }) - .collect::>()?, - ); - if !order_expr.is_empty() { - load_request.order = Some( - order_expr + .chain(window.iter().map(|m| { + Self::ungrouped_member_def( + m, + &ungrouped_scan_node.used_cubes, + ) + })) + .collect::>()?, + ), + dimensions: Some( + group_by + 
.iter() + .zip(group_descs.iter()) + .map(|(m, t)| { + Self::dimension_member_def( + m, + &ungrouped_scan_node.used_cubes, + t, + ) + }) + .collect::>()?, + ), + segments: Some( + filter .iter() - .map(|o| -> Result<_> { match o { - Expr::Sort { - expr, - asc, - .. - } => { - let col_name = expr_name(&expr, &schema)?; - let aliased_column = aggr_expr - .iter() - .find_position(|e| { - expr_name(e, &schema).map(|n| &n == &col_name).unwrap_or(false) - }) - .map(|(i, _)| aggregate[i].clone()).or_else(|| { + .map(|m| { + Self::ungrouped_member_def( + m, + &ungrouped_scan_node.used_cubes, + ) + }) + .collect::>()?, + ), + order: if !order_expr.is_empty() { + Some( + order_expr + .iter() + .map(|o| -> Result<_> { match o { + Expr::Sort { + expr, + asc, + .. + } => { + let col_name = expr_name(&expr, &schema)?; + let aliased_column = aggr_expr + .iter() + .find_position(|e| { + expr_name(e, &schema).map(|n| &n == &col_name).unwrap_or(false) + }) + .map(|(i, _)| aggregate[i].clone()).or_else(|| { projection_expr .iter() .find_position(|e| { @@ -1102,33 +1344,48 @@ impl CubeScanWrapperNode { flat_group_expr )) })?; - Ok(vec![ - aliased_column.alias.clone(), - if *asc { "asc".to_string() } else { "desc".to_string() }, - ]) - } - _ => Err(DataFusionError::Execution(format!( - "Expected sort expression, found {:?}", - o - ))), - }}) - .collect::>>()?, - ); - } - load_request.ungrouped = - if let WrappedSelectType::Projection = select_type { + Ok(vec![ + aliased_column.alias.clone(), + if *asc { "asc".to_string() } else { "desc".to_string() }, + ]) + } + _ => Err(DataFusionError::Execution(format!( + "Expected sort expression, found {:?}", + o + ))), + }}) + .collect::>>()?, + ) + } else { + load_request.order.clone() + }, + ungrouped: if let WrappedSelectType::Projection = select_type { load_request.ungrouped.clone() } else { None - }; + }, + // TODO is it okay to just override limit? 
+ limit: if let Some(limit) = limit { + Some(limit as i32) + } else { + load_request.limit.clone() + }, + // TODO is it okay to just override offset? + offset: if let Some(offset) = offset { + Some(offset as i32) + } else { + load_request.offset.clone() + }, - if let Some(limit) = limit { - load_request.limit = Some(limit as i32); - } + // Original scan node can already have consumed filters from Logical plan + // It's incorrect to just throw them away + filters: ungrouped_scan_node.request.filters.clone(), + + time_dimensions: load_request.time_dimensions.clone(), + subquery_joins: (!prepared_join_subqueries.is_empty()) + .then_some(prepared_join_subqueries), + }; - if let Some(offset) = offset { - load_request.offset = Some(offset as i32); - } // TODO time dimensions, filters, segments let mut meta_with_user = load_request_meta.as_ref().clone(); @@ -1230,13 +1487,13 @@ impl CubeScanWrapperNode { async fn generate_column_expr( plan: Arc, schema: DFSchemaRef, - exprs: Vec, + exprs: impl IntoIterator, mut sql: SqlQuery, generator: Arc, column_remapping: Option<&ColumnRemapping>, next_remapper: &mut Remapper, can_rename_columns: bool, - ungrouped_scan_node: Option>, + push_to_cube_context: Option<&PushToCubeContext<'_>>, subqueries: Arc>, ) -> result::Result<(Vec, SqlQuery), CubeError> { let mut aliased_columns = Vec::new(); @@ -1258,12 +1515,12 @@ impl CubeScanWrapperNode { sql, generator.clone(), expr.clone(), - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; let expr_sql = - Self::escape_interpolation_quotes(expr_sql, ungrouped_scan_node.is_some()); + Self::escape_interpolation_quotes(expr_sql, push_to_cube_context.is_some()); sql = new_sql_query; let alias = next_remapper.add_expr(&schema, &original_expr, &expr)?; @@ -1334,14 +1591,16 @@ impl CubeScanWrapperNode { .map_err(|e| DataFusionError::Internal(format!("Can't generate SQL for type: {}", e))) } - pub fn generate_sql_for_expr( + /// This function is async to be able to 
call to JS land, + /// in case some SQL generation could not be done through Jinja + pub fn generate_sql_for_expr<'ctx>( plan: Arc, mut sql_query: SqlQuery, sql_generator: Arc, expr: Expr, - ungrouped_scan_node: Option>, + push_to_cube_context: Option<&'ctx PushToCubeContext>, subqueries: Arc>, - ) -> Pin> + Send>> { + ) -> Pin> + Send + 'ctx>> { Box::pin(async move { match expr { Expr::Alias(expr, _) => { @@ -1350,14 +1609,14 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node, + push_to_cube_context, subqueries.clone(), ) .await?; Ok((expr, sql_query)) } // Expr::OuterColumn(_, _) => {} - Expr::Column(c) => { + Expr::Column(ref c) => { if let Some(subquery) = subqueries.get(&c.flat_name()) { Ok(( sql_generator @@ -1371,8 +1630,31 @@ impl CubeScanWrapperNode { })?, sql_query, )) - } else if let Some(scan_node) = ungrouped_scan_node.as_ref() { - let field_index = scan_node + } else if let Some(PushToCubeContext { + ungrouped_scan_node, + join_subqueries: _, + known_join_subqueries, + }) = push_to_cube_context + { + if let Some(relation) = c.relation.as_ref() { + if known_join_subqueries.contains(relation) { + // SQL API passes fixed aliases to Cube.js for join subqueries + // It means we don't need to use member expressions here, and can just use that fixed alias + // So we can generate that as if it were regular column expression + + return Self::generate_sql_for_expr( + plan.clone(), + sql_query, + sql_generator.clone(), + expr, + None, + subqueries.clone(), + ) + .await; + } + } + + let field_index = ungrouped_scan_node .schema .fields() .iter() @@ -1390,12 +1672,15 @@ impl CubeScanWrapperNode { )) })? 
.0; - let member = scan_node.member_fields.get(field_index).ok_or_else(|| { - DataFusionError::Internal(format!( - "Can't find member for column {} in ungrouped scan node", - c - )) - })?; + let member = ungrouped_scan_node + .member_fields + .get(field_index) + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Can't find member for column {} in ungrouped scan node", + c + )) + })?; match member { MemberField::Member(member) => { Ok((format!("${{{}}}", member), sql_query)) @@ -1406,7 +1691,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), Expr::Literal(value.clone()), - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await @@ -1457,7 +1742,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *left, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1466,7 +1751,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *right, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1488,7 +1773,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *like.expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1497,7 +1782,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *like.pattern, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1508,7 +1793,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), Expr::Literal(ScalarValue::Utf8(Some(escape_char.to_string()))), - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1533,7 +1818,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *ilike.expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1542,7 +1827,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *ilike.pattern, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) 
.await?; @@ -1553,7 +1838,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), Expr::Literal(ScalarValue::Utf8(Some(escape_char.to_string()))), - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1579,7 +1864,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1601,7 +1886,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1622,7 +1907,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1643,7 +1928,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1671,7 +1956,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1687,7 +1972,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *when, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1696,7 +1981,7 @@ impl CubeScanWrapperNode { sql_query_next, sql_generator.clone(), *then, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1709,7 +1994,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *else_expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1732,7 +2017,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -1752,7 +2037,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, 
subqueries.clone(), ) .await?; @@ -2025,7 +2310,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), arg, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2062,7 +2347,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), args[1].clone(), - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2105,7 +2390,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), arg, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2145,7 +2430,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), arg, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2174,7 +2459,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2202,7 +2487,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2243,7 +2528,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), arg, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2257,7 +2542,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), arg, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2271,7 +2556,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), arg, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2307,7 +2592,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2319,7 +2604,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2350,7 +2635,7 @@ impl 
CubeScanWrapperNode { sql_query, sql_generator.clone(), *expr, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2360,7 +2645,7 @@ impl CubeScanWrapperNode { sql_query, sql_generator.clone(), *subquery, - ungrouped_scan_node.clone(), + push_to_cube_context, subqueries.clone(), ) .await?; @@ -2434,9 +2719,6 @@ impl UserDefinedLogicalNode for CubeScanWrapperNode { wrapped_plan: self.wrapped_plan.clone(), meta: self.meta.clone(), auth_context: self.auth_context.clone(), - wrapped_sql: self.wrapped_sql.clone(), - request: self.request.clone(), - member_fields: self.member_fields.clone(), span_id: self.span_id.clone(), config_obj: self.config_obj.clone(), }) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index ed5d604436a6e..4a9defb889fc8 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -336,11 +336,7 @@ mod tests { ) .await.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("LOWER(")); assert!(sql.contains(" IN (")); @@ -351,11 +347,7 @@ mod tests { ) .await.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("LOWER(")); assert!(sql.contains(" IN (")); @@ -374,11 +366,7 @@ mod tests { DatabaseProtocol::PostgreSQL, ).await.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("LOWER(")); } @@ -2777,17 +2765,15 @@ limit assert!(query_plan .as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("sixteen_charchar_1")); assert!(query_plan 
.as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("sixteen_charchar_2")); } @@ -6948,11 +6934,7 @@ ORDER BY DatabaseProtocol::PostgreSQL ).await.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!( sql.contains(expected_search_expr), "cast_expr is {}, expected_search_expr is {}", @@ -7256,9 +7238,8 @@ ORDER BY assert_eq!( query_plan .as_logical_plan() - .find_cube_scan_wrapper() - .request - .unwrap(), + .find_cube_scan_wrapped_sql() + .request, V1LoadRequestQuery { measures: Some(vec![ json!({ @@ -7374,9 +7355,8 @@ ORDER BY "source"."str0" ASC ); assert!(!query_plan .as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("ungrouped")); } @@ -10992,11 +10972,7 @@ ORDER BY "source"."str0" ASC .await .as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; // check wrapping for `LOWER(..) <> .. OR .. IS NULL` let re = Regex::new(r"LOWER ?\(.+\) != .+ OR .+ IS NULL").unwrap(); @@ -11029,11 +11005,7 @@ ORDER BY "source"."str0" ASC .await .as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; // check wrapping for `NOT(LOWER(..) IN (..))` let re = Regex::new(r"NOT.+LOWER ?\(.+\).* IN ").unwrap(); @@ -11329,11 +11301,7 @@ ORDER BY "source"."str0" ASC .await .as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; // check wrapping for `NOT(LOWER(..) IN (..)) OR NOT(.. 
IS NOT NULL)` let re = Regex::new(r"NOT.+LOWER ?\(.+\) IN .+\) OR NOT.+ IS NOT NULL").unwrap(); @@ -11653,9 +11621,8 @@ ORDER BY "source"."str0" ASC assert_eq!( logical_plan - .find_cube_scan_wrapper() - .request - .unwrap(), + .find_cube_scan_wrapped_sql() + .request, V1LoadRequestQuery { measures: Some(vec![]), dimensions: Some(vec![ @@ -11676,12 +11643,8 @@ ORDER BY "source"."str0" ASC "grouping_set": null, }).to_string(), ]), - time_dimensions: None, order: Some(vec![]), - limit: None, - offset: None, - filters: None, - ungrouped: None, + ..Default::default() } ); } @@ -11939,9 +11902,8 @@ ORDER BY "source"."str0" ASC assert_eq!( query_plan .as_logical_plan() - .find_cube_scan_wrapper() - .request - .unwrap(), + .find_cube_scan_wrapped_sql() + .request, V1LoadRequestQuery { measures: Some(vec![]), dimensions: Some(vec![ @@ -12246,11 +12208,7 @@ ORDER BY "source"."str0" ASC .await .as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; // check wrapping for `NOT(.. IS NULL OR LOWER(..) 
IN)` let re = Regex::new(r"NOT \(.+ IS NULL OR .*LOWER\(.+ IN ").unwrap(); @@ -12297,11 +12255,7 @@ ORDER BY "source"."str0" ASC let re = Regex::new(r"\(LOWER ?\(.+\) = .+ OR .+LOWER ?\(.+\) = .+\) IN \(TRUE, FALSE\)") .unwrap(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(re.is_match(&sql)); } @@ -12764,7 +12718,7 @@ ORDER BY "source"."str0" ASC let end_date = chrono::Utc::now().date_naive(); let start_date = end_date - chrono::Duration::days(30); assert_eq!( - logical_plan.find_cube_scan_wrapper().request.unwrap(), + logical_plan.find_cube_scan_wrapped_sql().request, V1LoadRequestQuery { measures: Some(vec![ json!({ @@ -12880,9 +12834,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("LEFT")); } @@ -13231,9 +13184,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("EXTRACT")); } @@ -13311,9 +13263,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("EXTRACT")); } @@ -13340,9 +13291,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("EXTRACT")); } @@ -13369,9 +13319,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("EXTRACT")); } @@ -13393,17 +13342,12 @@ ORDER BY "source"."str0" ASC let 
logical_plan = query_plan.as_logical_plan(); assert!( logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("OVER"), "SQL should contain 'OVER': {}", - logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql + logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql ); let physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -13430,32 +13374,22 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!( logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("long_l_1"), "SQL should contain long_l_1: {}", - logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql + logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql ); assert!( logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("long_l_1"), "SQL should contain long_l_2: {}", - logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql + logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql ); let physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -13481,9 +13415,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("CURRENT_DATE()")); @@ -13537,11 +13470,7 @@ ORDER BY "source"."str0" ASC .await .as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; // check if contains `CAST(EXTRACT(YEAR FROM ..) || .. || .. 
|| ..)` let re = Regex::new(r"CAST.+EXTRACT.+YEAR FROM(.+ \|\|){3}").unwrap(); @@ -13694,11 +13623,7 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); if Rewriter::sql_push_down_enabled() { - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("EXTRACT(YEAR")); assert!(sql.contains("EXTRACT(MONTH")); @@ -13802,11 +13727,7 @@ ORDER BY "source"."str0" ASC // TODO: split on complex expressions? // CAST(CAST(ta_1.order_date AS Date32) - CAST(CAST(Utf8("1970-01-01") AS Date32) AS Date32) + Int64(3) AS Decimal(38, 10)) if Rewriter::sql_push_down_enabled() { - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; if Rewriter::top_down_extractor_enabled() { assert!(sql.contains("LIMIT 1000")); } else { @@ -14178,11 +14099,7 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); if Rewriter::sql_push_down_enabled() { - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("LIMIT 101")); assert!(sql.contains("ORDER BY")); @@ -14292,9 +14209,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("NOT IN (")); } @@ -14359,9 +14275,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("NOT (")); } @@ -14393,9 +14308,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() 
.wrapped_sql - .unwrap() .sql .contains("DATEDIFF(day,")); @@ -14422,11 +14336,7 @@ ORDER BY "source"."str0" ASC ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("DATETIME_DIFF(CAST(")); assert!(sql.contains("day)")); @@ -14453,11 +14363,7 @@ ORDER BY "source"."str0" ASC ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("DATEDIFF(day,")); assert!(sql.contains("DATE_TRUNC('day',")); @@ -14484,11 +14390,7 @@ ORDER BY "source"."str0" ASC ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("CASE WHEN LOWER('day')")); assert!(sql.contains("WHEN 'year' THEN 12 WHEN 'quarter' THEN 3 WHEN 'month' THEN 1 END")); assert!(sql.contains("EXTRACT(EPOCH FROM")); @@ -14523,9 +14425,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("DATEADD(day, 7,")); @@ -14552,11 +14453,7 @@ ORDER BY "source"."str0" ASC ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("DATETIME_ADD(CAST(")); assert!(sql.contains("INTERVAL 7 day)")); @@ -14584,11 +14481,7 @@ ORDER BY "source"."str0" ASC ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let 
sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("+ '7 day'::interval")); } @@ -14664,9 +14557,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("DATE(")); } @@ -14703,9 +14595,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("EXTRACT(MONTH FROM ")); } @@ -14746,11 +14637,7 @@ ORDER BY "source"."str0" ASC ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("order_date")); assert!(sql.contains("EXTRACT(DAY FROM")) } @@ -14866,11 +14753,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("OFFSET 1\nLIMIT 2")); } @@ -15117,9 +15000,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("SELECT DISTINCT ")); @@ -15197,9 +15079,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW")); @@ -15723,9 +15604,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + 
.find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("LIMIT 250")); @@ -16006,11 +15886,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains(" AS STRING)")); assert!(sql.contains(" AS FLOAT)")); assert!(sql.contains(" AS DOUBLE)")); @@ -16038,11 +15914,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains(" AS STRING)")); assert!(sql.contains(" AS FLOAT64)")); assert!(sql.contains(" AS BIGDECIMAL(38,10))")); @@ -16066,11 +15938,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains(" AS TEXT)")); assert!(sql.contains(" AS REAL)")); assert!(sql.contains(" AS DOUBLE PRECISION)")); @@ -16188,11 +16056,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("LIKE ")); assert!(sql.contains("ESCAPE ")); @@ -16329,11 +16193,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains(" IS NULL DESC, ")); } diff --git 
a/rust/cubesql/cubesql/src/compile/query_engine.rs b/rust/cubesql/cubesql/src/compile/query_engine.rs index afc7f40cafb61..c4b76617bdc0a 100644 --- a/rust/cubesql/cubesql/src/compile/query_engine.rs +++ b/rust/cubesql/cubesql/src/compile/query_engine.rs @@ -298,6 +298,8 @@ pub trait QueryEngine { } } + // We want to generate SQL early, as a part of planning, and not later (like during execution) + // to catch all SQL generation errors during planning let rewrite_plan = Self::evaluate_wrapped_sql( self.transport_ref().clone(), Arc::new(state.get_load_request_meta()), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/converter.rs b/rust/cubesql/cubesql/src/compile/rewrite/converter.rs index 5b028b24d37fd..dc65029915143 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/converter.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/converter.rs @@ -2121,6 +2121,11 @@ impl LanguageToLogicalPlanConverter { from.schema() .fields() .iter() + .chain( + joins + .iter() + .flat_map(|(j, _, _)| j.schema().fields().iter()), + ) .map(|f| Expr::Column(f.qualified_column())) .collect::>() } else { @@ -2158,7 +2163,14 @@ impl LanguageToLogicalPlanConverter { for subquery in subqueries.iter() { subqueries_schema.merge(subquery.schema()); } - let schema_with_subqueries = from.schema().join(&subqueries_schema)?; + let mut joins_schema = DFSchema::empty(); + for join in joins.iter() { + joins_schema.merge(join.0.schema()); + } + let schema_with_subqueries = from + .schema() + .join(&subqueries_schema)? 
+ .join(&joins_schema)?; let without_window_fields = exprlist_to_fields_from_schema( all_expr_without_window.iter(), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/cost.rs b/rust/cubesql/cubesql/src/compile/rewrite/cost.rs index 6ae446860960b..c5069560e6b7a 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/cost.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/cost.rs @@ -58,6 +58,11 @@ impl BestCubePlan { _ => 0, }; + let joins = match enode { + LogicalPlanLanguage::Join(_) => 1, + _ => 0, + }; + let wrapper_nodes = match enode { LogicalPlanLanguage::CubeScanWrapper(_) => 1, _ => 0, @@ -209,6 +214,7 @@ impl BestCubePlan { structure_points, ungrouped_aggregates: 0, wrapper_nodes, + joins, wrapped_select_ungrouped_scan, empty_wrappers: 0, ast_size_outside_wrapper: 0, @@ -247,6 +253,7 @@ pub struct CubePlanCost { non_pushed_down_window: i64, non_pushed_down_grouping_sets: i64, non_pushed_down_limit_sort: i64, + joins: usize, wrapper_nodes: i64, wrapped_select_ungrouped_scan: usize, ast_size_outside_wrapper: usize, @@ -369,6 +376,7 @@ impl CubePlanCost { cube_members: self.cube_members + other.cube_members, errors: self.errors + other.errors, structure_points: self.structure_points + other.structure_points, + joins: self.joins + other.joins, empty_wrappers: self.empty_wrappers + other.empty_wrappers, ast_size_outside_wrapper: self.ast_size_outside_wrapper + other.ast_size_outside_wrapper, @@ -424,6 +432,7 @@ impl CubePlanCost { cube_members: self.cube_members, errors: self.errors, structure_points: self.structure_points, + joins: self.joins, ast_size_outside_wrapper: match state { CubePlanState::Wrapped => 0, CubePlanState::Unwrapped(size) => *size, diff --git a/rust/cubesql/cubesql/src/compile/rewrite/mod.rs b/rust/cubesql/cubesql/src/compile/rewrite/mod.rs index c368ef8c1f4c1..ed788076c7798 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/mod.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/mod.rs @@ -470,6 +470,9 @@ crate::plan_to_language! 
{ push_to_cube: bool, in_projection: bool, cube_members: Vec, + // Known qualifiers of grouped subqueries + // Used to allow to rewrite columns from them even with push to Cube enabled + grouped_subqueries: Vec, }, WrapperPullupReplacer { member: Arc, @@ -485,6 +488,9 @@ crate::plan_to_language! { push_to_cube: bool, in_projection: bool, cube_members: Vec, + // Known qualifiers of grouped subqueries + // Used to allow to rewrite columns from them even with push to Cube enabled + grouped_subqueries: Vec, }, FlattenPushdownReplacer { expr: Arc, @@ -544,26 +550,43 @@ macro_rules! var { } #[macro_export] -macro_rules! copy_flag { - ($egraph:expr, $subst:expr, $in_var:expr, $in_kind:ident, $out_var:expr, $out_kind:ident) => {{ +macro_rules! copy_value { + ($egraph:expr, $subst:expr, $ty:ty, $in_var:expr, $in_kind:ident, $out_var:expr, $out_kind:ident) => {{ let mut found = false; + let mut found_value: Option<&$ty> = None; for in_value in $crate::var_iter!($egraph[$subst[$in_var]], $in_kind) { - // Typechecking for $in_kind, only booleans are supported for now - let in_value: bool = *in_value; + // Typechecking for $in_kind + let in_value: &$ty = in_value; + if found { + // Found many different unified representations of same kind for a single eclass, not safe to copy + found_value = None; + } else { + found = true; + found_value = Some(in_value); + } + } + if let Some(found_value) = found_value { + let out_value = found_value.clone(); $subst.insert( $out_var, $egraph.add($crate::compile::rewrite::LogicalPlanLanguage::$out_kind( - $out_kind(in_value), + $out_kind(out_value), )), ); - found = true; - // This is safe, because we expect only enode with one child, with boolena inside, and expect that they would never unify - break; + true + } else { + false } - found }}; } +#[macro_export] +macro_rules! 
copy_flag { + ($egraph:expr, $subst:expr, $in_var:expr, $in_kind:ident, $out_var:expr, $out_kind:ident) => { + $crate::copy_value!($egraph, $subst, bool, $in_var, $in_kind, $out_var, $out_kind) + }; +} + pub struct WithColumnRelation(Option); impl ExprRewriter for WithColumnRelation { @@ -626,6 +649,12 @@ pub fn column_name_to_member_vec( } impl LogicalPlanData { + // TODO use it instead of find_member_by_alias in more places + fn find_member_by_column(&mut self, column: &Column) -> Option<(&MemberNameToExpr, String)> { + let name = column.flat_name(); + self.find_member_by_alias(&name) + } + fn find_member_by_alias(&mut self, name: &str) -> Option<(&MemberNameToExpr, String)> { if let Some(member_names_to_expr) = &mut self.member_name_to_expr { Self::do_find_member_by_alias(member_names_to_expr, name) @@ -1486,12 +1515,15 @@ fn wrapped_select_window_expr_empty_tail() -> String { wrapped_select_window_expr(Vec::::new()) } +fn wrapped_select_join(input: impl Display, expr: impl Display, join_type: impl Display) -> String { + format!("(WrappedSelectJoin {} {} {})", input, expr, join_type) +} + #[allow(dead_code)] fn wrapped_select_joins(left: impl Display, right: impl Display) -> String { format!("(WrappedSelectJoins {} {})", left, right) } -#[allow(dead_code)] fn wrapped_select_joins_empty_tail() -> String { "WrappedSelectJoins".to_string() } @@ -1950,10 +1982,10 @@ fn wrapper_pushdown_replacer( push_to_cube: impl Display, in_projection: impl Display, cube_members: impl Display, + grouped_subqueries: impl Display, ) -> String { format!( - "(WrapperPushdownReplacer {} {} {} {} {})", - members, alias_to_cube, push_to_cube, in_projection, cube_members + "(WrapperPushdownReplacer {members} {alias_to_cube} {push_to_cube} {in_projection} {cube_members} {grouped_subqueries})", ) } @@ -1963,10 +1995,10 @@ fn wrapper_pullup_replacer( push_to_cube: impl Display, in_projection: impl Display, cube_members: impl Display, + grouped_subqueries: impl Display, ) -> String { format!( 
- "(WrapperPullupReplacer {} {} {} {} {})", - members, alias_to_cube, push_to_cube, in_projection, cube_members + "(WrapperPullupReplacer {members} {alias_to_cube} {push_to_cube} {in_projection} {cube_members} {grouped_subqueries})", ) } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs index 02f299617be70..e8114d999e96e 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs @@ -23,7 +23,6 @@ use datafusion::{ logical_plan::LogicalPlan, physical_plan::planner::DefaultPhysicalPlanner, scalar::ScalarValue, }; use egg::{EGraph, Extractor, Id, IterationData, Language, Rewrite, Runner, StopReason}; -use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::{ collections::{HashMap, HashSet}, @@ -359,7 +358,7 @@ impl Rewriter { let Some((best_cost, best)) = extractor.find_best(root) else { return Err(CubeError::internal("Unable to find best plan".to_string())); }; - log::debug!("Best cost: {:?}", best_cost); + log::debug!("Best cost: {:#?}", best_cost); best } else { let extractor = Extractor::new( @@ -367,7 +366,7 @@ impl Rewriter { BestCubePlan::new(cube_context.meta.clone()), ); let (best_cost, best) = extractor.find_best(root); - log::debug!("Best cost: {:?}", best_cost); + log::debug!("Best cost: {:#?}", best_cost); best }; let qtrace_best_graph = if Qtrace::is_enabled() { @@ -376,14 +375,7 @@ impl Rewriter { vec![] }; let new_root = Id::from(best.as_ref().len() - 1); - log::debug!( - "Best: {}", - best.as_ref() - .iter() - .enumerate() - .map(|(i, n)| format!("{}: {:?}", i, n)) - .join(", ") - ); + log::debug!("Best: {}", best.pretty(120)); let converter = LanguageToLogicalPlanConverter::new( best, cube_context.clone(), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index d6cbe46d2a8f3..f5f7c809fc428 100644 --- 
a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -2716,77 +2716,14 @@ impl MemberRules { let left_aliases_var = var!(left_aliases_var); let right_aliases_var = var!(right_aliases_var); move |egraph, subst| { - if egraph - .index(subst[left_aliases_var]) - .data - .member_name_to_expr - .is_some() - { - if egraph - .index(subst[right_aliases_var]) - .data - .member_name_to_expr - .is_some() - { - let left_join_ons: Vec> = - var_iter!(egraph[subst[left_on_var]], JoinLeftOn) - .map(|elem| elem.iter().cloned().collect()) - .collect(); - for left_join_on in left_join_ons { - for join_on in left_join_on { - let member_names_to_expr_left = &mut egraph - .index_mut(subst[left_aliases_var]) - .data - .member_name_to_expr - .as_mut() - .unwrap(); - - // TODO: Avoid the join_on.*.clone() calls (should be trivial). - let mut column_name = join_on.name.clone(); - if let Some(name) = find_column_by_alias( - &column_name, - member_names_to_expr_left, - &join_on.relation.clone().unwrap_or_default(), - ) { - column_name = name.split(".").last().unwrap().to_string(); - } - - if column_name == "__cubeJoinField" { - let right_join_ons: Vec> = - var_iter!(egraph[subst[right_on_var]], JoinRightOn) - .map(|elem| elem.iter().cloned().collect()) - .collect(); - for right_join_on in right_join_ons { - for join_on in right_join_on.iter() { - let member_names_to_expr_right = &mut egraph - .index_mut(subst[right_aliases_var]) - .data - .member_name_to_expr - .as_mut() - .unwrap(); - - let mut column_name = join_on.name.clone(); - if let Some(name) = find_column_by_alias( - &column_name, - member_names_to_expr_right, - &join_on.relation.clone().unwrap_or_default(), - ) { - column_name = - name.split(".").last().unwrap().to_string(); - } - - if column_name == "__cubeJoinField" { - return true; - } - } - } - } - } - } - } - } - - false + is_proper_cube_join_condition( + egraph, + subst, + left_aliases_var, + 
left_on_var, + right_aliases_var, + right_on_var, + ) } } @@ -2950,20 +2887,113 @@ pub fn min_granularity(granularity_a: &String, granularity_b: &String) -> Option } } -fn find_column_by_alias( - column_name: &String, - member_names_to_expr: &mut MemberNamesToExpr, - cube_alias: &String, -) -> Option { +fn find_column_by_alias<'mn>( + column_name: &str, + member_names_to_expr: &'mn mut MemberNamesToExpr, + cube_alias: &str, +) -> Option<&'mn str> { if let Some((tuple, _)) = LogicalPlanData::do_find_member_by_alias( member_names_to_expr, &format!("{}.{}", cube_alias, column_name), ) { - return tuple.0.clone(); + return tuple.0.as_deref(); } None } +fn is_proper_cube_join_condition( + egraph: &mut CubeEGraph, + subst: &Subst, + left_cube_members_var: Var, + left_on_var: Var, + right_cube_members_var: Var, + right_on_var: Var, +) -> bool { + if egraph[subst[left_cube_members_var]] + .data + .member_name_to_expr + .is_none() + { + return false; + } + + if egraph[subst[right_cube_members_var]] + .data + .member_name_to_expr + .is_none() + { + return false; + } + + let left_join_ons = var_iter!(egraph[subst[left_on_var]], JoinLeftOn) + .cloned() + .collect::>(); + let right_join_ons = var_iter!(egraph[subst[right_on_var]], JoinRightOn) + .cloned() + .collect::>(); + + // For now this allows only exact left.__cubeJoinField = right.__cubeJoinField + // TODO implement more complex conditions + + for left_join_on in &left_join_ons { + if left_join_on.len() != 1 { + continue; + } + + let left_join_on = &left_join_on[0]; + + let left_member_names_to_expr = &mut egraph[subst[left_cube_members_var]] + .data + .member_name_to_expr + .as_mut() + .unwrap(); + + let mut left_column_name = left_join_on.name.as_str(); + if let Some(name) = find_column_by_alias( + left_column_name, + left_member_names_to_expr, + left_join_on.relation.as_deref().unwrap_or_default(), + ) { + left_column_name = name.rsplit_once(".").unwrap().1; + } + + if left_column_name != "__cubeJoinField" { + 
continue; + } + + for right_join_on in &right_join_ons { + if right_join_on.len() != 1 { + continue; + } + + let right_join_on = &right_join_on[0]; + + let right_member_names_to_expr = &mut egraph[subst[right_cube_members_var]] + .data + .member_name_to_expr + .as_mut() + .unwrap(); + + let mut right_column_name = right_join_on.name.as_str(); + if let Some(name) = find_column_by_alias( + right_column_name, + right_member_names_to_expr, + right_join_on.relation.as_deref().unwrap_or_default(), + ) { + right_column_name = name.rsplit_once(".").unwrap().1; + } + + if right_column_name != "__cubeJoinField" { + continue; + } + + return true; + } + } + + false +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/aggregate.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/aggregate.rs index 64f02744fb7dd..f9cf9f6f13061 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/aggregate.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/aggregate.rs @@ -13,9 +13,10 @@ use crate::{ wrapped_select_window_expr_empty_tail, wrapper_pullup_replacer, wrapper_pushdown_replacer, AggregateFunctionExprDistinct, AggregateFunctionExprFun, AliasExprAlias, ColumnExprColumn, ListType, LogicalPlanLanguage, WrappedSelectPushToCube, WrapperPullupReplacerAliasToCube, - WrapperPullupReplacerPushToCube, WrapperPushdownReplacerPushToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, + WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, + copy_flag, copy_value, transport::V1CubeMetaMeasureExt, var, var_iter, }; @@ -36,6 +37,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -52,6 +54,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( 
wrapped_select_subqueries_empty_tail(), @@ -59,6 +62,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?group_expr", @@ -66,6 +70,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pushdown_replacer( "?aggr_expr", @@ -73,6 +78,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_window_expr_empty_tail(), @@ -80,6 +86,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -87,14 +94,23 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_joins_empty_tail(), + "?alias_to_cube", + "?push_to_cube", + "WrapperPullupReplacerInProjection:false", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapper_pullup_replacer( wrapped_select_filter_expr_empty_tail(), "?alias_to_cube", "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapped_select_having_expr_empty_tail(), "WrappedSelectLimit:None", @@ -105,6 +121,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), "WrappedSelectAlias:None", "WrappedSelectDistinct:false", @@ -119,6 +136,8 @@ impl WrapperRules { "?push_to_cube", "?pushdown_push_to_cube", "?select_push_to_cube", + "?grouped_subqueries", + "?pushdown_grouped_subqueries", ), ), transforming_rewrite( @@ -129,6 +148,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + 
"?grouped_subqueries", ), grouping_set_expr( wrapper_pushdown_replacer( @@ -137,6 +157,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), "?type", ), @@ -151,6 +172,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), "?type", ), @@ -160,6 +182,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), ), ]); @@ -182,6 +205,7 @@ impl WrapperRules { "WrapperPushdownReplacerPushToCube:true", "?in_projection", "?cube_members", + "?grouped_subqueries", ), vec![("?aggr_expr", aggr_expr)], wrapper_pullup_replacer( @@ -190,6 +214,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", ), self.pushdown_measure( "?aggr_expr", @@ -199,6 +224,8 @@ impl WrapperRules { cast_data_type, "?cube_members", "?measure", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), ) }, @@ -259,6 +286,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -278,6 +306,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?subqueries", @@ -285,6 +314,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pushdown_replacer( "?group_expr", @@ -292,6 +322,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pushdown_replacer( "?aggr_expr", @@ -299,6 +330,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?pushdown_grouped_subqueries", ), 
wrapper_pullup_replacer( wrapped_select_window_expr_empty_tail(), @@ -306,6 +338,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -313,14 +346,23 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_joins_empty_tail(), + "?alias_to_cube", + "?push_to_cube", + "WrapperPullupReplacerInProjection:false", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapper_pullup_replacer( wrapped_select_filter_expr_empty_tail(), "?alias_to_cube", "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapped_select_having_expr_empty_tail(), "WrappedSelectLimit:None", @@ -331,6 +373,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), "WrappedSelectAlias:None", "WrappedSelectDistinct:false", @@ -346,12 +389,14 @@ impl WrapperRules { "?push_to_cube", "?pushdown_push_to_cube", "?select_push_to_cube", + "?grouped_subqueries", + "?pushdown_grouped_subqueries", ), )]); } pub fn aggregate_merge_rules(&self, rules: &mut Vec) { - rules.extend(vec![rewrite( + rules.extend(vec![transforming_rewrite( "wrapper-merge-aggregation-with-inner-wrapped-select", // Input is not a finished wrapper_pullup_replacer, but WrappedSelect just before pullup // After pullup replacer would disable push to cube, because any node on top would have WrappedSelect in `from` @@ -367,6 +412,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_subqueries_empty_tail(), @@ -374,6 +420,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", 
"WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_group_expr_empty_tail(), @@ -381,6 +428,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_aggr_expr_empty_tail(), @@ -388,6 +436,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_window_expr_empty_tail(), @@ -395,6 +444,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?inner_from", @@ -402,14 +452,23 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + "?inner_joins", + "?alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "WrapperPullupReplacerInProjection:false", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapper_pullup_replacer( "?inner_filters", "?alias_to_cube", "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapped_select_having_expr_empty_tail(), // Inner must not have limit and offset, because they are not commutative with aggregation @@ -421,6 +480,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), "WrappedSelectAlias:None", "WrappedSelectDistinct:false", @@ -442,6 +502,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( 
wrapped_select_subqueries_empty_tail(), @@ -449,6 +510,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?group_expr", @@ -456,6 +518,7 @@ impl WrapperRules { "WrapperPushdownReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pushdown_replacer( "?aggr_expr", @@ -463,6 +526,7 @@ impl WrapperRules { "WrapperPushdownReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_window_expr_empty_tail(), @@ -470,6 +534,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?inner_from", @@ -477,14 +542,23 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + "?inner_joins", + "?alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "WrapperPullupReplacerInProjection:false", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapper_pullup_replacer( "?inner_filters", "?alias_to_cube", "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), wrapped_select_having_expr_empty_tail(), "WrappedSelectLimit:None", @@ -495,6 +569,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:false", "?cube_members", + "?grouped_subqueries", ), "WrappedSelectAlias:None", "WrappedSelectDistinct:false", @@ -503,6 +578,7 @@ impl WrapperRules { ), "CubeScanWrapperFinalized:false", ), + self.transform_merge_aggregate("?grouped_subqueries", "?pushdown_grouped_subqueries"), )]); } @@ 
-513,12 +589,16 @@ impl WrapperRules { push_to_cube_var: &'static str, pushdown_push_to_cube_var: &'static str, select_push_to_cube_var: &'static str, + grouped_subqueries_var: &'static str, + pushdown_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let group_expr_var = var!(group_expr_var); let aggr_expr_var = var!(aggr_expr_var); let push_to_cube_var = var!(push_to_cube_var); let pushdown_push_to_cube_var = var!(pushdown_push_to_cube_var); let select_push_to_cube_var = var!(select_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); move |egraph, subst| { Self::transform_aggregate_impl( egraph, @@ -528,6 +608,8 @@ impl WrapperRules { push_to_cube_var, pushdown_push_to_cube_var, select_push_to_cube_var, + grouped_subqueries_var, + pushdown_grouped_subqueries_var, ) } } @@ -540,6 +622,8 @@ impl WrapperRules { push_to_cube_var: &'static str, pushdown_push_to_cube_var: &'static str, select_push_to_cube_var: &'static str, + grouped_subqueries_var: &'static str, + pushdown_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let alias_to_cube_var = var!(alias_to_cube_var); let group_expr_var = var!(group_expr_var); @@ -547,6 +631,8 @@ impl WrapperRules { let push_to_cube_var = var!(push_to_cube_var); let pushdown_push_to_cube_var = var!(pushdown_push_to_cube_var); let select_push_to_cube_var = var!(select_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); let meta = self.meta_context.clone(); move |egraph, subst| { if Self::transform_check_subquery_allowed( @@ -563,6 +649,8 @@ impl WrapperRules { push_to_cube_var, pushdown_push_to_cube_var, select_push_to_cube_var, + grouped_subqueries_var, + pushdown_grouped_subqueries_var, ) } else { false @@ -578,6 +666,8 @@ impl WrapperRules { 
push_to_cube_var: Var, pushdown_push_to_cube_var: Var, select_push_to_cube_var: Var, + grouped_subqueries_var: Var, + pushdown_grouped_subqueries_var: Var, ) -> bool { if egraph[subst[group_expr_var]].data.referenced_expr.is_none() { return false; @@ -597,6 +687,18 @@ impl WrapperRules { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries, + pushdown_grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries + ) { + return false; + } + for push_to_cube in var_iter!( egraph[subst[push_to_cube_var]], WrapperPullupReplacerPushToCube @@ -614,6 +716,30 @@ impl WrapperRules { false } + fn transform_merge_aggregate( + &self, + grouped_subqueries_var: &'static str, + pushdown_grouped_subqueries_var: &'static str, + ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); + move |egraph, subst| { + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries, + pushdown_grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries + ) { + return false; + } + + true + } + } + fn check_rollup_allowed( &self, alias_to_cube_var: &'static str, @@ -651,6 +777,8 @@ impl WrapperRules { _cast_data_type_var: Option<&'static str>, cube_members_var: &'static str, measure_out_var: &'static str, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let original_expr_var = var!(original_expr_var); let column_var = column_var.map(|v| var!(v)); @@ -659,6 +787,8 @@ impl WrapperRules { // let cast_data_type_var = cast_data_type_var.map(|v| var!(v)); let cube_members_var = var!(cube_members_var); let measure_out_var = var!(measure_out_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = 
var!(pullup_grouped_subqueries_var); let meta = self.meta_context.clone(); let disable_strict_agg_type_match = self.config_obj.disable_strict_agg_type_match(); move |egraph, subst| { @@ -710,6 +840,18 @@ impl WrapperRules { disable_strict_agg_type_match, ) { + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } + let column_expr_column = egraph.add(LogicalPlanLanguage::ColumnExprColumn( ColumnExprColumn(column.clone()), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/aggregate_function.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/aggregate_function.rs index 827132fb01df5..aadcddbce8a93 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/aggregate_function.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/aggregate_function.rs @@ -22,6 +22,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), agg_fun_expr( "?fun", @@ -31,6 +32,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )], "?distinct", ), @@ -45,6 +47,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )], "?distinct", ), @@ -54,6 +57,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_agg_fun_expr("?fun", "?distinct", "?alias_to_cube"), ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/alias.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/alias.rs index 521c4f8d7578c..8c1268806256a 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/alias.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/alias.rs @@ -14,6 +14,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + 
"?grouped_subqueries", ), alias_expr( wrapper_pushdown_replacer( @@ -22,6 +23,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?alias", ), @@ -35,6 +37,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?alias", ), @@ -44,6 +47,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ]); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/binary_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/binary_expr.rs index 226e7608b5490..9ab802f261ac0 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/binary_expr.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/binary_expr.rs @@ -21,6 +21,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), binary_expr( wrapper_pushdown_replacer( @@ -29,6 +30,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?op", wrapper_pushdown_replacer( @@ -37,6 +39,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ), @@ -49,6 +52,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?op", wrapper_pullup_replacer( @@ -57,6 +61,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), wrapper_pullup_replacer( @@ -65,6 +70,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_binary_expr("?op", "?alias_to_cube"), ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/case.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/case.rs index b64ddea965a58..9eca3354b25e0 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/case.rs +++ 
b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/case.rs @@ -21,6 +21,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), case_expr_var_arg( wrapper_pushdown_replacer( @@ -29,6 +30,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?then", @@ -36,6 +38,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?else", @@ -43,6 +46,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ), @@ -55,6 +59,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?then", @@ -62,6 +67,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?else", @@ -69,6 +75,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), wrapper_pullup_replacer( @@ -77,6 +84,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_case_expr("?alias_to_cube"), ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/cast.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/cast.rs index 22f8f3945972d..119b79bc0a7df 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/cast.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/cast.rs @@ -14,6 +14,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), cast_expr( wrapper_pushdown_replacer( @@ -22,6 +23,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?data_type", ), @@ -35,6 +37,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + 
"?grouped_subqueries", ), "?data_type", ), @@ -44,6 +47,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ]); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/column.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/column.rs index be57c4b7f743c..ed685fabf614a 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/column.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/column.rs @@ -1,20 +1,21 @@ use crate::{ compile::rewrite::{ analysis::Member, - column_expr, rewrite, + column_expr, rewriter::{CubeEGraph, CubeRewrite}, rules::wrapper::WrapperRules, transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer, ColumnExprColumn, LogicalPlanLanguage, WrapperPullupReplacerAliasToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPushdownReplacerGroupedSubqueries, }, - var, var_iter, + copy_value, var, var_iter, }; use egg::Subst; impl WrapperRules { pub fn column_rules(&self, rules: &mut Vec) { rules.extend(vec![ - rewrite( + transforming_rewrite( "wrapper-push-down-column", wrapper_pushdown_replacer( column_expr("?name"), @@ -22,6 +23,7 @@ impl WrapperRules { "WrapperPushdownReplacerPushToCube:false", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( column_expr("?name"), @@ -29,7 +31,9 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:false", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", ), + self.pushdown_column("?grouped_subqueries", "?pullup_grouped_subqueries"), ), // TODO This is half measure implementation to propagate ungrouped simple measure towards aggregate node that easily allow replacement of aggregation functions // We need to support it for complex aka `number` measures @@ -41,6 +45,7 @@ impl WrapperRules { "WrapperPushdownReplacerPushToCube:true", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( 
column_expr("?name"), @@ -48,8 +53,14 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?pullup_grouped_subqueries", + ), + self.pushdown_simple_measure( + "?name", + "?cube_members", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), - self.pushdown_simple_measure("?name", "?cube_members"), ), // TODO time dimension support transforming_rewrite( @@ -60,6 +71,7 @@ impl WrapperRules { "WrapperPushdownReplacerPushToCube:true", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?dimension", @@ -67,24 +79,72 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:true", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", + ), + self.pushdown_dimension( + "?alias_to_cube", + "?name", + "?cube_members", + "?dimension", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), - self.pushdown_dimension("?alias_to_cube", "?name", "?cube_members", "?dimension"), ), ]); } + fn pushdown_column( + &self, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, + ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); + move |egraph, subst| { + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } + + true + } + } + fn pushdown_dimension( &self, alias_to_cube_var: &'static str, column_name_var: &'static str, members_var: &'static str, dimension_var: &'static str, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let alias_to_cube_var = var!(alias_to_cube_var); let column_name_var = var!(column_name_var); let members_var = 
var!(members_var); let dimension_var = var!(dimension_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); move |egraph, subst| { + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } + let columns: Vec<_> = var_iter!(egraph[subst[column_name_var]], ColumnExprColumn) .cloned() .collect(); @@ -112,6 +172,30 @@ impl WrapperRules { } } } + + // Treat any column from grouped subquery as dimension, and pullup even when push to cube is enabled + // Column expressions can refer to grouped queries even without explicit relation + // TODO implement proper name resolution here + if let Some(col_relation) = &column.relation { + for grouped_subqueries in var_iter!( + egraph[subst[grouped_subqueries_var]], + WrapperPushdownReplacerGroupedSubqueries + ) { + if grouped_subqueries.iter().any(|subq| subq == col_relation) { + // Found grouped subquery, can "replace" column with itself + let column_expr_column = + egraph.add(LogicalPlanLanguage::ColumnExprColumn( + ColumnExprColumn(column.clone()), + )); + + let column_expr = + egraph.add(LogicalPlanLanguage::ColumnExpr([column_expr_column])); + subst.insert(dimension_var, column_expr); + return true; + } + } + } + if let Some((member, _)) = &egraph[subst[members_var]] .data .find_member_by_alias(&column.name) @@ -144,9 +228,13 @@ impl WrapperRules { &self, column_name_var: &'static str, members_var: &'static str, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let column_name_var = var!(column_name_var); let members_var = var!(members_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); let meta = 
self.meta_context.clone(); move |egraph, subst| { let columns: Vec<_> = var_iter!(egraph[subst[column_name_var]], ColumnExprColumn) @@ -159,6 +247,18 @@ impl WrapperRules { { if let Some(measure) = meta.find_measure_with_name(member.to_string()) { if measure.agg_type != Some("number".to_string()) { + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } + return true; } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/cube_scan_wrapper.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/cube_scan_wrapper.rs index 1e7cccd145534..d516b62212bb8 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/cube_scan_wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/cube_scan_wrapper.rs @@ -5,7 +5,7 @@ use crate::{ rules::wrapper::WrapperRules, transforming_rewrite, wrapper_pullup_replacer, CubeScanAliasToCube, CubeScanLimit, CubeScanOffset, CubeScanUngrouped, LogicalPlanLanguage, WrapperPullupReplacerAliasToCube, - WrapperPullupReplacerPushToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, }, var, var_iter, }; @@ -46,6 +46,7 @@ impl WrapperRules { "?push_to_cube_out", "WrapperPullupReplacerInProjection:false", "?members", + "?grouped_subqueries_out", ), "CubeScanWrapperFinalized:false", ), @@ -57,6 +58,7 @@ impl WrapperRules { "?ungrouped", "?alias_to_cube_out", "?push_to_cube_out", + "?grouped_subqueries_out", ), ), rewrite( @@ -68,6 +70,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -85,6 +88,7 @@ impl WrapperRules { ungrouped_cube_var: &'static str, alias_to_cube_var_out: &'static str, push_to_cube_out_var: &'static str, + grouped_subqueries_out_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let 
members_var = var!(members_var); let alias_to_cube_var = var!(alias_to_cube_var); @@ -93,6 +97,7 @@ impl WrapperRules { let ungrouped_cube_var = var!(ungrouped_cube_var); let alias_to_cube_var_out = var!(alias_to_cube_var_out); let push_to_cube_out_var = var!(push_to_cube_out_var); + let grouped_subqueries_out_var = var!(grouped_subqueries_out_var); move |egraph, subst| { let mut has_no_limit_or_offset = true; for limit in var_iter!(egraph[subst[limit_var]], CubeScanLimit).cloned() { @@ -126,6 +131,14 @@ impl WrapperRules { WrapperPullupReplacerAliasToCube(alias_to_cube), )), ); + subst.insert( + grouped_subqueries_out_var, + egraph.add( + LogicalPlanLanguage::WrapperPullupReplacerGroupedSubqueries( + WrapperPullupReplacerGroupedSubqueries(vec![]), + ), + ), + ); return true; } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/distinct.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/distinct.rs index ab959f80444a3..b9af37fa16c38 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/distinct.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/distinct.rs @@ -32,6 +32,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false".to_string(), )), @@ -60,6 +61,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/extract.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/extract.rs index 230aaaa8c17a6..9f1b46eff2089 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/extract.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/extract.rs @@ -22,6 +22,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?date", @@ -29,6 +30,7 @@ impl WrapperRules { "?push_to_cube", 
"?in_projection", "?cube_members", + "?grouped_subqueries", ), ], ), @@ -41,6 +43,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_date_part_expr("?alias_to_cube"), )]); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/filter.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/filter.rs index 2f75b8b59cf6f..6ac99d2193c74 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/filter.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/filter.rs @@ -10,9 +10,10 @@ use crate::{ wrapped_select_projection_expr_empty_tail, wrapped_select_subqueries_empty_tail, wrapped_select_window_expr_empty_tail, wrapper_pullup_replacer, wrapper_pushdown_replacer, LogicalPlanLanguage, WrappedSelectPushToCube, WrappedSelectUngroupedScan, - WrapperPullupReplacerPushToCube, WrapperPushdownReplacerPushToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, + WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, var, var_iter, + copy_flag, copy_value, var, var_iter, }; use egg::{Subst, Var}; @@ -123,6 +124,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -136,6 +138,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_subqueries_empty_tail(), @@ -143,6 +146,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_group_expr_empty_tail(), @@ -150,6 +154,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_aggr_expr_empty_tail(), @@ -157,6 +162,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), 
wrapper_pullup_replacer( wrapped_select_window_expr_empty_tail(), @@ -164,6 +170,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -171,8 +178,16 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_joins_empty_tail(), + "?alias_to_cube", + "?push_to_cube", + "?in_projection", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapped_select_filter_expr( wrapper_pushdown_replacer( "?filter_expr", @@ -180,6 +195,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "?in_projection", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_filter_expr_empty_tail(), @@ -187,6 +203,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), wrapped_select_having_expr_empty_tail(), @@ -198,6 +215,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "WrappedSelectAlias:None", "WrappedSelectDistinct:false", @@ -211,6 +229,8 @@ impl WrapperRules { "?pushdown_push_to_cube", "?select_push_to_cube", "?select_ungrouped_scan", + "?grouped_subqueries", + "?pushdown_grouped_subqueries", ), )]); @@ -235,6 +255,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -251,6 +272,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?subqueries", @@ -258,6 +280,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "?in_projection", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_group_expr_empty_tail(), @@ -265,6 +288,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", 
), wrapper_pullup_replacer( wrapped_select_aggr_expr_empty_tail(), @@ -272,6 +296,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_window_expr_empty_tail(), @@ -279,6 +304,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -286,8 +312,16 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_joins_empty_tail(), + "?alias_to_cube", + "?push_to_cube", + "?in_projection", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapped_select_filter_expr( wrapper_pushdown_replacer( "?filter_expr", @@ -295,6 +329,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "?in_projection", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_filter_expr_empty_tail(), @@ -302,6 +337,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), wrapped_select_having_expr_empty_tail(), @@ -313,6 +349,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "WrappedSelectAlias:None", "WrappedSelectDistinct:false", @@ -327,6 +364,8 @@ impl WrapperRules { "?pushdown_push_to_cube", "?select_push_to_cube", "?select_ungrouped_scan", + "?grouped_subqueries", + "?pushdown_grouped_subqueries", ), )]); } @@ -337,11 +376,15 @@ impl WrapperRules { pushdown_push_to_cube_var: &'static str, select_push_to_cube_var: &'static str, select_ungrouped_scan_var: &'static str, + grouped_subqueries_var: &'static str, + pushdown_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pushdown_push_to_cube_var = var!(pushdown_push_to_cube_var); let select_push_to_cube_var = 
var!(select_push_to_cube_var); let select_ungrouped_scan_var = var!(select_ungrouped_scan_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); move |egraph, subst| { Self::transform_filter_impl( egraph, @@ -350,6 +393,8 @@ impl WrapperRules { pushdown_push_to_cube_var, select_push_to_cube_var, select_ungrouped_scan_var, + grouped_subqueries_var, + pushdown_grouped_subqueries_var, ) } } @@ -361,12 +406,16 @@ impl WrapperRules { pushdown_push_to_cube_var: &'static str, select_push_to_cube_var: &'static str, select_ungrouped_scan_var: &'static str, + grouped_subqueries_var: &'static str, + pushdown_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let alias_to_cube_var = var!(alias_to_cube_var); let push_to_cube_var = var!(push_to_cube_var); let pushdown_push_to_cube_var = var!(pushdown_push_to_cube_var); let select_push_to_cube_var = var!(select_push_to_cube_var); let select_ungrouped_scan_var = var!(select_ungrouped_scan_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); let meta = self.meta_context.clone(); move |egraph, subst| { if Self::transform_check_subquery_allowed( @@ -382,6 +431,8 @@ impl WrapperRules { pushdown_push_to_cube_var, select_push_to_cube_var, select_ungrouped_scan_var, + grouped_subqueries_var, + pushdown_grouped_subqueries_var, ) } else { false @@ -396,6 +447,8 @@ impl WrapperRules { pushdown_push_to_cube_var: Var, select_push_to_cube_var: Var, select_ungrouped_scan_var: Var, + grouped_subqueries_var: Var, + pushdown_grouped_subqueries_var: Var, ) -> bool { if !copy_flag!( egraph, @@ -408,6 +461,18 @@ impl WrapperRules { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries, + pushdown_grouped_subqueries_var, + 
WrapperPushdownReplacerGroupedSubqueries + ) { + return false; + } + for push_to_cube in var_iter!( egraph[subst[push_to_cube_var]], WrapperPullupReplacerPushToCube diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/in_list_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/in_list_expr.rs index d2d37c4899bb3..ff94d71f0f5c8 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/in_list_expr.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/in_list_expr.rs @@ -4,10 +4,11 @@ use crate::{ rewriter::{CubeEGraph, CubeRewrite}, rules::wrapper::WrapperRules, transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer, - WrapperPullupReplacerAliasToCube, WrapperPullupReplacerPushToCube, + WrapperPullupReplacerAliasToCube, WrapperPullupReplacerGroupedSubqueries, + WrapperPullupReplacerPushToCube, WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, var, var_iter, + copy_flag, copy_value, var, var_iter, }; use egg::Subst; @@ -22,6 +23,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), inlist_expr( wrapper_pushdown_replacer( @@ -30,6 +32,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?list", @@ -37,6 +40,7 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", ), "?negated", ), @@ -44,6 +48,8 @@ impl WrapperRules { "?list", "?push_to_cube", "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), ), rewrite( @@ -54,6 +60,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), inlist_expr( wrapper_pushdown_replacer( @@ -62,6 +69,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?list", @@ -69,6 +77,7 @@ impl 
WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?negated", ), @@ -82,6 +91,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?list", @@ -89,6 +99,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?negated", ), @@ -98,6 +109,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_in_list_expr("?alias_to_cube"), ), @@ -139,10 +151,14 @@ impl WrapperRules { list_var: &'static str, push_to_cube_var: &'static str, pullup_push_to_cube_var: &'static str, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let list_var = var!(list_var); let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); move |egraph: &mut CubeEGraph, subst| { if !copy_flag!( egraph, @@ -154,6 +170,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } return egraph[subst[list_var]].data.constant_in_list.is_some(); } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/in_subquery_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/in_subquery_expr.rs index d6699eac3dbd4..02fe0e26e975e 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/in_subquery_expr.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/in_subquery_expr.rs @@ -4,9 +4,10 @@ use crate::{ rewriter::{CubeEGraph, CubeRewrite}, rules::wrapper::WrapperRules, 
transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer, - WrapperPullupReplacerPushToCube, WrapperPushdownReplacerPushToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, + WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, var, + copy_flag, copy_value, var, }; use egg::Subst; @@ -21,6 +22,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), insubquery_expr( wrapper_pushdown_replacer( @@ -29,6 +31,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?subquery", @@ -36,10 +39,16 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", ), "?negated", ), - self.transform_in_subquery_pushdown("?push_to_cube", "?pullup_push_to_cube"), + self.transform_in_subquery_pushdown( + "?push_to_cube", + "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", + ), ), rewrite( "wrapper-in-subquery-pull-up", @@ -50,6 +59,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?subquery", @@ -57,6 +67,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?negated", ), @@ -66,6 +77,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ]); @@ -75,9 +87,13 @@ impl WrapperRules { &self, push_to_cube_var: &'static str, pullup_push_to_cube_var: &'static str, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = 
var!(pullup_grouped_subqueries_var); move |egraph: &mut CubeEGraph, subst| { if !copy_flag!( egraph, @@ -89,6 +105,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } true } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/is_null_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/is_null_expr.rs index 8239eb0aa34e8..55da4b2ecbaec 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/is_null_expr.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/is_null_expr.rs @@ -21,6 +21,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), is_null_expr(wrapper_pushdown_replacer( "?expr", @@ -28,6 +29,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )), ), transforming_rewrite( @@ -38,6 +40,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )), wrapper_pullup_replacer( is_null_expr("?expr"), @@ -45,6 +48,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_is_null_expr("?alias_to_cube"), ), @@ -56,6 +60,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), is_not_null_expr(wrapper_pushdown_replacer( "?expr", @@ -63,6 +68,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )), ), transforming_rewrite( @@ -73,6 +79,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )), wrapper_pullup_replacer( is_not_null_expr("?expr"), @@ -80,6 +87,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), 
self.transform_is_null_expr("?alias_to_cube"), ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/join.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/join.rs new file mode 100644 index 0000000000000..daf1b997f1b24 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/join.rs @@ -0,0 +1,413 @@ +use crate::{ + compile::rewrite::{ + cube_scan_wrapper, join, rewrite, rewriter::CubeRewrite, rules::wrapper::WrapperRules, + transforming_rewrite, wrapped_select, wrapped_select_aggr_expr_empty_tail, + wrapped_select_filter_expr_empty_tail, wrapped_select_group_expr_empty_tail, + wrapped_select_having_expr_empty_tail, wrapped_select_join, wrapped_select_joins, + wrapped_select_joins_empty_tail, wrapped_select_order_expr_empty_tail, + wrapped_select_projection_expr_empty_tail, wrapped_select_subqueries_empty_tail, + wrapped_select_window_expr_empty_tail, wrapper_pullup_replacer, wrapper_pushdown_replacer, + BinaryExprOp, ColumnExprColumn, CubeEGraph, JoinLeftOn, JoinRightOn, LogicalPlanLanguage, + WrappedSelectJoinJoinType, WrapperPullupReplacerGroupedSubqueries, + WrapperPushdownReplacerGroupedSubqueries, + }, + var, var_iter, var_list_iter, +}; + +use crate::compile::rewrite::analysis::Member; +use datafusion::{logical_expr::Operator, logical_plan::Column}; +use egg::{Id, Subst}; + +impl WrapperRules { + pub fn join_rules(&self, rules: &mut Vec) { + rules.extend(vec![ + rewrite( + "wrapper-pull-up-single-select-join", + wrapped_select_join( + wrapper_pullup_replacer( + "?input", + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + "?join_expr", + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + "?grouped_subqueries", + ), + "?out_join_type", + ), + wrapper_pullup_replacer( + wrapped_select_join("?input", "?join_expr", "?out_join_type"), + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + 
"?grouped_subqueries", + ), + ), + // TODO handle CrossJoin and Filter(CrossJoin) as well + transforming_rewrite( + "wrapper-push-down-ungrouped-join-grouped", + join( + cube_scan_wrapper( + wrapper_pullup_replacer( + "?left_cube_scan_input", + // Going to use this in RHS of rule + // RHS of join is grouped, so it shouldn't have any cubes or members + "?left_alias_to_cube", + // This check is important + // Rule would place ?left_cube_scan_input to `from` position of WrappedSelect(WrappedSelectPushToCube:true) + // So it need to support push-to-Cube + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + // Going to use this in RHS of rule + // RHS of join is grouped, so it shouldn't have any cubes or members + "?left_cube_members", + "?left_grouped_subqueries", + ), + "CubeScanWrapperFinalized:false", + ), + cube_scan_wrapper( + wrapper_pullup_replacer( + "?right_input", + // Going to ignore this + "?right_alias_to_cube", + // TODO depend on proper "ungrouped scan" flag (that is not a push-to-cube) + "WrapperPullupReplacerPushToCube:false", + "?in_projection", + // Going to ignore this + "?right_cube_members", + "?right_grouped_subqueries", + ), + "CubeScanWrapperFinalized:false", + ), + "?left_on", + "?right_on", + "?in_join_type", + "?join_constraint", + "JoinNullEqualsNull:false", + ), + cube_scan_wrapper( + wrapped_select( + "WrappedSelectSelectType:Projection", + wrapper_pullup_replacer( + wrapped_select_projection_expr_empty_tail(), + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_subqueries_empty_tail(), + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_group_expr_empty_tail(), + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + 
"?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_aggr_expr_empty_tail(), + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_window_expr_empty_tail(), + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + wrapper_pullup_replacer( + // Can move left_cube_scan_input here without checking if it's actually CubeScan + // Check for WrapperPullupReplacerPushToCube:true should be enough + "?left_cube_scan_input", + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + // We don't want to use list rules here, because ?right_input is already done + wrapped_select_joins( + wrapped_select_join( + wrapper_pullup_replacer( + "?right_input", + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + wrapper_pushdown_replacer( + "?out_join_expr", + // TODO pullup field in pushdown replacer + "?left_alias_to_cube", + // On one hand, this should be PushToCube:true, so we would only join on dimensions + // On other: RHS is grouped, so any column is just a column + // Right now, it is relying on grouped_subqueries + PushToCube:true, to allow both dimensions and grouped columns + "WrapperPushdownReplacerPushToCube:true", + // TODO pullup flag in pushdown replacer + "?in_projection", + "?left_cube_members", + "?out_pushdown_grouped_subqueries", + ), + "?out_join_type", + ), + // pullup(tail) just so it could be easily picked up by pullup rules + wrapper_pullup_replacer( + wrapped_select_joins_empty_tail(), + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + 
"?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + ), + wrapper_pullup_replacer( + wrapped_select_filter_expr_empty_tail(), + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + wrapped_select_having_expr_empty_tail(), + "WrappedSelectLimit:None", + "WrappedSelectOffset:None", + wrapper_pullup_replacer( + wrapped_select_order_expr_empty_tail(), + "?left_alias_to_cube", + "WrapperPullupReplacerPushToCube:true", + "?in_projection", + "?left_cube_members", + "?out_pullup_grouped_subqueries", + ), + "WrappedSelectAlias:None", + "WrappedSelectDistinct:false", + // left input has WrapperPullupReplacerPushToCube:true + // Meaning that left input itself is ungrouped CubeScan + // Keep it in result, rely on pull-up rules to drop it, and on flattening rules to pick it up + "WrappedSelectPushToCube:true", + // left input is WrapperPullupReplacerPushToCube:true, so result must be ungrouped + "WrappedSelectUngroupedScan:true", + ), + "CubeScanWrapperFinalized:false", + ), + self.transform_ungrouped_join_grouped( + "?left_cube_members", + "?left_on", + "?right_on", + "?in_join_type", + "?out_join_expr", + "?out_join_type", + "?out_pullup_grouped_subqueries", + "?out_pushdown_grouped_subqueries", + ), + ), + ]); + + // TODO only pullup is necessary here + Self::list_pushdown_pullup_rules( + rules, + "wrapper-joins", + "WrappedSelectJoins", + "WrappedSelectJoins", + ); + } + + fn are_join_members_supported<'egraph, 'columns>( + egraph: &'egraph mut CubeEGraph, + members: Id, + join_on: impl IntoIterator, + ) -> bool { + let members_data = &mut egraph[members].data; + + for column in join_on { + if let Some(((_, member, _), _)) = members_data.find_member_by_column(column) { + match member { + Member::Dimension { .. 
} => { + // do nothing + } + _ => { + // Unsupported member + return false; + } + } + } + } + + true + } + + fn build_join_expr( + egraph: &mut CubeEGraph, + left_join_on: impl IntoIterator, + right_join_on: impl IntoIterator, + ) -> Option { + let join_on_pairs = left_join_on + .into_iter() + .zip(right_join_on.into_iter()) + .collect::>(); + + let result_expr = + join_on_pairs + .into_iter() + .fold(None, |acc, (left_column, right_column)| { + let left_expr = egraph.add(LogicalPlanLanguage::ColumnExprColumn( + ColumnExprColumn(left_column), + )); + let right_expr = egraph.add(LogicalPlanLanguage::ColumnExprColumn( + ColumnExprColumn(right_column), + )); + let eq_expr = LogicalPlanLanguage::BinaryExpr([ + egraph.add(LogicalPlanLanguage::ColumnExpr([left_expr])), + egraph.add(LogicalPlanLanguage::BinaryExprOp(BinaryExprOp( + Operator::Eq, + ))), + egraph.add(LogicalPlanLanguage::ColumnExpr([right_expr])), + ]); + let eq_expr = egraph.add(eq_expr); + + let result = if let Some(acc) = acc { + let chained_expr = LogicalPlanLanguage::BinaryExpr([ + acc, + egraph.add(LogicalPlanLanguage::BinaryExprOp(BinaryExprOp( + Operator::And, + ))), + eq_expr, + ]); + egraph.add(chained_expr) + } else { + eq_expr + }; + + Some(result) + }); + + result_expr + } + + fn transform_ungrouped_join_grouped( + &self, + left_members_var: &'static str, + left_on_var: &'static str, + right_on_var: &'static str, + in_join_type_var: &'static str, + out_join_expr_var: &'static str, + out_join_type_var: &'static str, + out_pullup_grouped_subqueries_var: &'static str, + out_pushdown_grouped_subqueries_var: &'static str, + ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { + let left_members_var = var!(left_members_var); + let left_on_var = var!(left_on_var); + + let right_on_var = var!(right_on_var); + + let in_join_type_var = var!(in_join_type_var); + + let out_join_expr_var = var!(out_join_expr_var); + let out_join_type_var = var!(out_join_type_var); + let out_pullup_grouped_subqueries_var = 
var!(out_pullup_grouped_subqueries_var); + let out_pushdown_grouped_subqueries_var = var!(out_pushdown_grouped_subqueries_var); + + // Only left is allowed to be ungrouped query, so right would be a subquery join for left ungrouped CubeScan + // It means we don't care about just a "single cube" in LHS, and there's essentially no cubes by this moment in RHS + + move |egraph, subst| { + // We are going to generate join with grouped subquery + // TODO Do we have to check stuff like `transform_check_subquery_allowed` is checking: + // * Both inputs depend on a single data source + // * SQL generator for that data source have `expressions/subquery` template + // It could be checked later, in WrappedSelect as well + + for left_join_on in var_iter!(egraph[subst[left_on_var]], JoinLeftOn).cloned() { + for right_join_on in var_iter!(egraph[subst[right_on_var]], JoinRightOn).cloned() { + // Don't check right, as it is already grouped + + for in_join_type in + var_list_iter!(egraph[subst[in_join_type_var]], JoinJoinType).cloned() + { + if !Self::are_join_members_supported( + egraph, + subst[left_members_var], + &left_join_on, + ) { + return false; + } + + // TODO what's a proper way to find table expression alias? 
+ let right_join_alias = right_join_on + .iter() + .filter_map(|c| c.relation.as_ref()) + .next() + .cloned(); + let Some(right_join_alias) = right_join_alias else { + return false; + }; + + let out_join_expr = + Self::build_join_expr(egraph, left_join_on, right_join_on); + let Some(out_join_expr) = out_join_expr else { + return false; + }; + + // LHS is ungrouped, RHS is grouped + // Don't pass ungrouped queries from below, their qualifiers should not be accessible during join condition rewrite + let out_grouped_subqueries = vec![right_join_alias]; + + subst.insert(out_join_expr_var, out_join_expr); + subst.insert( + out_join_type_var, + egraph.add(LogicalPlanLanguage::WrappedSelectJoinJoinType( + WrappedSelectJoinJoinType(in_join_type.0), + )), + ); + subst.insert( + out_pullup_grouped_subqueries_var, + egraph.add( + LogicalPlanLanguage::WrapperPullupReplacerGroupedSubqueries( + WrapperPullupReplacerGroupedSubqueries( + out_grouped_subqueries.clone(), + ), + ), + ), + ); + subst.insert( + out_pushdown_grouped_subqueries_var, + egraph.add( + LogicalPlanLanguage::WrapperPushdownReplacerGroupedSubqueries( + WrapperPushdownReplacerGroupedSubqueries( + out_grouped_subqueries, + ), + ), + ), + ); + + return true; + } + } + } + + return false; + } + } +} diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs index 98a8f85379a0b..cb466e9e99bb0 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs @@ -27,6 +27,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), like_expr( "?like_type", @@ -37,6 +38,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?pattern", @@ -44,6 +46,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", 
"?cube_members", + "?grouped_subqueries", ), "?escape_char", ), @@ -59,6 +62,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?pattern", @@ -66,6 +70,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?escape_char", ), @@ -81,6 +86,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_like_expr("?alias_to_cube", "?like_type", "?escape_char"), ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/limit.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/limit.rs index d46cb971541a3..1b31a8a6cbbd2 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/limit.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/limit.rs @@ -42,6 +42,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false".to_string(), ), @@ -71,6 +72,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/literal.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/literal.rs index 91cab261e8c98..96142f024e264 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/literal.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/literal.rs @@ -2,10 +2,11 @@ use crate::{ compile::rewrite::{ literal_expr, rules::wrapper::WrapperRules, transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer, LiteralExprValue, LogicalPlanLanguage, - WrapperPullupReplacerAliasToCube, WrapperPullupReplacerPushToCube, + WrapperPullupReplacerAliasToCube, WrapperPullupReplacerGroupedSubqueries, + WrapperPullupReplacerPushToCube, WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, 
- copy_flag, var, var_iter, + copy_flag, copy_value, var, var_iter, }; use crate::compile::rewrite::{ @@ -26,6 +27,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( literal_expr("?value"), @@ -33,12 +35,15 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", ), self.transform_literal( "?alias_to_cube", "?value", "?push_to_cube", "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), ), transforming_rewrite( @@ -49,6 +54,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?new_value", @@ -56,6 +62,7 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", ), self.transform_interval_literal( "?alias_to_cube", @@ -63,6 +70,8 @@ impl WrapperRules { "?new_value", "?push_to_cube", "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), ), ]); @@ -74,11 +83,15 @@ impl WrapperRules { value_var: &str, push_to_cube_var: &str, pullup_push_to_cube_var: &str, + grouped_subqueries_var: &str, + pullup_grouped_subqueries_var: &str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let alias_to_cube_var = var!(alias_to_cube_var); let value_var = var!(value_var); let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); let meta = self.meta_context.clone(); move |egraph, subst| { if !copy_flag!( @@ -91,6 +104,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } for 
alias_to_cube in var_iter!( egraph[subst[alias_to_cube_var]], @@ -129,12 +153,16 @@ impl WrapperRules { new_value_var: &str, push_to_cube_var: &str, pullup_push_to_cube_var: &str, + grouped_subqueries_var: &str, + pullup_grouped_subqueries_var: &str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let alias_to_cube_var = var!(alias_to_cube_var); let value_var = var!(value_var); let new_value_var = var!(new_value_var); let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); let meta = self.meta_context.clone(); move |egraph, subst| { if !copy_flag!( @@ -147,6 +175,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } for alias_to_cube in var_iter!( egraph[subst[alias_to_cube_var]], diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs index 5624d3a201134..974a05623d072 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs @@ -12,6 +12,7 @@ mod filter; mod in_list_expr; mod in_subquery_expr; mod is_null_expr; +mod join; mod like_expr; mod limit; mod literal; @@ -36,10 +37,11 @@ use crate::{ replacer_push_down_node, }, transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer, ListType, - WrapperPullupReplacerPushToCube, WrapperPushdownReplacerPushToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, + WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, config::ConfigObj, - copy_flag, + copy_flag, copy_value, transport::MetaContext, 
var, }; @@ -56,6 +58,7 @@ impl RewriteRules for WrapperRules { let mut rules = Vec::new(); self.cube_scan_wrapper_rules(&mut rules); + self.join_rules(&mut rules); self.wrapper_pull_up_rules(&mut rules); self.aggregate_rules(&mut rules); self.aggregate_rules_subquery(&mut rules); @@ -120,6 +123,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ) }, false, @@ -136,6 +140,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ) }, )); @@ -148,6 +153,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( substitute_list_node, @@ -155,17 +161,27 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", + ), + Self::transform_list_tail( + "?push_to_cube", + "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), - Self::transform_list_tail("?push_to_cube", "?pullup_push_to_cube"), )]); } fn transform_list_tail( push_to_cube_var: &str, pullup_push_to_cube_var: &str, + grouped_subqueries_var: &str, + pullup_grouped_subqueries_var: &str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); move |egraph, subst| { if !copy_flag!( egraph, @@ -177,6 +193,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } true } } @@ -197,6 +224,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ) }, false, @@ -213,6 +241,7 @@ impl WrapperRules { 
"?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ) }, &[ @@ -220,6 +249,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ], )); @@ -231,6 +261,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( substitute_list_type.empty_list(), @@ -238,17 +269,27 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", + ), + Self::transform_flat_list_tail( + "?push_to_cube", + "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), - Self::transform_flat_list_tail("?push_to_cube", "?pullup_push_to_cube"), )]); } fn transform_flat_list_tail( push_to_cube_var: &str, pullup_push_to_cube_var: &str, + grouped_subqueries_var: &str, + pullup_grouped_subqueries_var: &str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); move |egraph, subst| { if !copy_flag!( egraph, @@ -260,6 +301,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } true } } @@ -279,6 +331,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ) }, false, @@ -295,6 +348,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ) }, )); @@ -307,6 +361,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( list_node, @@ -314,17 +369,27 @@ impl WrapperRules { 
"?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", + ), + Self::transform_expr_list_tail( + "?push_to_cube", + "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), - Self::transform_expr_list_tail("?push_to_cube", "?pullup_push_to_cube"), )]); } fn transform_expr_list_tail( push_to_cube_var: &str, pullup_push_to_cube_var: &str, + grouped_subqueries_var: &str, + pullup_grouped_subqueries_var: &str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); move |egraph, subst| { if !copy_flag!( egraph, @@ -336,6 +401,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } true } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/negative_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/negative_expr.rs index 52c303ce32aa3..d0fcaafd0ec8d 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/negative_expr.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/negative_expr.rs @@ -21,6 +21,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), negative_expr(wrapper_pushdown_replacer( "?expr", @@ -28,6 +29,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )), ), transforming_rewrite( @@ -38,6 +40,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )), wrapper_pullup_replacer( negative_expr("?expr"), @@ -45,6 +48,7 @@ impl WrapperRules { "?push_to_cube", 
"?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_negative_expr("?alias_to_cube"), ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/not_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/not_expr.rs index 82572f59d97e4..6c1673d040e73 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/not_expr.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/not_expr.rs @@ -21,6 +21,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), not_expr(wrapper_pushdown_replacer( "?expr", @@ -28,6 +29,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )), ), transforming_rewrite( @@ -38,6 +40,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", )), wrapper_pullup_replacer( not_expr("?expr"), @@ -45,6 +48,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_not_expr("?alias_to_cube"), ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/order.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/order.rs index 98f9718d4f8f4..9c634c46d7134 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/order.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/order.rs @@ -4,10 +4,11 @@ use crate::{ rewriter::{CubeEGraph, CubeRewrite}, rules::wrapper::WrapperRules, sort, transforming_rewrite, wrapped_select, wrapped_select_order_expr_empty_tail, - wrapper_pullup_replacer, wrapper_pushdown_replacer, WrapperPullupReplacerPushToCube, + wrapper_pullup_replacer, wrapper_pushdown_replacer, WrapperPullupReplacerGroupedSubqueries, + WrapperPullupReplacerPushToCube, WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, var, + copy_flag, copy_value, var, }; use egg::Subst; @@ -42,6 +43,7 @@ impl WrapperRules { 
"?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -55,6 +57,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?subqueries", @@ -62,6 +65,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?group_expr", @@ -69,6 +73,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?aggr_expr", @@ -76,6 +81,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?window_expr", @@ -83,6 +89,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -90,14 +97,23 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + "?joins", + "?alias_to_cube", + "?push_to_cube", + "?in_projection", + "?cube_members", + "?grouped_subqueries", ), - "?joins", wrapper_pullup_replacer( "?filter_expr", "?alias_to_cube", "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?having_expr", "?limit", @@ -108,6 +124,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "?in_projection", "?cube_members", + "?pushdown_grouped_subqueries", ), "?select_alias", "?select_distinct", @@ -116,7 +133,12 @@ impl WrapperRules { ), "CubeScanWrapperFinalized:false", ), - self.transform_order("?push_to_cube", "?pushdown_push_to_cube"), + self.transform_order( + "?push_to_cube", + "?pushdown_push_to_cube", + "?grouped_subqueries", + "?pushdown_grouped_subqueries", + ), )]); Self::list_pushdown_pullup_rules( @@ -131,9 +153,13 @@ impl WrapperRules { &self, push_to_cube_var: &'static str, pushdown_push_to_cube_var: &'static str, + grouped_subqueries_var: 
&'static str, + pushdown_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pushdown_push_to_cube_var = var!(pushdown_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); move |egraph, subst| { if !copy_flag!( egraph, @@ -145,6 +171,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries, + pushdown_grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries + ) { + return false; + } true } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/projection.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/projection.rs index 473c5b04fef16..bc8c0cf7e337e 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/projection.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/projection.rs @@ -9,10 +9,11 @@ use crate::{ wrapped_select_order_expr_empty_tail, wrapped_select_subqueries_empty_tail, wrapped_select_window_expr_empty_tail, wrapper_pullup_replacer, wrapper_pushdown_replacer, ListType, LogicalPlanLanguage, ProjectionAlias, WrappedSelectAlias, - WrappedSelectPushToCube, WrappedSelectUngroupedScan, WrapperPullupReplacerPushToCube, - WrapperPushdownReplacerPushToCube, + WrappedSelectPushToCube, WrappedSelectUngroupedScan, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, + WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, var, var_iter, + copy_flag, copy_value, var, var_iter, }; use egg::{Subst, Var}; @@ -29,6 +30,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -44,6 +46,7 @@ impl WrapperRules { "?pushdown_push_to_cube", 
"WrapperPullupReplacerInProjection:true", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_subqueries_empty_tail(), @@ -51,6 +54,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_group_expr_empty_tail(), @@ -58,6 +62,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_aggr_expr_empty_tail(), @@ -65,6 +70,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_window_expr_empty_tail(), @@ -72,6 +78,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -79,14 +86,23 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_joins_empty_tail(), + "?alias_to_cube", + "?push_to_cube", + "WrapperPullupReplacerInProjection:true", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapper_pullup_replacer( wrapped_select_filter_expr_empty_tail(), "?alias_to_cube", "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapped_select_having_expr_empty_tail(), "WrappedSelectLimit:None", @@ -97,6 +113,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), "?select_alias", "WrappedSelectDistinct:false", @@ -113,6 +130,8 @@ impl WrapperRules { "?select_alias", "?select_push_to_cube", "?select_ungrouped_scan", + "?grouped_subqueries", + "?pushdown_grouped_subqueries", ), )]); @@ -146,6 +165,7 @@ impl 
WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -164,6 +184,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pushdown_replacer( "?subqueries", @@ -171,6 +192,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_group_expr_empty_tail(), @@ -178,6 +200,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_aggr_expr_empty_tail(), @@ -185,6 +208,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select_window_expr_empty_tail(), @@ -192,6 +216,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -199,14 +224,23 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + wrapped_select_joins_empty_tail(), + "?alias_to_cube", + "?push_to_cube", + "WrapperPullupReplacerInProjection:true", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapper_pullup_replacer( wrapped_select_filter_expr_empty_tail(), "?alias_to_cube", "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), wrapped_select_having_expr_empty_tail(), "WrappedSelectLimit:None", @@ -217,6 +251,7 @@ impl WrapperRules { "?push_to_cube", "WrapperPullupReplacerInProjection:true", "?cube_members", + "?grouped_subqueries", ), "?select_alias", "WrappedSelectDistinct:false", @@ 
-234,6 +269,8 @@ impl WrapperRules { "?select_alias", "?select_push_to_cube", "?select_ungrouped_scan", + "?grouped_subqueries", + "?pushdown_grouped_subqueries", ), )]); } @@ -246,6 +283,8 @@ impl WrapperRules { select_alias_var: &'static str, select_push_to_cube_var: &'static str, select_ungrouped_scan_var: &'static str, + grouped_subqueries_var: &'static str, + pushdown_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let expr_var = var!(expr_var); let projection_alias_var = var!(projection_alias_var); @@ -254,6 +293,8 @@ impl WrapperRules { let select_alias_var = var!(select_alias_var); let select_push_to_cube_var = var!(select_push_to_cube_var); let select_ungrouped_scan_var = var!(select_ungrouped_scan_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); move |egraph, subst| { Self::transform_projection_impl( egraph, @@ -265,6 +306,8 @@ impl WrapperRules { select_alias_var, select_push_to_cube_var, select_ungrouped_scan_var, + grouped_subqueries_var, + pushdown_grouped_subqueries_var, ) } } @@ -279,6 +322,8 @@ impl WrapperRules { select_alias_var: &'static str, select_push_to_cube_var: &'static str, select_ungrouped_scan_var: &'static str, + grouped_subqueries_var: &'static str, + pushdown_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let alias_to_cube_var = var!(alias_to_cube_var); let expr_var = var!(expr_var); @@ -289,6 +334,8 @@ impl WrapperRules { let select_push_to_cube_var = var!(select_push_to_cube_var); let select_ungrouped_scan_var = var!(select_ungrouped_scan_var); let meta = self.meta_context.clone(); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); move |egraph, subst| { if Self::transform_check_subquery_allowed( egraph, @@ -306,6 +353,8 @@ impl WrapperRules { select_alias_var, 
select_push_to_cube_var, select_ungrouped_scan_var, + grouped_subqueries_var, + pushdown_grouped_subqueries_var, ) } else { false @@ -323,6 +372,8 @@ impl WrapperRules { select_alias_var: Var, select_push_to_cube_var: Var, select_ungrouped_scan_var: Var, + grouped_subqueries_var: Var, + pushdown_grouped_subqueries_var: Var, ) -> bool { if let Some(_) = &egraph[subst[expr_var]].data.referenced_expr { if !copy_flag!( @@ -335,6 +386,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries, + pushdown_grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries + ) { + return false; + } for projection_alias in var_iter!(egraph[subst[projection_alias_var]], ProjectionAlias).cloned() diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/scalar_function.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/scalar_function.rs index a888fa8ab9e70..b18ad1f035ec1 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/scalar_function.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/scalar_function.rs @@ -5,10 +5,11 @@ use crate::{ rules::wrapper::WrapperRules, scalar_fun_expr_args_empty_tail, scalar_fun_expr_args_legacy, transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer, ListPattern, ListType, - ScalarFunctionExprFun, WrapperPullupReplacerAliasToCube, WrapperPullupReplacerPushToCube, - WrapperPushdownReplacerPushToCube, + ScalarFunctionExprFun, WrapperPullupReplacerAliasToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, + WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, var, var_iter, + copy_flag, copy_value, var, var_iter, }; use egg::Subst; @@ -23,6 +24,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), fun_expr_var_arg( "?fun", @@ -32,6 +34,7 @@ impl WrapperRules { 
"?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ), @@ -45,6 +48,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), wrapper_pullup_replacer( @@ -53,6 +57,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_fun_expr("?fun", "?alias_to_cube"), ), @@ -64,6 +69,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( scalar_fun_expr_args_empty_tail(), @@ -71,8 +77,14 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", + ), + self.transform_scalar_function_empty_tail( + "?push_to_cube", + "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), - self.transform_scalar_function_empty_tail("?push_to_cube", "?pullup_push_to_cube"), ), ]); @@ -88,6 +100,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), list_var: "?args".to_string(), elem: "?arg".to_string(), @@ -101,6 +114,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), }, ), @@ -116,6 +130,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), }, ListPattern { @@ -125,6 +140,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), list_var: "?new_args".to_string(), elem: "?arg".to_string(), @@ -147,6 +163,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), scalar_fun_expr_args_legacy( wrapper_pushdown_replacer( @@ -155,6 +172,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?right", @@ -162,6 +180,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", 
"?cube_members", + "?grouped_subqueries", ), ), ), @@ -174,6 +193,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?right", @@ -181,6 +201,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), wrapper_pullup_replacer( @@ -189,6 +210,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ]); @@ -199,9 +221,13 @@ impl WrapperRules { &self, push_to_cube_var: &'static str, pullup_push_to_cube_var: &'static str, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); move |egraph, subst| { if !copy_flag!( egraph, @@ -213,6 +239,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } true } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/sort_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/sort_expr.rs index 1e5262fdd4133..28c6ecf4270da 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/sort_expr.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/sort_expr.rs @@ -14,6 +14,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), sort_expr( wrapper_pushdown_replacer( @@ -22,6 +23,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?asc", "?nulls_first", @@ -36,6 +38,7 @@ impl WrapperRules { 
"?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?asc", "?nulls_first", @@ -46,6 +49,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ]); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/subquery.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/subquery.rs index 686499b1b5093..c53723235da4f 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/subquery.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/subquery.rs @@ -5,9 +5,10 @@ use crate::{ rules::wrapper::WrapperRules, transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer, EmptyRelationDerivedSourceTableName, LogicalPlanLanguage, WrapperPullupReplacerAliasToCube, - WrapperPullupReplacerPushToCube, WrapperPushdownReplacerPushToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, + WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, + copy_flag, copy_value, transport::MetaContext, var, var_iter, var_list_iter, }; @@ -27,6 +28,7 @@ impl WrapperRules { "?nner_push_to_cube", "?inner_in_projection", "?inner_cube_members", + "?inner_grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -34,6 +36,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -41,11 +44,14 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", ), self.transform_check_subquery_wrapped( "?cube_scan_input", "?push_to_cube", "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), ), transforming_rewrite( @@ -66,10 +72,15 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:false", "WrapperPullupReplacerInProjection:true", "CubeScanMembers", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), - 
self.transform_wrap_empty_rel("?derived_source_table_name", "?alias_to_cube"), + self.transform_wrap_empty_rel( + "?derived_source_table_name", + "?alias_to_cube", + "?grouped_subqueries", + ), ), ]); Self::list_pushdown_pullup_rules( @@ -83,9 +94,11 @@ impl WrapperRules { &self, source_table_name_var: &'static str, alias_to_cube_var: &'static str, + grouped_subqueries_out_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let source_table_name_var = var!(source_table_name_var); let alias_to_cube_var = var!(alias_to_cube_var); + let grouped_subqueries_out_var = var!(grouped_subqueries_out_var); let meta_context = self.meta_context.clone(); move |egraph, subst| { for name in var_iter!( @@ -107,6 +120,18 @@ impl WrapperRules { )]), )), ); + // We don't want to mark current query as a grouped, because we create pullup replacer out of thin air here + // And it would need to match other replacers later + // At the same time, this pullup replacer have no subqueries on its own + // So whoever want to treat this as subquery would introduce it to grouped_subqueries + subst.insert( + grouped_subqueries_out_var, + egraph.add( + LogicalPlanLanguage::WrapperPullupReplacerGroupedSubqueries( + WrapperPullupReplacerGroupedSubqueries(vec![]), + ), + ), + ); return true; } } @@ -146,10 +171,14 @@ impl WrapperRules { cube_scan_input_var: &'static str, push_to_cube_var: &'static str, pullup_push_to_cube_var: &'static str, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let cube_scan_input_var = var!(cube_scan_input_var); let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); move |egraph, subst| { if !copy_flag!( egraph, @@ -162,6 +191,18 @@ impl WrapperRules { return false; } + if 
!copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } + for _ in var_list_iter!(egraph[subst[cube_scan_input_var]], WrappedSelect).cloned() { return true; } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/udf_function.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/udf_function.rs index 5505b1f467281..418ff722764c6 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/udf_function.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/udf_function.rs @@ -5,10 +5,11 @@ use crate::{ rules::wrapper::WrapperRules, transforming_rewrite, udf_expr_var_arg, udf_fun_expr_args, udf_fun_expr_args_empty_tail, wrapper_pullup_replacer, wrapper_pushdown_replacer, ScalarUDFExprFun, - WrapperPullupReplacerAliasToCube, WrapperPullupReplacerPushToCube, + WrapperPullupReplacerAliasToCube, WrapperPullupReplacerGroupedSubqueries, + WrapperPullupReplacerPushToCube, WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, var, var_iter, + copy_flag, copy_value, var, var_iter, }; use egg::Subst; @@ -23,6 +24,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), udf_expr_var_arg( "?fun", @@ -32,6 +34,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ), @@ -45,6 +48,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), wrapper_pullup_replacer( @@ -53,6 +57,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_udf_expr("?fun", "?alias_to_cube"), ), @@ -64,6 +69,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), udf_fun_expr_args( wrapper_pushdown_replacer( @@ -72,6 +78,7 
@@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?right", @@ -79,6 +86,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), ), @@ -91,6 +99,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?right", @@ -98,6 +107,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), wrapper_pullup_replacer( @@ -106,6 +116,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), ), transforming_rewrite( @@ -116,6 +127,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( udf_fun_expr_args_empty_tail(), @@ -123,8 +135,14 @@ impl WrapperRules { "?pullup_push_to_cube", "?in_projection", "?cube_members", + "?pullup_grouped_subqueries", + ), + self.transform_udf_expr_tail( + "?push_to_cube", + "?pullup_push_to_cube", + "?grouped_subqueries", + "?pullup_grouped_subqueries", ), - self.transform_udf_expr_tail("?push_to_cube", "?pullup_push_to_cube"), ), ]); } @@ -164,9 +182,13 @@ impl WrapperRules { &self, push_to_cube_var: &'static str, pullup_push_to_cube_var: &'static str, + grouped_subqueries_var: &'static str, + pullup_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pullup_push_to_cube_var = var!(pullup_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pullup_grouped_subqueries_var = var!(pullup_grouped_subqueries_var); move |egraph, subst| { if !copy_flag!( egraph, @@ -178,6 +200,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries, + pullup_grouped_subqueries_var, 
+ WrapperPullupReplacerGroupedSubqueries + ) { + return false; + } true } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/window.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/window.rs index 928a1dd388cc4..444e40413ffec 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/window.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/window.rs @@ -5,9 +5,10 @@ use crate::{ rules::wrapper::WrapperRules, transforming_rewrite, window, wrapped_select, wrapped_select_window_expr_empty_tail, wrapper_pullup_replacer, wrapper_pushdown_replacer, ListType, - WrapperPullupReplacerPushToCube, WrapperPushdownReplacerPushToCube, + WrapperPullupReplacerGroupedSubqueries, WrapperPullupReplacerPushToCube, + WrapperPushdownReplacerGroupedSubqueries, WrapperPushdownReplacerPushToCube, }, - copy_flag, var, + copy_flag, copy_value, var, }; use egg::Subst; @@ -41,6 +42,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -55,6 +57,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?subqueries", @@ -62,6 +65,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?group_expr", @@ -69,6 +73,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?aggr_expr", @@ -76,6 +81,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?window_expr", @@ -83,6 +89,7 @@ impl WrapperRules { "?pushdown_push_to_cube", "?in_projection", "?cube_members", + "?pushdown_grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -90,14 +97,23 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", + ), + 
wrapper_pullup_replacer( + "?joins", + "?alias_to_cube", + "?push_to_cube", + "?in_projection", + "?cube_members", + "?grouped_subqueries", ), - "?joins", wrapper_pullup_replacer( "?filter_expr", "?alias_to_cube", "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?having_expr", "?limit", @@ -108,6 +124,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?select_alias", "?select_distinct", @@ -116,7 +133,12 @@ impl WrapperRules { ), "CubeScanWrapperFinalized:false", ), - self.transform_window_pushdown("?push_to_cube", "?pushdown_push_to_cube"), + self.transform_window_pushdown( + "?push_to_cube", + "?pushdown_push_to_cube", + "?grouped_subqueries", + "?pushdown_grouped_subqueries", + ), )]); if self.config_obj.push_down_pull_up_split() { @@ -140,9 +162,13 @@ impl WrapperRules { &self, push_to_cube_var: &'static str, pushdown_push_to_cube_var: &'static str, + grouped_subqueries_var: &'static str, + pushdown_grouped_subqueries_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let push_to_cube_var = var!(push_to_cube_var); let pushdown_push_to_cube_var = var!(pushdown_push_to_cube_var); + let grouped_subqueries_var = var!(grouped_subqueries_var); + let pushdown_grouped_subqueries_var = var!(pushdown_grouped_subqueries_var); move |egraph, subst| { if !copy_flag!( egraph, @@ -154,6 +180,17 @@ impl WrapperRules { ) { return false; } + if !copy_value!( + egraph, + subst, + Vec, + grouped_subqueries_var, + WrapperPullupReplacerGroupedSubqueries, + pushdown_grouped_subqueries_var, + WrapperPushdownReplacerGroupedSubqueries + ) { + return false; + } true } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/window_function.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/window_function.rs index c2fe257d4130a..af5bf45c3343c 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/window_function.rs +++ 
b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/window_function.rs @@ -28,6 +28,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), window_fun_expr_var_arg( "?fun", @@ -37,6 +38,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?partition_by", @@ -44,6 +46,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pushdown_replacer( "?order_by", @@ -51,6 +54,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?window_frame", ), @@ -65,6 +69,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?partition_by", @@ -72,6 +77,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?order_by", @@ -79,6 +85,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?window_frame", ), @@ -94,6 +101,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), self.transform_window_fun_expr("?fun", "?alias_to_cube"), ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/wrapper_pull_up.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/wrapper_pull_up.rs index be7c228d321b7..e6c7fa0b9a239 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/wrapper_pull_up.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/wrapper_pull_up.rs @@ -25,6 +25,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?subqueries", @@ -32,6 +33,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?group_expr", 
@@ -39,6 +41,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?aggr_expr", @@ -46,6 +49,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?window_expr", @@ -53,6 +57,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?cube_scan_input", @@ -60,14 +65,23 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + "?joins", + "?alias_to_cube", + "?push_to_cube", + "?in_projection", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapper_pullup_replacer( "?filter_expr", "?alias_to_cube", "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapped_select_having_expr_empty_tail(), "WrappedSelectLimit:None", @@ -78,6 +92,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?select_alias", "?select_distinct", @@ -96,7 +111,7 @@ impl WrapperRules { "?aggr_expr", "?window_expr", "?cube_scan_input", - wrapped_select_joins_empty_tail(), + "?joins", "?filter_expr", wrapped_select_having_expr_empty_tail(), "WrappedSelectLimit:None", @@ -114,6 +129,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:false", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), @@ -130,6 +146,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?subqueries", @@ -137,6 +154,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?group_expr", @@ -144,6 +162,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), 
wrapper_pullup_replacer( "?aggr_expr", @@ -151,6 +170,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( "?window_expr", @@ -158,6 +178,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapper_pullup_replacer( wrapped_select( @@ -183,14 +204,24 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", + ), + wrapper_pullup_replacer( + // TODO handle non-empty joins + wrapped_select_joins_empty_tail(), + "?alias_to_cube", + "?push_to_cube", + "?in_projection", + "?cube_members", + "?grouped_subqueries", ), - wrapped_select_joins_empty_tail(), wrapper_pullup_replacer( "?filter_expr", "?alias_to_cube", "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), wrapped_select_having_expr_empty_tail(), "WrappedSelectLimit:None", @@ -201,6 +232,7 @@ impl WrapperRules { "?push_to_cube", "?in_projection", "?cube_members", + "?grouped_subqueries", ), "?select_alias", "?select_distinct", @@ -255,6 +287,7 @@ impl WrapperRules { "WrapperPullupReplacerPushToCube:false", "?inner_projection_expr", "?cube_members", + "?grouped_subqueries", ), "CubeScanWrapperFinalized:false", ), diff --git a/rust/cubesql/cubesql/src/compile/test/mod.rs b/rust/cubesql/cubesql/src/compile/test/mod.rs index 8ddbbb9545eff..11da31a9e5f81 100644 --- a/rust/cubesql/cubesql/src/compile/test/mod.rs +++ b/rust/cubesql/cubesql/src/compile/test/mod.rs @@ -30,6 +30,8 @@ pub mod test_bi_workarounds; #[cfg(test)] pub mod test_cube_join; #[cfg(test)] +pub mod test_cube_join_grouped; +#[cfg(test)] pub mod test_df_execution; #[cfg(test)] pub mod test_introspection; @@ -590,6 +592,8 @@ OFFSET {{ offset }}{% endif %}"#.to_string(), ("expressions/like".to_string(), "{{ expr }} {% if negated %}NOT {% endif %}LIKE {{ pattern }}".to_string()), ("expressions/ilike".to_string(), "{{ expr }} {% if negated %}NOT {% endif %}ILIKE 
{{ pattern }}".to_string()), ("expressions/like_escape".to_string(), "{{ like_expr }} ESCAPE {{ escape_char }}".to_string()), + ("join_types/inner".to_string(), "INNER".to_string()), + ("join_types/left".to_string(), "LEFT".to_string()), ("quotes/identifiers".to_string(), "\"".to_string()), ("quotes/escape".to_string(), "\"\"".to_string()), ("params/param".to_string(), "${{ param_index + 1 }}".to_string()), diff --git a/rust/cubesql/cubesql/src/compile/test/test_cube_join_grouped.rs b/rust/cubesql/cubesql/src/compile/test/test_cube_join_grouped.rs new file mode 100644 index 0000000000000..ae3efa44f8619 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/test/test_cube_join_grouped.rs @@ -0,0 +1,452 @@ +use datafusion::physical_plan::displayable; +use pretty_assertions::assert_eq; +use regex::Regex; + +use crate::compile::{ + test::{convert_select_to_query_plan, init_testing_logger, utils::LogicalPlanTestUtils}, + DatabaseProtocol, Rewriter, +}; + +// TODO Test more joins with grouped queries +// Join structure: +// * ungrouped inner join grouped CubeScan +// * ungrouped inner join grouped CubeScan with filters with values +// * ungrouped inner join grouped WrappedSelect +// * ungrouped inner join grouped WrappedSelect with filters with values +// * ungrouped left join grouped +// * grouped left join ungrouped +// * ungrouped join EmptyRelation +// Join condition columns: +// * one dim +// * two dim +// * one measure +// * __cubeJoinField +// * one member expression dim (like ON LOWER(dim) = LOWER(column)) +// Join condition predicate: +// * = +// * IS NOT DISTINCT FROM +// * COALESCE + IS NULL +// Grouped query: +// * Grouping +// * Aggregation +// * Filter +// * Sort +// * Limit +// * Wrapper +// On top of join: +// * Grouping +// * Aggregation +// * Filter +// * Limit +// Test long and otherwise bad aliases for columns: +// * in both parts +// * in join condition +// * in expressions on top +// Test long and otherwise bad aliases for tables: +// * for grouped
join part +// * for ungrouped join part +// * inside grouped join part +// * inside ungrouped join part +// * for result + +/// Simple join between ungrouped and grouped query should plan as a push-to-Cube query +/// with subquery_joins and with concrete member expressions in SQL +#[tokio::test] +async fn test_join_ungrouped_with_grouped() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + // language=PostgreSQL + r#" +SELECT + kibana_grouped.avg_price, + KibanaSampleDataEcommerce.customer_gender AS gender, + AVG(KibanaSampleDataEcommerce.avgPrice) AS price +FROM + KibanaSampleDataEcommerce +INNER JOIN ( + SELECT + customer_gender, + AVG(avgPrice) as avg_price + FROM + KibanaSampleDataEcommerce + GROUP BY 1 +) kibana_grouped +ON ( + (KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender) +) +GROUP BY + 1, + 2 +; + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let request = query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .request; + + assert_eq!(request.ungrouped, None); + + assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1); + + let subquery = &request.subquery_joins.unwrap()[0]; + + assert!(!subquery.sql.contains("ungrouped")); + assert_eq!(subquery.join_type, "INNER"); + assert!(subquery.on.contains( + r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""# + )); + + // Measure from top aggregation + assert!(query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + .contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#)); + // Dimension from ungrouped side + assert!(query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + 
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.customer_gender}\""#)); + // Dimension from grouped side + assert!(query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + .contains(r#"\"expr\":\"\\\"kibana_grouped\\\".\\\"avg_price\\\"\""#)); +} + +/// Simple join between ungrouped and grouped query should plan as a push-to-Cube query +/// with subquery_joins and with concrete member expressions in SQL, even without aggregation on top +// TODO complete this test +#[tokio::test] +async fn test_join_ungrouped_with_grouped_no_agg() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + // language=PostgreSQL + r#" +SELECT + kibana_grouped.avg_price, + KibanaSampleDataEcommerce.customer_gender AS gender, + KibanaSampleDataEcommerce.avgPrice AS price +FROM + KibanaSampleDataEcommerce +INNER JOIN ( + SELECT + customer_gender, + AVG(avgPrice) as avg_price + FROM + KibanaSampleDataEcommerce + GROUP BY 1 +) kibana_grouped +ON ( + (KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender) +) +; + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let request = query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .request; + + assert_eq!(request.ungrouped, Some(true)); + + assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1); + + let subquery = &request.subquery_joins.unwrap()[0]; + + assert!(!subquery.sql.contains("ungrouped")); + assert_eq!(subquery.join_type, "INNER"); + assert!(subquery.on.contains( + r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""# + )); + + // Measure from top aggregation + assert!(query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + 
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#)); + // Dimension from ungrouped side + assert!(query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + .contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.customer_gender}\""#)); + // Dimension from grouped side + assert!(query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + .contains(r#"\"expr\":\"\\\"kibana_grouped\\\".\\\"avg_price\\\"\""#)); +} + +/// Join between ungrouped and grouped query with two columns join condition +/// should plan as a push-to-Cube query with subquery_joins +#[tokio::test] +async fn test_join_ungrouped_with_grouped_two_columns_condition() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + // language=PostgreSQL + r#" +SELECT + AVG(KibanaSampleDataEcommerce.avgPrice) AS price +FROM + KibanaSampleDataEcommerce +INNER JOIN ( + SELECT + customer_gender, + notes, + AVG(avgPrice) as avg_price + FROM + KibanaSampleDataEcommerce + GROUP BY 1, 2 +) kibana_grouped +ON ( + KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender AND KibanaSampleDataEcommerce.notes = kibana_grouped.notes +) +; + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let request = query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .request; + + assert_eq!(request.ungrouped, None); + + assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1); + + let subquery = &request.subquery_joins.unwrap()[0]; + + assert!(!subquery.sql.contains("ungrouped")); + assert_eq!(subquery.join_type, "INNER"); + assert!(subquery.on.contains( + r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""# + )); + assert!(subquery + .on + 
.contains(r#"${KibanaSampleDataEcommerce.notes} = \"kibana_grouped\".\"notes\""#)); + + // Measure from top aggregation + assert!(query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + .contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#)); +} + +/// Join between ungrouped and grouped query with filter + sort + limit +/// should plan as a push-to-Cube query with subquery_joins +#[tokio::test] +async fn test_join_ungrouped_with_grouped_top1_and_filter() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + // language=PostgreSQL + r#" +SELECT + KibanaSampleDataEcommerce.customer_gender AS customer_gender, + AVG(KibanaSampleDataEcommerce.avgPrice) AS price +FROM + KibanaSampleDataEcommerce +INNER JOIN ( + SELECT + customer_gender, + AVG(avgPrice) as avg_price + FROM + KibanaSampleDataEcommerce + WHERE + notes = 'foo' + GROUP BY 1 + ORDER BY 2 DESC NULLS LAST + LIMIT 1 +) kibana_grouped +ON ( + KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender +) +GROUP BY 1 +; + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let request = query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .request; + + assert_eq!(request.ungrouped, None); + + assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1); + + let subquery = &request.subquery_joins.unwrap()[0]; + + assert!(!subquery.sql.contains("ungrouped")); + let re = Regex::new( + r#""order":\s*\[\s*\[\s*"KibanaSampleDataEcommerce.avgPrice",\s*"desc"\s*\]\s*\]"#, + ) + .unwrap(); + assert!(re.is_match(&subquery.sql)); + assert!(subquery.sql.contains(r#""limit": 1"#)); + assert_eq!(subquery.join_type, "INNER"); + assert!(subquery.on.contains( + r#"${KibanaSampleDataEcommerce.customer_gender} 
= \"kibana_grouped\".\"customer_gender\""# + )); + + // Measure from top aggregation + assert!(query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + .contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#)); +} + +#[tokio::test] +async fn test_superset_topk() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + // language=PostgreSQL + r#" +SELECT DATE_TRUNC('week', order_date) AS __timestamp, + MEASURE(KibanaSampleDataEcommerce.avgPrice) AS avgPrice +FROM KibanaSampleDataEcommerce +JOIN + (SELECT customer_gender AS customer_gender__, + MEASURE(KibanaSampleDataEcommerce.avgPrice) AS mme_inner__ + FROM KibanaSampleDataEcommerce + WHERE order_date >= TO_TIMESTAMP('2022-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US') + AND order_date < TO_TIMESTAMP('2024-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US') + GROUP BY customer_gender + ORDER BY mme_inner__ DESC + LIMIT 20) AS anon_1 ON customer_gender = customer_gender__ +-- filters here are not supported without filter flattening in wrapper +-- TODO enable it when ready +-- WHERE order_date >= TO_TIMESTAMP('2022-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US') +-- AND order_date < TO_TIMESTAMP('2024-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US') +GROUP BY DATE_TRUNC('week', order_date) +ORDER BY avgPrice DESC +LIMIT 1000 +; + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let wrapped_sql_node = query_plan.as_logical_plan().find_cube_scan_wrapped_sql(); + + assert_eq!(wrapped_sql_node.request.ungrouped, None); + + assert_eq!( + wrapped_sql_node + .request + .subquery_joins + .as_ref() + .unwrap() + .len(), + 1 + ); + + let subquery = &wrapped_sql_node.request.subquery_joins.unwrap()[0]; + + 
assert!(!subquery.sql.contains("ungrouped")); + let re = Regex::new( + r#""order":\s*\[\s*\[\s*"KibanaSampleDataEcommerce.avgPrice",\s*"desc"\s*\]\s*\]"#, + ) + .unwrap(); + assert!(re.is_match(&subquery.sql)); + assert!(subquery.sql.contains(r#""limit": 20"#)); + assert_eq!(subquery.join_type, "INNER"); + assert!(subquery.on.contains( + r#"${KibanaSampleDataEcommerce.customer_gender} = \"anon_1\".\"customer_gender_\""# + )); + + // Measure from top aggregation + assert!(wrapped_sql_node + .wrapped_sql + .sql + .contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#)); + + // Outer sort + assert!(wrapped_sql_node + .wrapped_sql + .sql + .contains(r#"ORDER BY "KibanaSampleDataEcommerce"."measure_kibanasa" DESC NULLS FIRST"#)); + + // Outer limit + assert!(wrapped_sql_node.wrapped_sql.sql.contains("LIMIT 1000")); +} diff --git a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs index b89241fab2c01..fc6267d32c44e 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs @@ -35,9 +35,8 @@ async fn test_simple_wrapper() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("COALESCE")); @@ -59,11 +58,7 @@ async fn test_wrapper_group_by_rollup() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("Rollup")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -84,11 +79,7 @@ async fn test_wrapper_group_by_rollup_with_aliases() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = 
logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("Rollup")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -109,11 +100,7 @@ async fn test_wrapper_group_by_rollup_nested() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("ROLLUP(1, 2)")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -134,11 +121,7 @@ async fn test_wrapper_group_by_rollup_nested_from_asterisk() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("Rollup")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -159,11 +142,7 @@ async fn test_wrapper_group_by_rollup_nested_with_aliases() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("ROLLUP(1, 2)")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -186,11 +165,7 @@ async fn test_wrapper_group_by_rollup_nested_complex() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("ROLLUP(1), ROLLUP(2), 3, CUBE(4)")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -211,11 +186,7 @@ async fn test_wrapper_group_by_rollup_placeholders() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + 
let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("Rollup")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -236,11 +207,7 @@ async fn test_wrapper_group_by_cube() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("Cube")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -261,11 +228,7 @@ async fn test_wrapper_group_by_rollup_complex() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("Rollup")); let _physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -286,11 +249,7 @@ async fn test_simple_subquery_wrapper_projection_empty_source() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("(SELECT")); assert!(sql.contains("utf8__male__")); @@ -313,11 +272,7 @@ async fn test_simple_subquery_wrapper_filter_empty_source() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("(SELECT")); assert!(sql.contains("utf8__male__")); @@ -340,11 +295,7 @@ async fn test_simple_subquery_wrapper_projection_aggregate_empty_source() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; 
assert!(sql.contains("(SELECT")); assert!(sql.contains("utf8__male__")); @@ -366,11 +317,7 @@ async fn test_simple_subquery_wrapper_filter_in_empty_source() { .await; let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("IN (SELECT")); assert!(sql.contains("utf8__male__")); @@ -393,11 +340,7 @@ async fn test_simple_subquery_wrapper_filter_and_projection_empty_source() { let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("IN (SELECT")); assert!(sql.contains("(SELECT")); assert!(sql.contains("utf8__male__")); @@ -422,15 +365,13 @@ async fn test_simple_subquery_wrapper_projection() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("(SELECT")); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("\\\\\\\"limit\\\\\\\": 1")); @@ -453,9 +394,8 @@ async fn test_simple_subquery_wrapper_projection_aggregate() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("(SELECT")); @@ -478,15 +418,13 @@ async fn test_simple_subquery_wrapper_filter_equal() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("(SELECT")); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("\\\\\\\"limit\\\\\\\": 1")); @@ -509,9 +447,8 @@ async fn 
test_simple_subquery_wrapper_filter_in() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("IN (SELECT")); @@ -535,9 +472,8 @@ async fn test_simple_subquery_wrapper_filter_and_projection() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("IN (SELECT")); @@ -583,9 +519,8 @@ GROUP BY assert!(query_plan .as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains( "${KibanaSampleDataEcommerce.order_date} >= timestamptz '2024-02-03T04:05:06.000Z'" @@ -629,9 +564,8 @@ WHERE assert!(query_plan .as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains( "${KibanaSampleDataEcommerce.order_date} >= timestamptz '2024-02-03T04:05:06.000Z'" @@ -674,9 +608,8 @@ GROUP BY assert!(query_plan .as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("${KibanaSampleDataEcommerce.order_date} >= timestamptz")); } @@ -715,9 +648,8 @@ WHERE assert!(query_plan .as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("${KibanaSampleDataEcommerce.order_date} >= timestamptz")); } @@ -738,9 +670,8 @@ async fn test_case_wrapper() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("CASE WHEN")); @@ -776,9 +707,8 @@ async fn test_case_wrapper_distinct() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("CASE WHEN")); @@ -805,9 +735,8 @@ async fn 
test_case_wrapper_alias_with_order() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("ORDER BY \"a\".\"case_when_a_cust\"")); @@ -834,9 +763,8 @@ async fn test_case_wrapper_ungrouped() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("CASE WHEN")); @@ -868,9 +796,8 @@ async fn test_case_wrapper_non_strict_match() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("CASE WHEN")); @@ -903,9 +830,8 @@ async fn test_case_wrapper_ungrouped_sorted() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("ORDER BY")); } @@ -932,9 +858,8 @@ async fn test_case_wrapper_ungrouped_sorted_aliased() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql // TODO test without depend on column name .contains("ORDER BY \"a\".\"case_when")); @@ -956,25 +881,19 @@ async fn test_case_wrapper_with_internal_limit() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("CASE WHEN")); assert!( logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("1123"), "SQL contains 1123: {}", - logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql + logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql ); let physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -1002,19 +921,14 @@ async fn 
test_case_wrapper_with_system_fields() { assert!( logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains( "\\\"cube_name\\\":\\\"KibanaSampleDataEcommerce\\\",\\\"alias\\\":\\\"user\\\"" ), r#"SQL contains `\"cube_name\":\"KibanaSampleDataEcommerce\",\"alias\":\"user\"` {}"#, - logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql + logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql ); let physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -1040,25 +954,19 @@ async fn test_case_wrapper_with_limit() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("CASE WHEN")); assert!( logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("1123"), "SQL contains 1123: {}", - logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql + logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql ); let physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -1084,9 +992,8 @@ async fn test_case_wrapper_with_null() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("CASE WHEN")); @@ -1141,9 +1048,8 @@ async fn test_case_wrapper_escaping() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql // Expect 6 backslashes as output is JSON and it's escaped one more time .contains("\\\\\\\\\\\\`")); @@ -1220,11 +1126,7 @@ WHERE ); let logical_plan = query_plan.as_logical_plan(); - let sql = logical_plan - .find_cube_scan_wrapper() - .wrapped_sql - .unwrap() - .sql; + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert_eq!( 
logical_plan.find_cube_scan().request, @@ -1309,9 +1211,8 @@ async fn test_wrapper_limit_zero() { let logical_plan = query_plan.as_logical_plan(); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("LIMIT 0")); @@ -1354,9 +1255,8 @@ async fn test_wrapper_filter_flatten() { assert_eq!( query_plan .as_logical_plan() - .find_cube_scan_wrapper() - .request - .unwrap(), + .find_cube_scan_wrapped_sql() + .request, TransportLoadRequestQuery { measures: Some(vec![json!({ "cube_name": "KibanaSampleDataEcommerce", @@ -1387,9 +1287,7 @@ async fn test_wrapper_filter_flatten() { time_dimensions: None, order: Some(vec![]), limit: Some(50000), - offset: None, - filters: None, - ungrouped: None, + ..Default::default() } ); } @@ -1446,16 +1344,14 @@ async fn wrapper_agg_over_limit() { ); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("\"limit\": 5")); assert!(query_plan .as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("\"ungrouped\": true")); } @@ -1510,16 +1406,14 @@ async fn wrapper_agg_dimension_over_limit() { ); assert!(logical_plan - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("\"limit\": 5")); assert!(query_plan .as_logical_plan() - .find_cube_scan_wrapper() + .find_cube_scan_wrapped_sql() .wrapped_sql - .unwrap() .sql .contains("\"ungrouped\": true")); } diff --git a/rust/cubesql/cubesql/src/compile/test/utils.rs b/rust/cubesql/cubesql/src/compile/test/utils.rs index 5193918dc97b4..e22772a655b61 100644 --- a/rust/cubesql/cubesql/src/compile/test/utils.rs +++ b/rust/cubesql/cubesql/src/compile/test/utils.rs @@ -3,14 +3,17 @@ use std::sync::Arc; use datafusion::logical_plan::{plan::Extension, Filter, LogicalPlan, PlanVisitor}; use crate::{ - compile::engine::df::{scan::CubeScanNode, wrapper::CubeScanWrapperNode}, 
+ compile::engine::df::{ + scan::CubeScanNode, + wrapper::{CubeScanWrappedSqlNode, CubeScanWrapperNode}, + }, CubeError, }; pub trait LogicalPlanTestUtils { fn find_cube_scan(&self) -> CubeScanNode; - fn find_cube_scan_wrapper(&self) -> CubeScanWrapperNode; + fn find_cube_scan_wrapped_sql(&self) -> CubeScanWrappedSqlNode; fn find_cube_scans(&self) -> Vec; @@ -27,13 +30,13 @@ impl LogicalPlanTestUtils for LogicalPlan { cube_scans[0].clone() } - fn find_cube_scan_wrapper(&self) -> CubeScanWrapperNode { + fn find_cube_scan_wrapped_sql(&self) -> CubeScanWrappedSqlNode { match self { LogicalPlan::Extension(Extension { node }) => { - if let Some(wrapper_node) = node.as_any().downcast_ref::() { + if let Some(wrapper_node) = node.as_any().downcast_ref::() { wrapper_node.clone() } else { - panic!("Root plan node is not cube_scan_wrapper!"); + panic!("Root plan node is not cube_scan_wrapped_sql!"); } } _ => panic!("Root plan node is not extension!"), @@ -66,6 +69,10 @@ pub fn find_cube_scans_deep_search( ext.node.as_any().downcast_ref::() { wrapper_node.wrapped_plan.accept(self)?; + } else if let Some(wrapper_node) = + ext.node.as_any().downcast_ref::() + { + wrapper_node.wrapped_plan.accept(self)?; } } Ok(true) diff --git a/rust/cubesql/cubesql/src/transport/service.rs b/rust/cubesql/cubesql/src/transport/service.rs index df0b384600594..7812eefebe839 100644 --- a/rust/cubesql/cubesql/src/transport/service.rs +++ b/rust/cubesql/cubesql/src/transport/service.rs @@ -906,4 +906,12 @@ impl SqlTemplates { }; self.render_template(&format!("types/{}", data_type), context! {}) } + + pub fn left_join(&self) -> Result { + self.render_template("join_types/left", context! {}) + } + + pub fn inner_join(&self) -> Result { + self.render_template("join_types/inner", context! {}) + } }