diff --git a/packages/cubejs-backend-native/Cargo.lock b/packages/cubejs-backend-native/Cargo.lock index 5eb487efaf687..1ec0fc189e2b5 100644 --- a/packages/cubejs-backend-native/Cargo.lock +++ b/packages/cubejs-backend-native/Cargo.lock @@ -988,10 +988,12 @@ dependencies = [ "num-bigint", "num-traits", "saturating", + "serde", "smallvec", "symbol_table", "symbolic_expressions", "thiserror", + "vectorize", ] [[package]] @@ -1304,6 +1306,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ "ahash 0.7.8", + "serde", ] [[package]] @@ -1523,6 +1526,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -3113,6 +3117,7 @@ checksum = "32bf088d1d7df2b2b6711b06da3471bc86677383c57b27251e18c56df8deac14" dependencies = [ "ahash 0.7.8", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -3661,6 +3666,15 @@ dependencies = [ "serde", ] +[[package]] +name = "vectorize" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e3bbfdfdcc4ea60ce183b1b45c936aacd69fe097ebf137984a32faf80e365b" +dependencies = [ + "serde", +] + [[package]] name = "version_check" version = "0.9.4" diff --git a/rust/cubenativeutils/Cargo.lock b/rust/cubenativeutils/Cargo.lock index 8e60af93afa27..0959a0f2888e6 100644 --- a/rust/cubenativeutils/Cargo.lock +++ b/rust/cubenativeutils/Cargo.lock @@ -854,10 +854,12 @@ dependencies = [ "num-bigint", "num-traits", "saturating", + "serde", "smallvec", "symbol_table", "symbolic_expressions", "thiserror", + "vectorize", ] [[package]] @@ -1152,6 +1154,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ "ahash 0.7.8", + "serde", ] [[package]] @@ -1363,6 +1366,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -2783,6 +2787,7 @@ checksum = "32bf088d1d7df2b2b6711b06da3471bc86677383c57b27251e18c56df8deac14" dependencies = [ "ahash 0.7.8", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -3242,6 +3247,15 @@ dependencies = [ "serde", ] +[[package]] +name = "vectorize" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e3bbfdfdcc4ea60ce183b1b45c936aacd69fe097ebf137984a32faf80e365b" +dependencies = [ + "serde", +] + [[package]] name = "version_check" version = "0.9.4" diff --git a/rust/cubesql/Cargo.lock b/rust/cubesql/Cargo.lock index 58b0377de0dd2..1cb06b84594cd 100644 --- a/rust/cubesql/Cargo.lock +++ b/rust/cubesql/Cargo.lock @@ -1002,10 +1002,12 @@ dependencies = [ "num-bigint", "num-traits", "saturating", + "serde", "smallvec", "symbol_table", "symbolic_expressions", "thiserror", + "vectorize", ] [[package]] @@ -1284,6 +1286,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" dependencies = [ "ahash 0.7.8", + "serde", ] [[package]] @@ -1500,6 +1503,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.1", + "serde", ] [[package]] @@ -2945,6 +2949,7 @@ checksum = "32bf088d1d7df2b2b6711b06da3471bc86677383c57b27251e18c56df8deac14" dependencies = [ "ahash 0.7.8", "hashbrown 0.12.1", + "serde", ] [[package]] @@ -3428,6 +3433,15 @@ dependencies = [ "serde", ] +[[package]] +name = "vectorize" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e3bbfdfdcc4ea60ce183b1b45c936aacd69fe097ebf137984a32faf80e365b" +dependencies = [ + "serde", +] + [[package]] name = "version_check" version = "0.9.4" diff --git a/rust/cubesql/cubesql/Cargo.toml b/rust/cubesql/cubesql/Cargo.toml index 60c6d5284eafd..dc996a11726c6 100644 --- a/rust/cubesql/cubesql/Cargo.toml +++ b/rust/cubesql/cubesql/Cargo.toml @@ -45,7 +45,7 @@ mockall = "0.8.1" tokio-util = { version = "0.7", features=["compat"] } comfy-table = "7.1.0" bitflags = "1.3.2" -egg = { rev = "952f8c2a1033e5da097d23c523b0d8e392eb532b", git = "https://github.com/cube-js/egg.git" } +egg = { rev = "952f8c2a1033e5da097d23c523b0d8e392eb532b", git = "https://github.com/cube-js/egg.git", features = ["serde-1"] } paste = "1.0.6" csv = "1.1.6" tracing = { version = "0.1.40", features = ["async-await"] } diff --git a/rust/cubesql/cubesql/egraph-debug-template/package.json b/rust/cubesql/cubesql/egraph-debug-template/package.json index 60c0f901087df..f01a8fcd10356 100644 --- a/rust/cubesql/cubesql/egraph-debug-template/package.json +++ b/rust/cubesql/cubesql/egraph-debug-template/package.json @@ -6,7 +6,8 @@ "elkjs": "^0.9.1", "react": "18.1.0", "react-dom": "18.1.0", - "reactflow": "^11.10.3" + "reactflow": "^11.10.3", + "zod": "3.23.8" }, "scripts": { "start": "GENERATE_SOURCEMAP=false && react-scripts start", diff --git a/rust/cubesql/cubesql/egraph-debug-template/src/index.tsx b/rust/cubesql/cubesql/egraph-debug-template/src/index.tsx index 641a25369371c..29717ea340dcc 100644 --- a/rust/cubesql/cubesql/egraph-debug-template/src/index.tsx +++ b/rust/cubesql/cubesql/egraph-debug-template/src/index.tsx @@ -18,6 +18,7 @@ import type { NodeProps, } from 'reactflow'; import 'reactflow/dist/style.css'; +import { z } from 'zod'; import statesData from './states.json'; @@ -26,15 +27,37 @@ type InputNodeData = { label: string; comboId: string; }; + type InputEdgeData = { source: string; target: string; }; + type InputComboData = { id: string; label: string; }; -type StateData = { + +const EClassDebugData = z.object({ + id: z.number(), + canon: z.number(), +}); +type EClassDebugData = z.infer; + +const ENodeDebugData = z.object({ + enode: z.string(), + eclass: z.number(), + children: z.array(z.number()), +}); +type ENodeDebugData = z.infer; + +const EGraphDebugState = z.object({ + eclasses: z.array(EClassDebugData), + enodes: z.array(ENodeDebugData), +}); +type EGraphDebugState = z.infer; + +type PreparedStateData = { nodes: Array; removedNodes: Array; edges: Array; @@ -43,7 +66,15 @@ type StateData = { removedCombos: Array; appliedRules: Array; }; -type InputData = Array; + +const StateData = z.object({ + egraph: EGraphDebugState, + appliedRules: z.array(z.string()), +}); +type StateData = z.infer; + +const InputData = z.array(StateData); +type InputData = z.infer; type NodeData = { label: string; @@ -51,15 +82,158 @@ type NodeData = { type Node = ReactFlowNode; type Edge = ReactFlowEdge; -// TODO proper parsing here -const states = statesData as InputData; +const states: InputData = InputData.parse(statesData); + +function prepareStates(states: InputData): Array { + const result = []; + let previousDebugData: + | { + nodes: Array; + edges: Array; + combos: Array; + } + | undefined; + + for (const { egraph, appliedRules } of states) { + let nodes = egraph.enodes + .map((node) => { + return { + id: `${node.eclass}-${node.enode}`, + label: node.enode, + comboId: `c${node.eclass}`, + } as InputNodeData; + }) + .concat( + egraph.eclasses + // render only canonical eclasses to avoid rendering empty nodes and combos for merged ones + .filter((eclass) => eclass.id === eclass.canon) + .map((eclass) => { + return { + id: eclass.id.toString(), + label: eclass.id.toString(), + comboId: `c${eclass.id}`, + } as InputNodeData; + }), + ); + + const allEdges = egraph.enodes + .map((node) => { + return { + source: node.eclass.toString(), + target: `${node.eclass}-${node.enode}`, + } as InputEdgeData; + }) + .concat( + egraph.enodes.flatMap((node) => { + return node.children.map((child) => { + return { + source: `${node.eclass}-${node.enode}`, + target: child.toString(), + }; + }); + }), + ); + // Same eclass can be present as child for a single enode multiple times + // E.g. CubeScanFilters([CubeScanFilters([]), CubeScanFilters([])]) + // Both internal nodes are same eclass + // This will lead to duplicated edges and non-uniq ids + const uniqueEdges = new Map(); + for (const edge of allEdges) { + const key = JSON.stringify(edge); + if (uniqueEdges.get(key)) { + continue; + } + uniqueEdges.set(key, edge); + } + let edges = [...uniqueEdges.values()]; + + let combos = egraph.eclasses + // render only canonical eclasses to avoid rendering empty nodes and combos for merged ones + .filter((eclass) => eclass.id === eclass.canon) + .map((eclass) => { + return { + id: `c${eclass.id}`, + label: `#${eclass.id}`, + } as InputComboData; + }); + + const nodesClone = nodes.slice(); + const edgesClone = edges.slice(); + const combosClone = combos.slice(); + + let removedNodes: Array = []; + let removedEdges: Array = []; + let removedCombos: Array = []; + + if (previousDebugData !== undefined) { + const { + nodes: prevNodes, + edges: prevEdges, + combos: prevCombos, + } = previousDebugData; + nodes = nodes.filter( + (n) => !prevNodes.some((ln) => ln.id === n.id), + ); + edges = edges.filter( + (n) => + !prevEdges.some( + (ln) => + ln.source === n.source && ln.target === n.target, + ), + ); + combos = combos.filter( + (n) => !prevCombos.some((ln) => ln.id === n.id), + ); + + removedNodes = prevNodes.slice(); + removedNodes = removedNodes.filter( + (n) => !nodesClone.some((ln) => ln.id === n.id), + ); + + removedEdges = prevEdges.slice(); + removedEdges = removedEdges.filter( + (n) => + !edgesClone.some( + (ln) => + ln.source === n.source && ln.target === n.target, + ), + ); + + removedCombos = prevCombos.slice(); + removedCombos = removedCombos.filter( + (n) => !combosClone.some((ln) => ln.id === n.id), + ); + } + + let debugData = { + nodes, + edges, + combos, + removedNodes, + removedEdges, + removedCombos, + appliedRules, + } as PreparedStateData; + + result.push(debugData); + previousDebugData = { + nodes: nodesClone, + edges: edgesClone, + combos: combosClone, + }; + } + + return result; +} + +let preparedStates = prepareStates(states); // First is initial state -const totalIterations = states.length - 1; +const totalIterations = preparedStates.length - 1; const data = { - nodes: states[0].nodes, - edges: states[0].edges, - combos: states[0].combos, + nodes: preparedStates[0].nodes, + edges: preparedStates[0].edges, + combos: preparedStates[0].combos, }; const sizeByNode = (n: InputNodeData): [number, number] => [ 60 + n.label.length * 5, @@ -400,7 +574,7 @@ const LayoutFlow = () => { } let newNodes = preNodes; let newEdges = preEdges; - const toRemove = states[stateIdx]; + const toRemove = preparedStates[stateIdx]; let toRemoveNodeIds = (toRemove.nodes as Array<{ id: string }>) .concat(toRemove.combos) .map((n) => n.id); @@ -419,7 +593,7 @@ const LayoutFlow = () => { newEdges = newEdges.concat( Object.keys(edgeMap).map((key) => edgeMap[key]), ); - const toHighlight = states[stateIdx - 1]; + const toHighlight = preparedStates[stateIdx - 1]; const toHighlightNodeIds = (toHighlight.nodes as Array<{ id: string }>) .concat(toHighlight.combos) .map((n) => n.id); @@ -437,13 +611,13 @@ const LayoutFlow = () => { }; const nextState = () => { - if (stateIdx === states.length - 1) { + if (stateIdx === preparedStates.length - 1) { return; } let newNodes = preNodes; let newEdges = preEdges; setStateIdx(stateIdx + 1); - const toAdd = states[stateIdx + 1]; + const toAdd = preparedStates[stateIdx + 1]; let toRemoveNodeIds = (toAdd.removedNodes as Array<{ id: string }>) .concat(toAdd.removedCombos) .map((n) => n.id); @@ -581,7 +755,9 @@ const LayoutFlow = () => { ))}
- {states[stateIdx].appliedRules.join(', ')} + + {preparedStates[stateIdx].appliedRules.join(', ')} +
diff --git a/rust/cubesql/cubesql/src/compile/rewrite/analysis.rs b/rust/cubesql/cubesql/src/compile/rewrite/analysis.rs index f0af8e3396fa3..a5df8018f19e4 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/analysis.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/analysis.rs @@ -3,7 +3,7 @@ use crate::{ rewrite::{ converter::{is_expr_node, node_to_expr, LogicalPlanToLanguageConverter}, expr_column_name, - rewriter::{CubeEGraph, DebugData}, + rewriter::{CubeEGraph, EGraphDebugState}, AggregateUDFExprFun, AliasExprAlias, AllMembersAlias, AllMembersCube, ChangeUserCube, ColumnExprColumn, DimensionName, FilterMemberMember, FilterMemberOp, LiteralExprValue, LiteralMemberRelation, LiteralMemberValue, LogicalPlanLanguage, MeasureName, @@ -235,8 +235,6 @@ impl Member { } } -type EgraphDebugState = DebugData; - #[derive(Clone)] pub struct LogicalPlanAnalysis { /* This is 0, when creating the EGraph. It's set to 1 before iteration 0, @@ -244,7 +242,7 @@ pub struct LogicalPlanAnalysis { pub iteration_timestamp: usize, /// Debug info, used with egraph-debug /// Will be filled by special hook in Runner - pub debug_states: Vec, + pub debug_states: Vec, cube_context: Arc, planner: Arc, } @@ -283,16 +281,12 @@ impl LogicalPlanAnalysis { } } - fn prepare_egraph_debug_state(egraph: &CubeEGraph) -> EgraphDebugState { - DebugData::prepare(egraph) - } - pub fn store_egraph_debug_state(egraph: &mut CubeEGraph) { debug_assert_eq!( egraph.analysis.iteration_timestamp, egraph.analysis.debug_states.len() ); - let state = Self::prepare_egraph_debug_state(egraph); + let state = EGraphDebugState::new(egraph); egraph.analysis.debug_states.push(state); } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs index bb7840374fae4..02f299617be70 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs @@ -42,101 +42,76 @@ pub struct Rewriter { pub type CubeRunner = Runner; -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DebugNode { - id: String, - label: String, - #[serde(rename = "comboId")] - combo_id: String, +#[derive(Clone, Serialize, Deserialize)] +struct DebugENodeId(String); + +impl From<&LogicalPlanLanguage> for DebugENodeId { + fn from(value: &LogicalPlanLanguage) -> Self { + Self(format!("{value:?}")) + } } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DebugEdge { - source: String, - target: String, +#[derive(Clone, Serialize, Deserialize)] +pub struct EClassDebugData { + id: Id, + canon: Id, } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DebugCombo { - id: String, - label: String, +#[derive(Clone, Serialize, Deserialize)] +pub struct ENodeDebugData { + enode: DebugENodeId, + eclass: Id, + children: Vec, } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DebugData { - nodes: Vec, - #[serde(rename = "removedNodes")] - removed_nodes: Vec, - edges: Vec, - #[serde(rename = "removedEdges")] - removed_edges: Vec, - combos: Vec, - #[serde(rename = "removedCombos")] - removed_combos: Vec, - #[serde(rename = "appliedRules")] - applied_rules: Option>, +/// Representation is optimised for storing in JSON, to transfer to UI +#[derive(Clone, Serialize, Deserialize)] +pub struct EGraphDebugState { + eclasses: Vec, + enodes: Vec, } -impl DebugData { - pub fn prepare(graph: &CubeEGraph) -> DebugData { - DebugData { - applied_rules: None, - nodes: graph - .classes() - .flat_map(|class| { - let mut result = class - .nodes - .iter() - .map(|n| { - let node_id = format!("{}-{:?}", class.id, n); - DebugNode { - id: node_id.to_string(), - label: format!("{:?}", n), - combo_id: format!("c{}", class.id), - } - }) - .collect::>(); - result.push(DebugNode { - id: class.id.to_string(), - label: class.id.to_string(), - combo_id: format!("c{}", class.id), - }); - result - }) - .collect(), - edges: graph - .classes() - .flat_map(|class| { - class - .nodes - .iter() - .map(|n| DebugEdge { - source: class.id.to_string(), - target: format!("{}-{:?}", class.id, n,), - }) - .chain(class.nodes.iter().flat_map(|n| { - n.children().iter().map(move |c| DebugEdge { - source: format!("{}-{:?}", class.id, n), - target: c.to_string(), - }) - })) - .collect::>() - }) - .collect(), - combos: graph - .classes() - .map(|class| DebugCombo { - id: format!("c{}", class.id), - label: format!("#{}", class.id), - }) - .collect(), - removed_nodes: Vec::new(), - removed_edges: Vec::new(), - removed_combos: Vec::new(), - } +impl EGraphDebugState { + pub fn new(graph: &EGraph) -> Self { + let current_eclasses = graph.classes().map(|ec| ec.id); + let previous_debug_eclasses = graph + .analysis + .debug_states + .iter() + .flat_map(|state| state.eclasses.iter().map(|ecd| ecd.id)); + let all_known_eclasses = current_eclasses.chain(previous_debug_eclasses); + + let all_known_eclasses = all_known_eclasses.collect::>(); + + let eclasses = all_known_eclasses + .into_iter() + .map(|ec| EClassDebugData { + id: ec, + canon: graph.find(ec), + }) + .collect::>(); + + let enodes = graph + .classes() + .flat_map(|ec| ec.nodes.iter().map(move |node| (ec.id, node))) + .map(|(ec, node)| ENodeDebugData { + enode: node.into(), + eclass: ec, + children: node.children().to_vec(), + }) + .collect(); + + EGraphDebugState { eclasses, enodes } } } +#[derive(Serialize, Deserialize)] +struct DebugState { + egraph: EGraphDebugState, + #[serde(rename = "appliedRules")] + applied_rules: Vec, +} + #[derive(Debug)] pub struct IterInfo { debug_qtrace_eclasses: Option>, @@ -190,9 +165,6 @@ fn write_debug_states(runner: &CubeRunner, stage: &str) -> Result<(), CubeError> format!("{}/src/index.tsx", dir), )?; - let mut states = Vec::new(); - let mut last_debug_data: Option = None; - let debug_data = runner.egraph.analysis.debug_states.as_slice(); debug_assert_eq!(debug_data.len(), runner.iterations.len() + 1); @@ -202,53 +174,19 @@ fn write_debug_states(runner: &CubeRunner, stage: &str) -> Result<(), CubeError> .iter() .skip(1) .zip(runner.iterations.iter().map(|i| Some(&i.applied))); - let states_data = std::iter::once((&debug_data[0], None)).chain(states_data); - - for (debug_data, applied) in states_data { - let mut debug_data = debug_data.clone(); - let debug_data_clone = debug_data.clone(); - - if let Some(last) = last_debug_data { - debug_data - .nodes - .retain(|n| !last.nodes.iter().any(|ln| ln.id == n.id)); - debug_data.edges.retain(|n| { - !last - .edges - .iter() - .any(|ln| ln.source == n.source && ln.target == n.target) - }); - debug_data - .combos - .retain(|n| !last.combos.iter().any(|ln| ln.id == n.id)); - - debug_data.removed_nodes = last.nodes.clone(); - debug_data - .removed_nodes - .retain(|n| !debug_data_clone.nodes.iter().any(|ln| ln.id == n.id)); - debug_data.removed_edges = last.edges.clone(); - debug_data.removed_edges.retain(|n| { - !debug_data_clone - .edges - .iter() - .any(|ln| ln.source == n.source && ln.target == n.target) - }); - debug_data.removed_combos = last.combos.clone(); - debug_data - .removed_combos - .retain(|n| !debug_data_clone.combos.iter().any(|ln| ln.id == n.id)); - } - debug_data.applied_rules = Some( - applied + let debug_data = std::iter::once((&debug_data[0], None)) + .chain(states_data) + .map(|(egraph, applied_rules)| DebugState { + egraph: egraph.clone(), + applied_rules: applied_rules .map(|applied| applied.iter().map(|s| format!("{:?}", s)).collect()) .unwrap_or(vec![]), - ); - states.push(debug_data); - last_debug_data = Some(debug_data_clone); - } + }) + .collect::>(); + fs::write( format!("{}/src/states.json", dir), - serde_json::to_string_pretty(&states)?, + serde_json::to_string_pretty(&debug_data)?, )?; Ok(()) diff --git a/rust/cubesqlplanner/Cargo.lock b/rust/cubesqlplanner/Cargo.lock index 834e1ed12c0ff..55b1401878539 100644 --- a/rust/cubesqlplanner/Cargo.lock +++ b/rust/cubesqlplanner/Cargo.lock @@ -908,10 +908,12 @@ dependencies = [ "num-bigint", "num-traits", "saturating", + "serde", "smallvec", "symbol_table", "symbolic_expressions", "thiserror", + "vectorize", ] [[package]] @@ -1206,6 +1208,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ "ahash 0.7.8", + "serde", ] [[package]] @@ -1417,6 +1420,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -2850,6 +2854,7 @@ checksum = "32bf088d1d7df2b2b6711b06da3471bc86677383c57b27251e18c56df8deac14" dependencies = [ "ahash 0.7.8", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -3309,6 +3314,15 @@ dependencies = [ "serde", ] +[[package]] +name = "vectorize" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e3bbfdfdcc4ea60ce183b1b45c936aacd69fe097ebf137984a32faf80e365b" +dependencies = [ + "serde", +] + [[package]] name = "version_check" version = "0.9.4"