-
Notifications
You must be signed in to change notification settings - Fork 16
Add planning tests #50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ use datafusion::{ | |
|
|
||
| use datafusion_proto::physical_plan::{DefaultPhysicalExtensionCodec, PhysicalExtensionCodec}; | ||
| use datafusion_substrait::{logical_plan::consumer::from_substrait_plan, substrait::proto::Plan}; | ||
| use insta::assert_snapshot; | ||
| use tokio_stream::StreamExt; | ||
|
|
||
| use crate::{ | ||
|
|
@@ -286,3 +287,127 @@ impl QueryPlanner { | |
| Ok(()) | ||
| } | ||
| } | ||
|
|
||
| pub mod tests { | ||
| use super::*; | ||
| use arrow::datatypes::{DataType, Field, Schema}; | ||
| use datafusion::physical_plan::displayable; | ||
| use std::io::BufReader; | ||
| use std::{fs::File, path::Path}; | ||
|
|
||
| #[tokio::test] | ||
| async fn prepare_substrait_select_one() -> anyhow::Result<()> { | ||
| // Load Substrait and parse to protobuf `Plan`. | ||
| let file = File::open(Path::new("testdata/substrait/select_one.substrait.json"))?; | ||
| let reader = BufReader::new(file); | ||
| let plan: Plan = serde_json::from_reader(reader)?; | ||
|
|
||
| let planner = QueryPlanner::default(); | ||
| let qp = planner.prepare_substrait(plan).await?; | ||
|
|
||
| // Distributed plan schema must match logical schema. | ||
| let expected_schema = Arc::new(Schema::new(vec![Field::new( | ||
| "test_col", | ||
| DataType::Int64, | ||
| false, | ||
| )])); | ||
| assert_eq!(qp.distributed_plan.schema(), expected_schema); | ||
|
|
||
| // Check the distributed physical plan. | ||
| let distributed_plan_str = | ||
| format!("{}", displayable(qp.distributed_plan.as_ref()).indent(true)); | ||
| assert_snapshot!(distributed_plan_str, @r" | ||
| DDStageExec[0] (output_partitioning=UnknownPartitioning(1)) | ||
| ProjectionExec: expr=[1 as test_col] | ||
| DataSourceExec: partitions=1, partition_sizes=[1] | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I noticed that even though we set
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe it's because there is not enough data for the plan to make 3 partitions? This is actually another good case for us to test. I think we can either create a mock test and data to enforce the number of partitions or use a larger data set. I think the mock test is usually the choice, to avoid adding more data
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, I think we will start having a lot of things to do soon. If you cannot work on this test next, it would be good to create a ticket to remind us to work on it
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was planning on looking into integration tests with mocking after merging the planning tests, unless there was something more urgent to do. I'll create a ticket to track it still |
||
| "); | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn prepare_sql_select_one() -> Result<()> { | ||
| let planner = QueryPlanner::default(); | ||
| let sql = "SELECT 1 AS test_col"; | ||
|
|
||
| let qp = planner.prepare(sql).await?; | ||
|
|
||
| // Distributed plan schema must match logical schema. | ||
| let expected_schema = Arc::new(Schema::new(vec![Field::new( | ||
| "test_col", | ||
| DataType::Int64, | ||
| false, | ||
| )])); | ||
| assert_eq!(qp.distributed_plan.schema(), expected_schema); | ||
|
|
||
| // Check the distributed physical plan. | ||
| let distributed_plan_str = | ||
| format!("{}", displayable(qp.distributed_plan.as_ref()).indent(true)); | ||
| assert_snapshot!(distributed_plan_str, @r" | ||
| DDStageExec[0] (output_partitioning=UnknownPartitioning(1)) | ||
| ProjectionExec: expr=[1 as test_col] | ||
| PlaceholderRowExec | ||
| "); | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn prepare_describe_table() -> Result<()> { | ||
| std::env::set_var( | ||
| "DD_TABLES", | ||
| "people:parquet:testdata/parquet/people.parquet", | ||
| ); | ||
|
|
||
| let planner = QueryPlanner::default(); | ||
| let sql = "DESCRIBE people"; | ||
|
|
||
| let qp = planner.prepare(sql).await?; | ||
|
|
||
| // Check the distributed physical plan. | ||
| let distributed_plan_str = | ||
| format!("{}", displayable(qp.distributed_plan.as_ref()).indent(true)); | ||
| assert_snapshot!(distributed_plan_str, @r" | ||
| DDStageExec[0] (output_partitioning=UnknownPartitioning(1)) | ||
| RecordBatchExec | ||
| "); | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn two_stages_query() -> Result<()> { | ||
| std::env::set_var( | ||
| "DD_TABLES", | ||
| "people:parquet:testdata/parquet/people.parquet", | ||
| ); | ||
|
|
||
| let planner = QueryPlanner::default(); | ||
| let sql = "SELECT * FROM (SELECT 1 as id) a CROSS JOIN (SELECT 2 as id) b order by b.id"; | ||
| let qp = planner.prepare(sql).await?; | ||
|
|
||
| // Distributed plan schema must match logical schema. | ||
| let expected_schema = Arc::new(Schema::new(vec![ | ||
| Field::new("id", DataType::Int64, false), | ||
| Field::new("id", DataType::Int64, false), | ||
| ])); | ||
|
|
||
| assert_eq!(qp.distributed_plan.schema(), expected_schema); | ||
|
|
||
| // Check the distributed physical plan. | ||
| let distributed_plan_str = | ||
| format!("{}", displayable(qp.distributed_plan.as_ref()).indent(true)); | ||
| assert_snapshot!(distributed_plan_str, @r" | ||
| DDStageExec[1] (output_partitioning=UnknownPartitioning(1)) | ||
| DDStageExec[0] (output_partitioning=UnknownPartitioning(1)) | ||
| SortExec: expr=[id@1 ASC NULLS LAST], preserve_partitioning=[false] | ||
| CrossJoinExec | ||
| ProjectionExec: expr=[1 as id] | ||
| PlaceholderRowExec | ||
| ProjectionExec: expr=[2 as id] | ||
| PlaceholderRowExec | ||
| "); | ||
|
|
||
| Ok(()) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
❤️