databendlabs
diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/yaml/q1.yaml b/src/query/service/tests/it/sql/planner/optimizer/data/cases/q1.yaml
src/query/service/tests/it/sql/planner/optimizer/data/yaml/q1.yaml renamed to src/query/service/tests/it/sql/planner/optimizer/data/cases/q1.yaml
Lines changed: 2 additions & 87 deletions
diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/yaml/q3.yaml b/src/query/service/tests/it/sql/planner/optimizer/data/cases/q3.yaml
src/query/service/tests/it/sql/planner/optimizer/data/yaml/q3.yaml renamed to src/query/service/tests/it/sql/planner/optimizer/data/cases/q3.yaml
Lines changed: 2 additions & 74 deletions
@@ -25,93 +25,8 @@ sql: |
   ORDER  BY c_customer_id
   LIMIT 100
 
-# Table statistics derived from snow_plan's TableScan information
-table_statistics:
-  date_dim:
-    num_rows: 73049  # Estimated based on typical date dimension cardinality
-    data_size: 2138624  # Directly from snow_plan: "bytes: 2,138,624"
-    number_of_segments: 1  # From snow_plan: "partitions: 1/1"
-
-  store_returns:
-    num_rows: 287000000  # Estimated based on data size and typical row size
-    data_size: 124763446272  # Directly from snow_plan: "bytes: 124,763,446,272"
-    number_of_segments: 7070  # From snow_plan: "partitions: 7070/7070"
-
-  store:
-    num_rows: 1002  # Estimated based on typical store dimension cardinality
-    data_size: 135680  # Directly from snow_plan: "bytes: 135,680"
-    number_of_segments: 1  # From snow_plan: "partitions: 1/1"
-
-  customer:
-    num_rows: 12000000  # Estimated based on typical customer dimension size
-    data_size: 2328538624  # Directly from snow_plan: "bytes: 2,328,538,624"
-    number_of_segments: 261  # From snow_plan: "partitions: 261/261"
-
-# Column statistics derived from query predicates and typical TPC-DS data distributions
-column_statistics:
-  # Date dimension columns used in the query
-  date_dim.d_year:
-    min: 1990  # Typical range for TPC-DS
-    max: 2010  # Typical range for TPC-DS
-    ndv: 21  # Based on min/max range (2010-1990+1)
-    null_count: 0  # Primary dimension columns typically don't have nulls
-
-  date_dim.d_date_sk:
-    min: 1  # Typical starting value for surrogate key
-    max: 73049  # Based on table row count
-    ndv: 73049  # Primary key, so NDV equals row count
-    null_count: 0  # Primary key cannot be null
-
-  # Store returns columns used in the query
-  store_returns.sr_returned_date_sk:
-    min: 1  # Matches date_dim.d_date_sk min
-    max: 73049  # Matches date_dim.d_date_sk max
-    ndv: 73049  # Foreign key to date_dim
-    null_count: 287998  # Inferred from filter in snow_plan: "STORE_RETURNS.SR_RETURNED_DATE_SK IS NOT NULL"
-
-  store_returns.sr_customer_sk:
-    min: 1  # Typical starting value for surrogate key
-    max: 12000000  # Matches customer.c_customer_sk max
-    ndv: 11000000  # Estimated as slightly less than customer table cardinality
-    null_count: 143500  # Inferred from filter in snow_plan: "STORE_RETURNS.SR_CUSTOMER_SK IS NOT NULL"
-
-  store_returns.sr_store_sk:
-    min: 1  # Typical starting value for surrogate key
-    max: 1002  # Matches store.s_store_sk max
-    ndv: 1002  # Foreign key to store table
-    null_count: 143500  # Inferred from filter in snow_plan: "STORE_RETURNS.SR_STORE_SK IS NOT NULL"
-
-  store_returns.sr_return_amt:
-    min: 0.01  # Minimum reasonable return amount
-    max: 10000.00  # Maximum reasonable return amount
-    ndv: 100000  # Estimated based on typical distribution
-    null_count: 0  # Return amount is typically not null
-
-  # Store columns used in the query
-  store.s_store_sk:
-    min: 1  # Typical starting value for surrogate key
-    max: 1002  # Based on estimated row count
-    ndv: 1002  # Primary key, so NDV equals row count
-    null_count: 0  # Primary key cannot be null
-
-  store.s_state:
-    min: "AK"  # Alaska (alphabetically first US state)
-    max: "WY"  # Wyoming (alphabetically last US state)
-    ndv: 50  # Number of US states
-    null_count: 0  # State is typically not null
-
-  # Customer columns used in the query
-  customer.c_customer_sk:
-    min: 1  # Typical starting value for surrogate key
-    max: 12000000  # Based on estimated row count
-    ndv: 12000000  # Primary key, so NDV equals row count
-    null_count: 0  # Primary key cannot be null
-
-  customer.c_customer_id:
-    min: "AAAAAAAAAAAAAA"  # Lexicographically smallest possible customer ID
-    max: "ZZZZZZZZZZZZZZ"  # Lexicographically largest possible customer ID
-    ndv: 12000000  # Same as c_customer_sk (1:1 relationship)
-    null_count: 0  # Customer ID is typically not null
+# Reference to external statistics file
+statistics_file: statistics.yaml
 
 raw_plan: |
   Limit
 
@@ -13,80 +13,8 @@ sql: |
   ORDER BY dt.d_year, sum_agg DESC, brand_id
   LIMIT 100
 
-table_statistics:
-  date_dim:
-    num_rows: 73049  # Estimated based on typical date dimension cardinality
-    data_size: 2138624  # From snow_plan: "TableScan (DATE_DIM as DT) [partitions: 1/1, bytes: 2,138,624]"
-    data_size_compressed: 1069312  # Estimated as 50% of data_size
-    index_size: 427724  # Estimated as 20% of data_size
-    number_of_blocks: 21  # Estimated based on data_size
-    number_of_segments: 1  # From snow_plan: "partitions: 1/1"
-  store_sales:
-    num_rows: 2879987999  # Estimated based on data size and typical row size
-    data_size: 1212628258304  # From snow_plan: "TableScan (STORE_SALES) [partitions: 70,412/72,718, bytes: 1,212,628,258,304]"
-    data_size_compressed: 606314129152  # Estimated as 50% of data_size
-    index_size: 242525651660  # Estimated as 20% of data_size
-    number_of_blocks: 12126282  # Estimated based on data_size
-    number_of_segments: 70412  # From snow_plan: "partitions: 70,412/72,718"
-  item:
-    num_rows: 462000  # Estimated based on ss_item_sk range and typical item dimension size
-    data_size: 23811584  # From snow_plan: "TableScan (ITEM) [partitions: 2/2, bytes: 23,811,584]"
-    data_size_compressed: 11905792  # Estimated as 50% of data_size
-    index_size: 4762316  # Estimated as 20% of data_size
-    number_of_blocks: 238  # Estimated based on data_size
-    number_of_segments: 2  # From snow_plan: "partitions: 2/2"
-
-column_statistics:
-  date_dim.d_year:
-    min: 1990  # Typical range for TPC-DS
-    max: 2000  # Typical range for TPC-DS
-    ndv: 11  # Based on min/max range
-    null_count: 0  # Primary dimension columns typically don't have nulls
-  date_dim.d_date_sk:
-    min: 1  # Typical starting value for surrogate key
-    max: 73049  # Based on table row count
-    ndv: 73049  # Primary key, so NDV equals row count
-    null_count: 0  # Primary key cannot be null
-  date_dim.d_moy:
-    min: 1  # January
-    max: 12  # December
-    ndv: 12  # 12 months in a year
-    null_count: 0  # Date parts typically don't have nulls
-  store_sales.ss_ext_sales_price:
-    min: 0.01  # Minimum reasonable sales price
-    max: 30000.00  # Maximum reasonable extended sales price
-    ndv: 573997  # Estimated as ~20% of row count
-    null_count: 0  # Sales amount is typically not null
-  store_sales.ss_sold_date_sk:
-    min: 1  # Matches date_dim.d_date_sk min
-    max: 73049  # Matches date_dim.d_date_sk max
-    ndv: 73049  # Foreign key to date_dim
-    null_count: 287998  # From snow_plan filter: "STORE_SALES.SS_SOLD_DATE_SK IS NOT NULL" implies some nulls exist
-  store_sales.ss_item_sk:
-    min: 1  # Typical starting value for surrogate key
-    max: 462000  # Matches item.i_item_sk max
-    ndv: 462000  # Foreign key to item table
-    null_count: 0  # Required join key is typically not null
-  item.i_brand_id:
-    min: 1  # Typical starting value for ID
-    max: 1000  # Typical range for TPC-DS
-    ndv: 948  # Estimated based on TPC-DS typical cardinality
-    null_count: 0  # Brand ID is typically not null
-  item.i_brand:
-    min: "AAAAAAAAAAAAAA"  # Lexicographically smallest possible brand name
-    max: "zzzzzzzzzzzzzz"  # Lexicographically largest possible brand name
-    ndv: 948  # Same as i_brand_id (1:1 relationship)
-    null_count: 0  # Brand name is typically not null
-  item.i_item_sk:
-    min: 1  # Typical starting value for surrogate key
-    max: 462000  # Based on estimated row count
-    ndv: 462000  # Primary key, so NDV equals row count
-    null_count: 0  # Primary key cannot be null
-  item.i_manufact_id:
-    min: 1  # Typical starting value for ID
-    max: 1000  # Typical range for TPC-DS
-    ndv: 1000  # Based on typical TPC-DS cardinality
-    null_count: 0  # Manufacturer ID is typically not null
+# Reference to external statistics file
+statistics_file: statistics.yaml
 
 raw_plan: |
   Limit