Skip to content

Commit c01c11c

Browse files
committed
Bigger TPCH tests
1 parent bffe4b4 commit c01c11c

File tree

6 files changed

+584
-344
lines changed

6 files changed

+584
-344
lines changed

.github/workflows/ci.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ jobs:
3333
- uses: ./.github/actions/setup
3434
- run: cargo test --features integration
3535

36+
tpch-test:
37+
runs-on: ubuntu-latest
38+
steps:
39+
- uses: actions/checkout@v4
40+
- uses: ./.github/actions/setup
41+
- run: cargo test --features tpch --test tpch_validation_test
42+
3643
format-check:
3744
runs-on: ubuntu-latest
3845
steps:

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ integration = [
4646
"arrow",
4747
"tokio-stream",
4848
]
49+
tpch = ["integration"]
4950

5051
[dev-dependencies]
5152
structopt = "0.3"

src/errors/mod.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,7 @@ pub fn tonic_status_to_datafusion_error(status: &tonic::Status) -> Option<DataFu
4040
}
4141

4242
match DataFusionErrorProto::decode(status.details()) {
43-
Ok(err_proto) => {
44-
dbg!(&err_proto);
45-
Some(err_proto.to_datafusion_err())
46-
}
43+
Ok(err_proto) => Some(err_proto.to_datafusion_err()),
4744
Err(err) => Some(internal_datafusion_err!(
4845
"Cannot decode DataFusionError: {err}"
4946
)),

src/test_utils/tpch.rs

Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ pub fn tpch_query_from_dir(queries_dir: &std::path::Path, num: u8) -> String {
2727
}
2828
pub const NUM_QUERIES: u8 = 22; // number of queries in the TPCH benchmark numbered from 1 to 22
2929

30-
const SCALE_FACTOR: f64 = 0.001;
31-
3230
pub fn tpch_table(name: &str) -> Arc<dyn TableProvider> {
3331
let schema = Arc::new(get_tpch_table_schema(name));
3432
Arc::new(MemTable::try_new(schema, vec![]).unwrap())
@@ -158,33 +156,33 @@ where
158156
Ok(())
159157
}
160158

161-
macro_rules! must_generate_tpch_table {
162-
($generator:ident, $arrow:ident, $name:literal, $data_dir:expr) => {
163-
let data_dir = $data_dir.join($name);
164-
fs::create_dir_all(data_dir.clone()).expect("Failed to create data directory");
165-
// create three partitions for the table
166-
(1..=3).for_each(|part| {
167-
generate_table(
168-
// TODO: Consider adjusting the partitions and batch sizes.
169-
$arrow::new($generator::new(SCALE_FACTOR, part, 3)).with_batch_size(1000),
170-
&format!("{part}"),
171-
&data_dir.clone().into_boxed_path(),
172-
)
173-
.expect(concat!("Failed to generate ", $name, " table"));
174-
});
175-
};
176-
}
177-
178159
// generate_tpch_data generates all TPC-H tables in the specified data directory, using scale factor `sf` and writing `parts` partitions per table.
179-
pub fn generate_tpch_data(data_dir: &std::path::Path) {
160+
pub fn generate_tpch_data(data_dir: &std::path::Path, sf: f64, parts: i32) {
180161
fs::create_dir_all(data_dir).expect("Failed to create data directory");
181162

182-
must_generate_tpch_table!(RegionGenerator, RegionArrow, "region", data_dir);
183-
must_generate_tpch_table!(NationGenerator, NationArrow, "nation", data_dir);
184-
must_generate_tpch_table!(CustomerGenerator, CustomerArrow, "customer", data_dir);
185-
must_generate_tpch_table!(SupplierGenerator, SupplierArrow, "supplier", data_dir);
186-
must_generate_tpch_table!(PartGenerator, PartArrow, "part", data_dir);
187-
must_generate_tpch_table!(PartSuppGenerator, PartSuppArrow, "partsupp", data_dir);
188-
must_generate_tpch_table!(OrderGenerator, OrderArrow, "orders", data_dir);
189-
must_generate_tpch_table!(LineItemGenerator, LineItemArrow, "lineitem", data_dir);
163+
macro_rules! must_generate_tpch_table {
164+
($generator:ident, $arrow:ident, $name:literal) => {
165+
let data_dir = data_dir.join($name);
166+
fs::create_dir_all(data_dir.clone()).expect("Failed to create data directory");
167+
// create `parts` partitions for the table
168+
(1..=parts).for_each(|part| {
169+
generate_table(
170+
// TODO: Consider adjusting the partitions and batch sizes.
171+
$arrow::new($generator::new(sf, part, parts)).with_batch_size(1000),
172+
&format!("{part}"),
173+
&data_dir,
174+
)
175+
.expect(concat!("Failed to generate ", $name, " table"));
176+
});
177+
};
178+
}
179+
180+
must_generate_tpch_table!(RegionGenerator, RegionArrow, "region");
181+
must_generate_tpch_table!(NationGenerator, NationArrow, "nation");
182+
must_generate_tpch_table!(CustomerGenerator, CustomerArrow, "customer");
183+
must_generate_tpch_table!(SupplierGenerator, SupplierArrow, "supplier");
184+
must_generate_tpch_table!(PartGenerator, PartArrow, "part");
185+
must_generate_tpch_table!(PartSuppGenerator, PartSuppArrow, "partsupp");
186+
must_generate_tpch_table!(OrderGenerator, OrderArrow, "orders");
187+
must_generate_tpch_table!(LineItemGenerator, LineItemArrow, "lineitem");
190188
}

tests/common.rs

Lines changed: 0 additions & 28 deletions
This file was deleted.

0 commit comments

Comments
 (0)