Skip to content
This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit 3fea5c4

Browse files
committed
[SPARK-22787][TEST][SQL] Add a TPC-H query suite
## What changes were proposed in this pull request?

Add a test suite to ensure that all the TPC-H queries can be successfully analyzed, optimized, and compiled without hitting the max iteration threshold.

## How was this patch tested?

N/A

Author: gatorsmile <[email protected]>

Closes apache#19982 from gatorsmile/testTPCH.
1 parent 0ea2d8c commit 3fea5c4

File tree

26 files changed

+872
-57
lines changed

26 files changed

+872
-57
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,10 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister {
116116
readToUnsafeMem(broadcastedHadoopConf, requiredSchema, textOptions.wholeText)
117117
}
118118

119-
private def readToUnsafeMem(conf: Broadcast[SerializableConfiguration],
120-
requiredSchema: StructType, wholeTextMode: Boolean):
121-
(PartitionedFile) => Iterator[UnsafeRow] = {
119+
private def readToUnsafeMem(
120+
conf: Broadcast[SerializableConfiguration],
121+
requiredSchema: StructType,
122+
wholeTextMode: Boolean): (PartitionedFile) => Iterator[UnsafeRow] = {
122123

123124
(file: PartitionedFile) => {
124125
val confValue = conf.value.value
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
-- using default substitutions
-- Summary over lineitem rows with l_shipdate on or before 1998-12-01 minus
-- 90 days. One output row per (l_returnflag, l_linestatus) pair, carrying
-- sums of quantity, extended price, discounted price and discounted price
-- with tax, averages of quantity, extended price and discount, and the row
-- count. Output is ordered by the two grouping columns.
-- NOTE(review): looks like TPC-H Q1 -- confirm against the file name.
2+
3+
select
4+
l_returnflag,
5+
l_linestatus,
6+
sum(l_quantity) as sum_qty,
7+
sum(l_extendedprice) as sum_base_price,
8+
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
9+
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
10+
avg(l_quantity) as avg_qty,
11+
avg(l_extendedprice) as avg_price,
12+
avg(l_discount) as avg_disc,
13+
count(*) as count_order
14+
from
15+
lineitem
16+
where
17+
l_shipdate <= date '1998-12-01' - interval '90' day
18+
group by
19+
l_returnflag,
20+
l_linestatus
21+
order by
22+
l_returnflag,
23+
l_linestatus
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
-- using default substitutions
-- Per-customer revenue, sum(l_extendedprice * (1 - l_discount)), from line
-- items with l_returnflag = 'R' on orders dated in the three months starting
-- 1993-10-01 (half-open range: >= start, < start + 3 months).
-- customer, orders, lineitem and nation are comma-joined; the join
-- conditions are the key equalities in the WHERE clause.
-- Returns the 20 rows with the highest revenue.
-- NOTE(review): looks like TPC-H Q10 -- confirm against the file name.
2+
3+
select
4+
c_custkey,
5+
c_name,
6+
sum(l_extendedprice * (1 - l_discount)) as revenue,
7+
c_acctbal,
8+
n_name,
9+
c_address,
10+
c_phone,
11+
c_comment
12+
from
13+
customer,
14+
orders,
15+
lineitem,
16+
nation
17+
where
18+
c_custkey = o_custkey
19+
and l_orderkey = o_orderkey
20+
and o_orderdate >= date '1993-10-01'
21+
and o_orderdate < date '1993-10-01' + interval '3' month
22+
and l_returnflag = 'R'
23+
and c_nationkey = n_nationkey
24+
group by
25+
c_custkey,
26+
c_name,
27+
c_acctbal,
28+
c_phone,
29+
n_name,
30+
c_address,
31+
c_comment
32+
order by
33+
revenue desc
34+
limit 20
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
-- using default substitutions
-- For suppliers whose nation is 'GERMANY', computes per ps_partkey the value
-- sum(ps_supplycost * ps_availqty). The HAVING clause keeps only parts whose
-- value exceeds 0.0001 times the same sum taken over all matching rows; the
-- scalar subquery repeats the outer joins and filters and is not correlated
-- with the outer query. Output is ordered by value, largest first.
-- NOTE(review): looks like TPC-H Q11 -- confirm against the file name.
2+
3+
select
4+
ps_partkey,
5+
sum(ps_supplycost * ps_availqty) as value
6+
from
7+
partsupp,
8+
supplier,
9+
nation
10+
where
11+
ps_suppkey = s_suppkey
12+
and s_nationkey = n_nationkey
13+
and n_name = 'GERMANY'
14+
group by
15+
ps_partkey having
16+
sum(ps_supplycost * ps_availqty) > (
17+
select
18+
sum(ps_supplycost * ps_availqty) * 0.0001000000
19+
from
20+
partsupp,
21+
supplier,
22+
nation
23+
where
24+
ps_suppkey = s_suppkey
25+
and s_nationkey = n_nationkey
26+
and n_name = 'GERMANY'
27+
)
28+
order by
29+
value desc
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
-- using default substitutions
-- For each l_shipmode in ('MAIL', 'SHIP'), counts joined order/line-item rows
-- in two buckets: high_line_count where o_orderpriority is '1-URGENT' or
-- '2-HIGH', and low_line_count where it is neither.
-- Only line items with l_shipdate < l_commitdate < l_receiptdate and an
-- l_receiptdate within the year starting 1994-01-01 (half-open range) are
-- considered. Output is ordered by l_shipmode.
-- NOTE(review): looks like TPC-H Q12 -- confirm against the file name.
2+
3+
select
4+
l_shipmode,
5+
sum(case
6+
when o_orderpriority = '1-URGENT'
7+
or o_orderpriority = '2-HIGH'
8+
then 1
9+
else 0
10+
end) as high_line_count,
11+
sum(case
12+
when o_orderpriority <> '1-URGENT'
13+
and o_orderpriority <> '2-HIGH'
14+
then 1
15+
else 0
16+
end) as low_line_count
17+
from
18+
orders,
19+
lineitem
20+
where
21+
o_orderkey = l_orderkey
22+
and l_shipmode in ('MAIL', 'SHIP')
23+
and l_commitdate < l_receiptdate
24+
and l_shipdate < l_commitdate
25+
and l_receiptdate >= date '1994-01-01'
26+
and l_receiptdate < date '1994-01-01' + interval '1' year
27+
group by
28+
l_shipmode
29+
order by
30+
l_shipmode
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
-- using default substitutions
-- Distribution of customers by number of orders. The inner query counts, per
-- c_custkey, the orders whose o_comment does not match '%special%requests%'.
-- It uses a left outer join with the comment filter inside the ON clause, so
-- customers with no qualifying orders are kept; count(o_orderkey) skips the
-- NULLs produced for them and yields 0.
-- The outer query counts customers per c_count and orders by custdist, then
-- c_count, both descending.
-- NOTE(review): looks like TPC-H Q13 -- confirm against the file name.
2+
3+
select
4+
c_count,
5+
count(*) as custdist
6+
from
7+
(
8+
select
9+
c_custkey,
10+
count(o_orderkey) as c_count
11+
from
12+
customer left outer join orders on
13+
c_custkey = o_custkey
14+
and o_comment not like '%special%requests%'
15+
group by
16+
c_custkey
17+
) as c_orders
18+
group by
19+
c_count
20+
order by
21+
custdist desc,
22+
c_count desc
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
-- using default substitutions
-- Computes promo_revenue as 100.00 * (discounted revenue of line items whose
-- part has p_type starting with 'PROMO') / (discounted revenue of all selected
-- line items), where discounted revenue is l_extendedprice * (1 - l_discount).
-- Selected line items are those joined to part on the part key and shipped in
-- the month starting 1995-09-01 (half-open range).
-- NOTE(review): looks like TPC-H Q14 -- confirm against the file name.
2+
3+
select
4+
100.00 * sum(case
5+
when p_type like 'PROMO%'
6+
then l_extendedprice * (1 - l_discount)
7+
else 0
8+
end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
9+
from
10+
lineitem,
11+
part
12+
where
13+
l_partkey = p_partkey
14+
and l_shipdate >= date '1995-09-01'
15+
and l_shipdate < date '1995-09-01' + interval '1' month
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
-- using default substitutions
-- The CTE revenue0 yields, per l_suppkey (aliased supplier_no), the total
-- discounted revenue sum(l_extendedprice * (1 - l_discount)) of line items
-- shipped in the three months starting 1996-01-01 (half-open range).
-- The main query joins supplier to revenue0 and keeps only the rows whose
-- total_revenue equals max(total_revenue) over revenue0, so ties all appear.
-- Output is ordered by s_suppkey.
-- NOTE(review): looks like TPC-H Q15 -- confirm against the file name.
2+
3+
with revenue0 as
4+
(select
5+
l_suppkey as supplier_no,
6+
sum(l_extendedprice * (1 - l_discount)) as total_revenue
7+
from
8+
lineitem
9+
where
10+
l_shipdate >= date '1996-01-01'
11+
and l_shipdate < date '1996-01-01' + interval '3' month
12+
group by
13+
l_suppkey)
14+
15+
16+
select
17+
s_suppkey,
18+
s_name,
19+
s_address,
20+
s_phone,
21+
total_revenue
22+
from
23+
supplier,
24+
revenue0
25+
where
26+
s_suppkey = supplier_no
27+
and total_revenue = (
28+
select
29+
max(total_revenue)
30+
from
31+
revenue0
32+
)
33+
order by
34+
s_suppkey
35+
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
-- using default substitutions
-- Counts distinct ps_suppkey per (p_brand, p_type, p_size) for parts that are
-- not brand 'Brand#45', whose type does not start with 'MEDIUM POLISHED', and
-- whose size is one of the eight listed values. Suppliers whose s_comment
-- matches '%Customer%Complaints%' are excluded through the NOT IN subquery.
-- Output is ordered by supplier_cnt descending, then brand, type and size.
-- NOTE(review): NOT IN returns no rows if the subquery yields a NULL;
-- presumably s_suppkey is a non-null key -- confirm against the schema.
-- NOTE(review): looks like TPC-H Q16 -- confirm against the file name.
2+
3+
select
4+
p_brand,
5+
p_type,
6+
p_size,
7+
count(distinct ps_suppkey) as supplier_cnt
8+
from
9+
partsupp,
10+
part
11+
where
12+
p_partkey = ps_partkey
13+
and p_brand <> 'Brand#45'
14+
and p_type not like 'MEDIUM POLISHED%'
15+
and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
16+
and ps_suppkey not in (
17+
select
18+
s_suppkey
19+
from
20+
supplier
21+
where
22+
s_comment like '%Customer%Complaints%'
23+
)
24+
group by
25+
p_brand,
26+
p_type,
27+
p_size
28+
order by
29+
supplier_cnt desc,
30+
p_brand,
31+
p_type,
32+
p_size
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- using default substitutions
-- Computes avg_yearly as sum(l_extendedprice) / 7.0 over line items joined to
-- parts with p_brand = 'Brand#23' and p_container = 'MED BOX', keeping only
-- line items whose l_quantity is below 0.2 times the average l_quantity of
-- all line items for that same part. The subquery is correlated with the
-- outer query through p_partkey.
-- NOTE(review): looks like TPC-H Q17; the 7.0 divisor presumably reflects a
-- seven-year data span -- confirm against the benchmark specification.
2+
3+
select
4+
sum(l_extendedprice) / 7.0 as avg_yearly
5+
from
6+
lineitem,
7+
part
8+
where
9+
p_partkey = l_partkey
10+
and p_brand = 'Brand#23'
11+
and p_container = 'MED BOX'
12+
and l_quantity < (
13+
select
14+
0.2 * avg(l_quantity)
15+
from
16+
lineitem
17+
where
18+
l_partkey = p_partkey
19+
)

0 commit comments

Comments
 (0)