Skip to content

Commit 23d7c50

Browse files
authored
feat(sql): WINDOW without ORDER BY (#3554)
Rules:
- ALLOWED for ROWS-type WINDOW
- NOT ALLOWED for RANGE-type WINDOW with offset PRECEDING/FOLLOWING
- NOT ALLOWED for WINDOW with attribute EXCLUDE CURRENT_TIME

Without ORDER BY, rows are processed in an unspecified order.
1 parent d00449d commit 23d7c50

File tree

9 files changed

+326
-43
lines changed

9 files changed

+326
-43
lines changed

cases/function/window/error_window.yaml

Lines changed: 30 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,15 +17,17 @@ debugs: []
1717
version: 0.5.0
1818
cases:
1919
- id: 0
20-
desc: no order by
20+
desc: RANGE-type WINDOW with offset PRECEDING/FOLLOWING requires ORDER BY
2121
inputs:
2222
- columns: [ "id int","c1 string","c3 int","c4 bigint","c5 float","c6 double","c7 timestamp","c8 date" ]
2323
indexs: [ "index1:c8:c4" ]
2424
rows:
2525
- [1,"aa",20,30,1.1,2.1,1590738990000,"2020-05-01"]
2626
sql: |
27-
SELECT id, c1, c4, count(c4) OVER w1 as w1_c4_count FROM {0} WINDOW w1 AS (PARTITION BY {0}.c8 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW);
27+
SELECT id, c1, c4, count(c4) OVER w1 as w1_c4_count FROM {0}
28+
WINDOW w1 AS (PARTITION BY {0}.c8 ROWS_RANGE BETWEEN 2 PRECEDING AND CURRENT ROW);
2829
expect:
30+
msg: RANGE/ROWS_RANGE-type FRAME with offset PRECEDING/FOLLOWING requires exactly one ORDER BY column
2931
success: false
3032
- id: 1
3133
desc: no partition by
@@ -301,3 +303,29 @@ cases:
301303
SELECT id, c1, c3, sum(c4) OVER w1 as w1_c4_sum FROM {0} WINDOW w1 AS (PARTITION BY {0}.c33 ORDER BY {0}.c7 ROWS_RANGE BETWEEN 2s PRECEDING AND CURRENT ROW);
302304
expect:
303305
success: false
306+
- id: 17
307+
desc: ROWS WINDOW + EXCLUDE CURRENT_TIME requires order by
308+
inputs:
309+
- columns: [ "id int","c1 string","c3 int","c4 bigint","c5 float","c6 double","c7 timestamp","c8 date" ]
310+
indexs: [ "index1:c8:c4" ]
311+
rows:
312+
- [1,"aa",20,30,1.1,2.1,1590738990000,"2020-05-01"]
313+
sql: |
314+
SELECT id, c1, c4, count(c4) OVER w1 as w1_c4_count FROM {0}
315+
WINDOW w1 AS (PARTITION BY {0}.c8 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT_TIME);
316+
expect:
317+
msg: WINDOW with EXCLUDE CURRENT_TIME requires exactly one ORDER BY column
318+
success: false
319+
- id: 18
320+
desc: RANGE WINDOW + EXCLUDE CURRENT_TIME requires order by
321+
inputs:
322+
- columns: [ "id int","c1 string","c3 int","c4 bigint","c5 float","c6 double","c7 timestamp","c8 date" ]
323+
indexs: [ "index1:c8:c4" ]
324+
rows:
325+
- [1,"aa",20,30,1.1,2.1,1590738990000,"2020-05-01"]
326+
sql: |
327+
SELECT id, c1, c4, count(c4) OVER w1 as w1_c4_count FROM {0}
328+
WINDOW w1 AS (PARTITION BY {0}.c8 ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT_TIME);
329+
expect:
330+
msg: WINDOW with EXCLUDE CURRENT_TIME requires exactly one ORDER BY column
331+
success: false

cases/query/window_query.yaml

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -901,3 +901,234 @@ cases:
901901
200, 1, 1
902902
300, 0, 0
903903
400, 2, 2
904+
905+
# ======================================================================
906+
# WINDOW without ORDER BY
907+
# ======================================================================
908+
- id: 24
909+
desc: ROWS WINDOW WITHOUT ORDER BY
910+
mode: batch-unsupport
911+
inputs:
912+
- name: t1
913+
columns:
914+
- id int
915+
- gp int
916+
- ts timestamp
917+
indexs:
918+
- idx:gp:ts
919+
data: |
920+
1, 100, 20000
921+
2, 100, 10000
922+
3, 400, 20000
923+
4, 400, 10000
924+
5, 400, 15000
925+
6, 400, 40000
926+
sql: |
927+
select id, count(ts) over w as agg
928+
from t1
929+
window w as (
930+
partition by gp
931+
rows between 2 open preceding and current row
932+
)
933+
request_plan: |
934+
PROJECT(type=Aggregation)
935+
REQUEST_UNION(partition_keys=(), orders=, rows=(, 2 OPEN PRECEDING, 0 CURRENT), index_keys=(gp))
936+
DATA_PROVIDER(request=t1)
937+
DATA_PROVIDER(type=Partition, table=t1, index=idx)
938+
cluster_request_plan: |
939+
SIMPLE_PROJECT(sources=(id, agg))
940+
REQUEST_JOIN(type=kJoinTypeConcat)
941+
SIMPLE_PROJECT(sources=(id))
942+
DATA_PROVIDER(request=t1)
943+
PROJECT(type=Aggregation)
944+
REQUEST_UNION(partition_keys=(), orders=, rows=(, 2 OPEN PRECEDING, 0 CURRENT), index_keys=(gp))
945+
DATA_PROVIDER(request=t1)
946+
DATA_PROVIDER(type=Partition, table=t1, index=idx)
947+
expect:
948+
columns: ["id int", "agg int64"]
949+
order: id
950+
data: |
951+
1, 1
952+
2, 2
953+
3, 1
954+
4, 2
955+
5, 2
956+
6, 2
957+
- id: 25
958+
desc: RANGE WINDOW WITHOUT ORDER BY
959+
mode: batch-unsupport
960+
inputs:
961+
- name: t1
962+
columns:
963+
- id int
964+
- gp int
965+
- ts timestamp
966+
indexs:
967+
- idx:gp:ts
968+
data: |
969+
1, 100, 20000
970+
2, 100, 10000
971+
3, 400, 20000
972+
4, 400, 10
973+
5, 400, 15000
974+
sql: |
975+
select id, count(ts) over w as agg
976+
from t1
977+
window w as (
978+
partition by gp
979+
rows_range between unbounded preceding and current row
980+
)
981+
request_plan: |
982+
PROJECT(type=Aggregation)
983+
REQUEST_UNION(partition_keys=(), orders=, range=(, 0 PRECEDING UNBOUND, 0 CURRENT), index_keys=(gp))
984+
DATA_PROVIDER(request=t1)
985+
DATA_PROVIDER(type=Partition, table=t1, index=idx)
986+
cluster_request_plan: |
987+
SIMPLE_PROJECT(sources=(id, agg))
988+
REQUEST_JOIN(type=kJoinTypeConcat)
989+
SIMPLE_PROJECT(sources=(id))
990+
DATA_PROVIDER(request=t1)
991+
PROJECT(type=Aggregation)
992+
REQUEST_UNION(partition_keys=(), orders=, range=(, 0 PRECEDING UNBOUND, 0 CURRENT), index_keys=(gp))
993+
DATA_PROVIDER(request=t1)
994+
DATA_PROVIDER(type=Partition, table=t1, index=idx)
995+
expect:
996+
columns: ["id int", "agg int64"]
997+
order: id
998+
data: |
999+
1, 1
1000+
2, 2
1001+
3, 1
1002+
4, 2
1003+
5, 3
1004+
- id: 26
1005+
desc: RANGE-type WINDOW WITHOUT ORDER BY + WINDOW attributes
1006+
mode: batch-unsupport
1007+
inputs:
1008+
- name: t1
1009+
columns:
1010+
- id int
1011+
- gp int
1012+
- ts timestamp
1013+
indexs:
1014+
- idx:gp:ts
1015+
data: |
1016+
1, 100, 20000
1017+
2, 100, 10000
1018+
3, 400, 20000
1019+
4, 400, 10000
1020+
5, 400, 15000
1021+
- name: t2
1022+
columns:
1023+
- id int
1024+
- gp int
1025+
- ts timestamp
1026+
indexs:
1027+
- idx:gp:ts
1028+
data: |
1029+
1, 100, 20000
1030+
2, 100, 10000
1031+
3, 400, 20000
1032+
4, 400, 10000
1033+
5, 400, 15000
1034+
sql: |
1035+
select id,
1036+
count(ts) over w1 as agg1,
1037+
count(ts) over w2 as agg2,
1038+
count(ts) over w3 as agg3,
1039+
count(ts) over w4 as agg4,
1040+
count(ts) over w5 as agg5,
1041+
count(ts) over w6 as agg6,
1042+
count(ts) over w7 as agg7,
1043+
from t1
1044+
window w1 as (
1045+
PARTITION by gp
1046+
ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
1047+
w2 as (partition by gp
1048+
ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT_ROW),
1049+
w3 as (PARTITION BY gp
1050+
ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW MAXSIZE 1),
1051+
w4 as (
1052+
UNION (select * from t2)
1053+
PARTITION BY gp
1054+
ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW INSTANCE_NOT_IN_WINDOW),
1055+
w5 as (
1056+
UNION (select * from t2)
1057+
PARTITION BY gp
1058+
ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW INSTANCE_NOT_IN_WINDOW EXCLUDE CURRENT_ROW),
1059+
w6 as (
1060+
UNION (select * from t2)
1061+
PARTITION BY gp
1062+
ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW MAXSIZE 2 INSTANCE_NOT_IN_WINDOW EXCLUDE CURRENT_ROW),
1063+
w7 as (
1064+
UNION (select * from t2)
1065+
PARTITION BY gp
1066+
ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT_ROW)
1067+
expect:
1068+
columns: ["id int", "agg1 int64", "agg2 int64", "agg3 int64", "agg4 int64", "agg5 int64", "agg6 int64", "agg7 int64"]
1069+
order: id
1070+
data: |
1071+
1, 1, 0, 1, 3, 2, 2, 2
1072+
2, 2, 1, 1, 3, 2, 2, 3
1073+
3, 1, 0, 1, 4, 3, 2, 3
1074+
4, 2, 1, 1, 4, 3, 2, 4
1075+
5, 3, 2, 1, 4, 3, 2, 5
1076+
- id: 27
1077+
desc: ROWS-type WINDOW WITHOUT ORDER BY + WINDOW attributes
1078+
mode: batch-unsupport
1079+
inputs:
1080+
- name: t1
1081+
columns:
1082+
- id int
1083+
- gp int
1084+
- ts timestamp
1085+
indexs:
1086+
- idx:gp:ts
1087+
data: |
1088+
1, 100, 20000
1089+
2, 100, 10000
1090+
3, 400, 20000
1091+
4, 400, 10000
1092+
5, 400, 15000
1093+
- name: t2
1094+
columns:
1095+
- id int
1096+
- gp int
1097+
- ts timestamp
1098+
indexs:
1099+
- idx:gp:ts
1100+
data: |
1101+
1, 100, 20000
1102+
2, 100, 10000
1103+
3, 400, 20000
1104+
4, 400, 10000
1105+
5, 400, 15000
1106+
sql: |
1107+
select id,
1108+
count(ts) over w1 as agg1,
1109+
count(ts) over w2 as agg2,
1110+
count(ts) over w3 as agg3,
1111+
count(ts) over w4 as agg4,
1112+
from t1
1113+
window w1 as (
1114+
PARTITION by gp
1115+
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW),
1116+
w2 as (partition by gp
1117+
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW EXCLUDE CURRENT_ROW),
1118+
w3 as (
1119+
UNION (select * from t2)
1120+
PARTITION BY gp
1121+
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW INSTANCE_NOT_IN_WINDOW),
1122+
w4 as (
1123+
UNION (select * from t2)
1124+
PARTITION BY gp
1125+
ROWS BETWEEN 3 PRECEDING AND CURRENT ROW INSTANCE_NOT_IN_WINDOW EXCLUDE CURRENT_ROW)
1126+
expect:
1127+
columns: ["id int", "agg1 int64", "agg2 int64", "agg3 int64", "agg4 int64"]
1128+
order: id
1129+
data: |
1130+
1, 1, 0, 3, 2
1131+
2, 2, 1, 3, 2
1132+
3, 1, 0, 3, 3
1133+
4, 2, 1, 3, 3
1134+
5, 3, 2, 3, 3

hybridse/include/node/sql_node.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,6 +1166,9 @@ class FrameBound : public SqlNode {
11661166
int64_t GetOffset() const { return offset_; }
11671167
void SetOffset(int64_t v) { offset_ = v; }
11681168

1169+
// is offset [OPEN] PRECEDING/FOLLOWING
1170+
bool is_offset_bound() const;
1171+
11691172

11701173
/// \brief get the inclusive frame bound offset value that has signed symbol
11711174
///

hybridse/include/vm/physical_op.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,9 @@ class Range : public FnComponent {
200200
const bool Valid() const { return nullptr != range_key_; }
201201
const std::string ToString() const {
202202
std::ostringstream oss;
203-
if (nullptr != range_key_ && nullptr != frame_) {
203+
if (nullptr != frame_) {
204204
if (nullptr != frame_->frame_range()) {
205-
oss << "range=(" << range_key_->GetExprString() << ", "
205+
oss << "range=(" << node::ExprString(range_key_) << ", "
206206
<< frame_->frame_range()->start()->GetExprString() << ", "
207207
<< frame_->frame_range()->end()->GetExprString();
208208

@@ -216,7 +216,7 @@ class Range : public FnComponent {
216216
if (nullptr != frame_->frame_range()) {
217217
oss << ", ";
218218
}
219-
oss << "rows=(" << range_key_->GetExprString() << ", "
219+
oss << "rows=(" << node::ExprString(range_key_) << ", "
220220
<< frame_->frame_rows()->start()->GetExprString() << ", "
221221
<< frame_->frame_rows()->end()->GetExprString() << ")";
222222
}
@@ -578,7 +578,7 @@ class PhysicalRequestProviderNode : public PhysicalDataProviderNode {
578578
PhysicalOpNode **out) override;
579579

580580
virtual ~PhysicalRequestProviderNode() {}
581-
virtual void Print(std::ostream &output, const std::string &tab) const;
581+
void Print(std::ostream &output, const std::string &tab) const override;
582582
};
583583

584584
class PhysicalRequestProviderNodeWithCommonColumn
@@ -846,9 +846,7 @@ class WindowOp {
846846
std::ostringstream oss;
847847
oss << "partition_" << partition_.ToString();
848848
oss << ", " << sort_.ToString();
849-
if (range_.Valid()) {
850-
oss << ", " << range_.ToString();
851-
}
849+
oss << ", " << range_.ToString();
852850
return oss.str();
853851
}
854852
const std::string FnDetail() const {

hybridse/src/node/sql_node.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2100,6 +2100,11 @@ void FrameBound::Print(std::ostream &output, const std::string &org_tab) const {
21002100
}
21012101
}
21022102

2103+
bool FrameBound::is_offset_bound() const {
2104+
return bound_type_ == kPreceding || bound_type_ == kOpenPreceding || bound_type_ == kFollowing ||
2105+
bound_type_ == kOpenFollowing;
2106+
}
2107+
21032108
int FrameBound::Compare(const FrameBound *bound1, const FrameBound *bound2) {
21042109
if (SqlEquals(bound1, bound2)) {
21052110
return 0;

hybridse/src/plan/planner.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
#include <algorithm>
2020
#include <map>
21-
#include <random>
2221
#include <set>
2322
#include <string>
2423
#include <utility>

0 commit comments

Comments
 (0)