Skip to content

Commit 2712794

Browse files
authored
[Go SDK + Protos] Fix Proto Spec for Pane encoding + Go SDK implementation. (#33840)
1 parent 14df78c commit 2712794

File tree

6 files changed

+80
-20
lines changed

6 files changed

+80
-20
lines changed

CHANGES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@
111111
* Fixed session window aggregation, which wasn't being performed per-key. ([#33542](https://github.com/apache/beam/issues/33542)).)
112112
* [Dataflow Streaming Appliance] Fixed commits failing with KeyCommitTooLargeException when a key outputs >180MB of results. [#33588](https://github.com/apache/beam/issues/33588).
113113
* Fixed a Dataflow template creation issue that ignores template file creation errors (Java) ([#33636](https://github.com/apache/beam/issues/33636))
114-
114+
* Correctly documented Pane Encodings in the portability protocols ([#33840](https://github.com/apache/beam/issues/33840)).
115115

116116
## Security Fixes
117117
* Fixed (CVE-YYYY-NNNN)[https://www.cve.org/CVERecord?id=CVE-YYYY-NNNN] (Java/Python/Go) ([#X](https://github.com/apache/beam/issues/X)).

model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,9 @@ message BagStateSpec {
594594
string element_coder_id = 1;
595595
}
596596

597+
// OrderedListState values are encoded with the var int encoded
598+
// millis-since-epoch followed by the value encoded by the provided coder.
599+
// Be aware this is not the standard timestamp value encoding.
597600
message OrderedListStateSpec {
598601
string element_coder_id = 1;
599602
}
@@ -946,15 +949,15 @@ message StandardCoders {
946949
// coder.
947950
//
948951
// pane - The first byte of the pane info determines which type of
949-
// encoding is used, as well as the is_first, is_last, and timing
952+
// encoding is used, as well as the is_first, is_last and timing
950953
// fields. If this byte is bits [0 1 2 3 4 5 6 7], then:
951954
// * bits [0 1 2 3] determine the encoding as follows:
952955
// 0000 - The entire pane info is encoded as a single byte.
953956
// The is_first, is_last, and timing fields are encoded
954957
// as below, and the index and non-speculative index are
955958
// both zero (and hence are not encoded here).
956959
// 0001 - The pane info is encoded as this byte plus a single
957-
// VarInt encoed integer representing the pane index. The
960+
// VarInt encoded integer representing the pane index. The
958961
// non-speculative index can be derived as follows:
959962
// -1 if the pane is early, otherwise equal to index.
960963
// 0010 - The pane info is encoded as this byte plus two VarInt
@@ -965,8 +968,10 @@ message StandardCoders {
965968
// 01 - on time
966969
// 10 - late
967970
// 11 - unknown
968-
// * bit 6 is 1 if this is the first pane, 0 otherwise.
969-
// * bit 7 is 1 if this is the last pane, 0 otherwise.
971+
// * bit 6 is 1 if this is the last pane, 0 otherwise.
972+
// Commonly set with `byte |= 0x02`
973+
// * bit 7 is 1 if this is the first pane, 0 otherwise.
974+
// Commonly set with `byte |= 0x01`
970975
//
971976
// element - The element incoded using the supplied element coder.
972977
//
@@ -1329,7 +1334,7 @@ message Trigger {
13291334
message AfterSynchronizedProcessingTime {
13301335
}
13311336

1332-
// The default trigger. Equivalent to Repeat { AfterEndOfWindow } but
1337+
// The default trigger. Equivalent to AfterEndOfWindow { Late: Always }} but
13331338
// specially denoted to indicate the user did not alter the triggering.
13341339
message Default {
13351340
}
@@ -1339,12 +1344,12 @@ message Trigger {
13391344
int32 element_count = 1;
13401345
}
13411346

1342-
// Never ready. There will only be an ON_TIME output and a final
1343-
// output at window expiration.
1347+
// Never ready. There will only be an ON_TIME final output at window
1348+
// expiration.
13441349
message Never {
13451350
}
13461351

1347-
// Always ready. This can also be expressed as ElementCount(1) but
1352+
// Always ready. This can also be expressed as Repeat{ ElementCount(1) } but
13481353
// is more explicit.
13491354
message Always {
13501355
}

sdks/go/pkg/beam/core/graph/coder/panes.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ func EncodePane(v typex.PaneInfo, w io.Writer) error {
2929

3030
pane := byte(0)
3131
if v.IsFirst {
32-
pane |= 0x02
32+
pane |= 0x01
3333
}
3434
if v.IsLast {
35-
pane |= 0x01
35+
pane |= 0x02
3636
}
3737
pane |= byte(v.Timing << 2)
3838

@@ -64,10 +64,10 @@ func EncodePane(v typex.PaneInfo, w io.Writer) error {
6464
func NewPane(b byte) typex.PaneInfo {
6565
pn := typex.NoFiringPane()
6666

67-
if !(b&0x02 == 2) {
67+
if !(b&0x01 == 1) {
6868
pn.IsFirst = false
6969
}
70-
if !(b&0x01 == 1) {
70+
if !(b&0x02 == 2) {
7171
pn.IsLast = false
7272
}
7373

sdks/go/pkg/beam/core/graph/coder/panes_test.go

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,13 @@ func equalPanes(left, right typex.PaneInfo) bool {
3333

3434
func TestPaneCoder(t *testing.T) {
3535
tests := []struct {
36-
name string
37-
timing typex.PaneTiming
38-
first bool
39-
last bool
40-
index int64
41-
nsIndex int64
36+
name string
37+
timing typex.PaneTiming
38+
first bool
39+
last bool
40+
index int64
41+
nsIndex int64
42+
firstByte byte
4243
}{
4344
{
4445
"false bools",
@@ -47,6 +48,7 @@ func TestPaneCoder(t *testing.T) {
4748
false,
4849
0,
4950
0,
51+
0b00001100,
5052
},
5153
{
5254
"true bools",
@@ -55,6 +57,7 @@ func TestPaneCoder(t *testing.T) {
5557
true,
5658
0,
5759
0,
60+
0b00001111,
5861
},
5962
{
6063
"first pane",
@@ -63,6 +66,7 @@ func TestPaneCoder(t *testing.T) {
6366
false,
6467
0,
6568
0,
69+
0b00001101,
6670
},
6771
{
6872
"last pane",
@@ -71,6 +75,7 @@ func TestPaneCoder(t *testing.T) {
7175
true,
7276
0,
7377
0,
78+
0b00001110,
7479
},
7580
{
7681
"on time, different index and non-speculative",
@@ -79,6 +84,7 @@ func TestPaneCoder(t *testing.T) {
7984
false,
8085
1,
8186
2,
87+
0b00100100,
8288
},
8389
{
8490
"valid early pane",
@@ -87,6 +93,7 @@ func TestPaneCoder(t *testing.T) {
8793
false,
8894
math.MaxInt64,
8995
-1,
96+
0b00010001,
9097
},
9198
{
9299
"on time, max non-speculative index",
@@ -95,6 +102,7 @@ func TestPaneCoder(t *testing.T) {
95102
true,
96103
0,
97104
math.MaxInt64,
105+
0b00100110,
98106
},
99107
{
100108
"late pane, max index",
@@ -103,6 +111,7 @@ func TestPaneCoder(t *testing.T) {
103111
false,
104112
math.MaxInt64,
105113
0,
114+
0b00101000,
106115
},
107116
{
108117
"on time, min non-speculative index",
@@ -111,6 +120,7 @@ func TestPaneCoder(t *testing.T) {
111120
true,
112121
0,
113122
math.MinInt64,
123+
0b00100110,
114124
},
115125
{
116126
"late, min index",
@@ -119,6 +129,34 @@ func TestPaneCoder(t *testing.T) {
119129
false,
120130
math.MinInt64,
121131
0,
132+
0b00101000,
133+
},
134+
{
135+
"last late firing",
136+
typex.PaneLate,
137+
false,
138+
true,
139+
2,
140+
1,
141+
0b00101010,
142+
},
143+
{
144+
"encodeByte 41",
145+
typex.PaneLate,
146+
true,
147+
false,
148+
2,
149+
1,
150+
0b00101001, // 41
151+
},
152+
{
153+
"encodeByte 18",
154+
typex.PaneEarly,
155+
false,
156+
true,
157+
0,
158+
-1,
159+
0b00010010, // 18
122160
},
123161
}
124162
for _, test := range tests {
@@ -129,6 +167,10 @@ func TestPaneCoder(t *testing.T) {
129167
if err != nil {
130168
t.Fatalf("failed to encode pane %v, got %v", input, err)
131169
}
170+
first := buf.Bytes()[0]
171+
if got, want := first, test.firstByte; got != want {
172+
t.Errorf("Unexpected First Byte: got %#08b, want %#08b, for %v ", got, want, input)
173+
}
132174
got, err := DecodePane(&buf)
133175
if err != nil {
134176
t.Fatalf("failed to decode pane from buffer %v, got %v", &buf, err)

sdks/go/pkg/beam/core/graph/window/trigger/trigger.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ func (t *NeverTrigger) String() string {
352352
func (t NeverTrigger) trigger() {}
353353

354354
// Never creates a Never Trigger that is never ready to fire.
355-
// There will only be an ON_TIME output and a final output at window expiration.
355+
// There will only be a single ON_TIME final output at window expiration + allowed lateness.
356356
func Never() *NeverTrigger {
357357
return &NeverTrigger{}
358358
}

sdks/go/pkg/beam/core/typex/special.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,19 @@ const (
9898
PaneUnknown PaneTiming = 3
9999
)
100100

101+
func (t PaneTiming) String() string {
102+
switch t {
103+
case PaneEarly:
104+
return "early"
105+
case PaneOnTime:
106+
return "ontime"
107+
case PaneLate:
108+
return "late"
109+
default:
110+
return "unknown"
111+
}
112+
}
113+
101114
// PaneInfo represents the output pane.
102115
type PaneInfo struct {
103116
Timing PaneTiming

0 commit comments

Comments
 (0)