Skip to content

Commit 193372f

Browse files
authored
Support Protobuf 5.x (#32679)
* Force protobuf 5.x * restore lower bound * Add bypass for microsecond conversion bounds * linting * fix incorrect unit, standardize micros_to_nanos * remove unused constant * add extra unit tests * formatting
1 parent bcd785f commit 193372f

File tree

4 files changed

+100
-9
lines changed

4 files changed

+100
-9
lines changed

sdks/python/apache_beam/transforms/window.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -449,8 +449,8 @@ def to_runner_api_parameter(self, context):
449449
standard_window_fns_pb2.FixedWindowsPayload)
450450
def from_runner_api_parameter(fn_parameter, unused_context) -> 'FixedWindows':
451451
return FixedWindows(
452-
size=Duration(micros=fn_parameter.size.ToMicroseconds()),
453-
offset=Timestamp(micros=fn_parameter.offset.ToMicroseconds()))
452+
size=Duration(micros=proto_utils.to_micros(fn_parameter.size)),
453+
offset=Timestamp(micros=proto_utils.to_micros(fn_parameter.offset)))
454454

455455

456456
class SlidingWindows(NonMergingWindowFn):
@@ -522,9 +522,9 @@ def to_runner_api_parameter(self, context):
522522
def from_runner_api_parameter(
523523
fn_parameter, unused_context) -> 'SlidingWindows':
524524
return SlidingWindows(
525-
size=Duration(micros=fn_parameter.size.ToMicroseconds()),
526-
offset=Timestamp(micros=fn_parameter.offset.ToMicroseconds()),
527-
period=Duration(micros=fn_parameter.period.ToMicroseconds()))
525+
size=Duration(micros=proto_utils.to_micros(fn_parameter.size)),
526+
offset=Timestamp(micros=proto_utils.to_micros(fn_parameter.offset)),
527+
period=Duration(micros=proto_utils.to_micros(fn_parameter.period)))
528528

529529

530530
class Sessions(WindowFn):
@@ -589,4 +589,4 @@ def to_runner_api_parameter(self, context):
589589
standard_window_fns_pb2.SessionWindowsPayload)
590590
def from_runner_api_parameter(fn_parameter, unused_context) -> 'Sessions':
591591
return Sessions(
592-
gap_size=Duration(micros=fn_parameter.gap_size.ToMicroseconds()))
592+
gap_size=Duration(micros=proto_utils.to_micros(fn_parameter.gap_size)))

sdks/python/apache_beam/utils/proto_utils.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636

3737
message_types = (message.Message, )
3838

39+
_SECONDS_TO_MICROS = 10**6
40+
_MICROS_TO_NANOS = 10**3
41+
3942

4043
@overload
4144
def pack_Any(msg: message.Message) -> any_pb2.Any:
@@ -115,8 +118,29 @@ def pack_Struct(**kwargs) -> struct_pb2.Struct:
115118

116119
def from_micros(cls: Type[TimeMessageT], micros: int) -> TimeMessageT:
117120
result = cls()
118-
result.FromMicroseconds(micros)
119-
return result
121+
if isinstance(result, duration_pb2.Duration):
122+
result.FromMicroseconds(micros)
123+
return result
124+
# Protobuf 5.x enforces a maximum timestamp value less than the Beam
125+
# maximum allowable timestamp, so we cannot use the built-in conversion.
126+
elif isinstance(result, timestamp_pb2.Timestamp):
127+
result.seconds = micros // _SECONDS_TO_MICROS
128+
result.nanos = (micros % _SECONDS_TO_MICROS) * _MICROS_TO_NANOS
129+
return result
130+
else:
131+
raise RuntimeError('cannot convert the micro seconds to %s' % cls)
132+
133+
134+
def to_micros(value: Union[duration_pb2.Duration, timestamp_pb2.Timestamp]):
135+
if isinstance(value, duration_pb2.Duration):
136+
return value.ToMicroseconds()
137+
# Protobuf 5.x enforces a maximum timestamp value less than the Beam
138+
# maximum allowable timestamp, so we cannot use the built-in conversion.
139+
elif isinstance(value, timestamp_pb2.Timestamp):
140+
micros = value.seconds * _SECONDS_TO_MICROS
141+
return micros + (value.nanos // _MICROS_TO_NANOS)
142+
else:
143+
raise RuntimeError('cannot convert %s to micro seconds' % value)
120144

121145

122146
def to_Timestamp(time: Union[int, float]) -> timestamp_pb2.Timestamp:
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
import unittest
18+
19+
from google.protobuf import duration_pb2
20+
from google.protobuf import timestamp_pb2
21+
22+
from apache_beam.utils import proto_utils
23+
from apache_beam.utils.timestamp import MAX_TIMESTAMP
24+
25+
26+
class TestProtoUtils(unittest.TestCase):
27+
def test_from_micros_duration(self):
28+
ts = proto_utils.from_micros(duration_pb2.Duration, MAX_TIMESTAMP.micros)
29+
expected = duration_pb2.Duration(
30+
seconds=MAX_TIMESTAMP.seconds(), nanos=775000000)
31+
self.assertEqual(ts, expected)
32+
33+
def test_from_micros_timestamp(self):
34+
ts = proto_utils.from_micros(timestamp_pb2.Timestamp, MAX_TIMESTAMP.micros)
35+
expected = timestamp_pb2.Timestamp(
36+
seconds=MAX_TIMESTAMP.seconds(), nanos=775000000)
37+
self.assertEqual(ts, expected)
38+
39+
def test_to_micros_duration(self):
40+
dur = duration_pb2.Duration(
41+
seconds=MAX_TIMESTAMP.seconds(), nanos=775000000)
42+
ts = proto_utils.to_micros(dur)
43+
expected = MAX_TIMESTAMP.micros
44+
self.assertEqual(ts, expected)
45+
46+
def test_to_micros_timestamp(self):
47+
dur = timestamp_pb2.Timestamp(
48+
seconds=MAX_TIMESTAMP.seconds(), nanos=775000000)
49+
ts = proto_utils.to_micros(dur)
50+
expected = MAX_TIMESTAMP.micros
51+
self.assertEqual(ts, expected)
52+
53+
def test_round_trip_duration(self):
54+
expected = 919336704
55+
dur = proto_utils.from_micros(duration_pb2.Duration, expected)
56+
ms = proto_utils.to_micros(dur)
57+
self.assertEqual(ms, expected)
58+
59+
def test_round_trip_timestamp(self):
60+
expected = 919336704
61+
ts = proto_utils.from_micros(timestamp_pb2.Timestamp, expected)
62+
ms = proto_utils.to_micros(ts)
63+
self.assertEqual(ms, expected)
64+
65+
66+
if __name__ == '__main__':
67+
unittest.main()

sdks/python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ def get_portability_package_data():
381381
#
382382
# 3. Exclude protobuf 4 versions that leak memory, see:
383383
# https://github.com/apache/beam/issues/28246
384-
'protobuf>=3.20.3,<4.26.0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*', # pylint: disable=line-too-long
384+
'protobuf>=3.20.3,<6.0.0.dev0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*', # pylint: disable=line-too-long
385385
'pydot>=1.2.0,<2',
386386
'python-dateutil>=2.8.0,<3',
387387
'pytz>=2018.3',

0 commit comments

Comments
 (0)