Skip to content

Commit 93274bb

Browse files
pytorchbot and lucylq authored
Add minimum_length to extended header for BC (#14334)
Summary: An error occurs when loading older PTE files whose extended header is 24 bytes long: since the segment_data_size field was added, 'from_bytes' expects a 32-byte header. The C++ side is already backward compatible (BC) because it checks against a minimum length. Add a minimum length to the Python side so that the change is BC there as well. Differential Revision: D82492169 Co-authored-by: lucylq <[email protected]>
1 parent e507bf9 commit 93274bb

File tree

3 files changed

+64
-16
lines changed

3 files changed

+64
-16
lines changed

exir/_serialize/_program.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,7 @@ class _ExtendedHeader:
136136

137137
# The magic bytes that should be at the beginning of the header.
138138
EXPECTED_MAGIC: ClassVar[bytes] = b"eh00"
139-
# The length of the header in bytes.
140-
EXPECTED_LENGTH: ClassVar[int] = (
139+
MINIMUM_LENGTH: ClassVar[int] = (
141140
# Header magic
142141
4
143142
# Header length
@@ -146,10 +145,19 @@ class _ExtendedHeader:
146145
+ 8
147146
# Segment base offset
148147
+ 8
148+
)
149+
# The length of the header in bytes.
150+
EXPECTED_LENGTH: ClassVar[int] = (
151+
MINIMUM_LENGTH
149152
# Segment data size
150153
+ 8
151154
)
152155

156+
# To find the header, callers should provide at least this many bytes of
157+
# the head of the serialized Program data. Keep this in sync with
158+
# kNumHeadBytes in //executorch/schema/extended_header.h
159+
NUM_HEAD_BYTES: ClassVar[int] = 64
160+
153161
# Instance attributes. @dataclass will turn these into ctor args.
154162

155163
# The size of the serialized program data in bytes.
@@ -187,21 +195,29 @@ def from_bytes(data: bytes) -> "_ExtendedHeader":
187195
+ f"< {_ExtendedHeader.EXPECTED_LENGTH}"
188196
)
189197

198+
magic = data[0:4]
199+
length = int.from_bytes(data[4:8], byteorder=_HEADER_BYTEORDER)
200+
program_size = int.from_bytes(data[8:16], byteorder=_HEADER_BYTEORDER)
201+
segment_base_offset = int.from_bytes(data[16:24], byteorder=_HEADER_BYTEORDER)
202+
segment_data_size = (
203+
int.from_bytes(data[24:32], byteorder=_HEADER_BYTEORDER)
204+
if length > _ExtendedHeader.MINIMUM_LENGTH
205+
else 0
206+
)
207+
190208
return _ExtendedHeader(
191-
magic=data[0:4],
192-
length=int.from_bytes(data[4:8], byteorder=_HEADER_BYTEORDER),
193-
program_size=int.from_bytes(data[8:16], byteorder=_HEADER_BYTEORDER),
194-
segment_base_offset=int.from_bytes(
195-
data[16:24], byteorder=_HEADER_BYTEORDER
196-
),
197-
segment_data_size=int.from_bytes(data[24:32], byteorder=_HEADER_BYTEORDER),
209+
magic=magic,
210+
length=length,
211+
program_size=program_size,
212+
segment_base_offset=segment_base_offset,
213+
segment_data_size=segment_data_size,
198214
)
199215

200216
def is_valid(self) -> bool:
201217
"""Returns true if the extended header appears to be well-formed."""
202218
return (
203219
self.magic == _ExtendedHeader.EXPECTED_MAGIC
204-
and self.length >= _ExtendedHeader.EXPECTED_LENGTH
220+
and self.length >= _ExtendedHeader.MINIMUM_LENGTH
205221
)
206222

207223
def to_bytes(self) -> bytes:

exir/_serialize/test/test_program.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,14 +1009,30 @@ def test_named_data_segments(self) -> None:
10091009
EXAMPLE_HEADER_DATA: bytes = (
10101010
# Magic bytes
10111011
b"eh00"
1012-
# uint32_t header size (little endian)
1012+
# uint32_t header size (little endian). 0x20 --> 32 bytes.
10131013
+ b"\x20\x00\x00\x00"
10141014
# uint64_t program size
10151015
+ b"\x44\x33\x44\x33\x22\x11\x22\x11"
10161016
# uint64_t segment base offset
10171017
+ b"\x88\x77\x88\x77\x66\x55\x66\x55"
10181018
# uint64_t segment data size
10191019
+ b"\x22\x33\x22\x33\x44\x55\x44\x55"
1020+
# Padding; provide at least NUM_HEAD_BYTES for the header.
1021+
+ b"\x99" * (_ExtendedHeader.NUM_HEAD_BYTES - 32)
1022+
)
1023+
1024+
# Minimum fields in an extended header (no segment data size).
1025+
EXAMPLE_HEADER_DATA_MIN: bytes = (
1026+
# Magic bytes
1027+
b"eh00"
1028+
# uint32_t header size (little endian). 0x18 --> 24 bytes.
1029+
+ b"\x18\x00\x00\x00"
1030+
# uint64_t program size
1031+
+ b"\x44\x33\x44\x33\x22\x11\x22\x11"
1032+
# uint64_t segment base offset
1033+
+ b"\x88\x77\x88\x77\x66\x55\x66\x55"
1034+
# Padding; provide at least NUM_HEAD_BYTES for the header.
1035+
+ b"\x99" * (_ExtendedHeader.NUM_HEAD_BYTES - 24)
10201036
)
10211037

10221038

@@ -1028,7 +1044,7 @@ def test_to_bytes(self) -> None:
10281044
segment_data_size=EXAMPLE_SEGMENT_DATA_SIZE,
10291045
)
10301046
self.assertTrue(eh.is_valid())
1031-
self.assertEqual(eh.to_bytes(), EXAMPLE_HEADER_DATA)
1047+
self.assertEqual(eh.to_bytes(), EXAMPLE_HEADER_DATA[0:32])
10321048

10331049
def test_to_bytes_with_non_defaults(self) -> None:
10341050
eh = _ExtendedHeader(
@@ -1045,11 +1061,11 @@ def test_to_bytes_with_non_defaults(self) -> None:
10451061

10461062
# But still produces a valid output header, since to_bytes() ignores
10471063
# magic and length.
1048-
self.assertEqual(eh.to_bytes(), EXAMPLE_HEADER_DATA)
1064+
self.assertEqual(eh.to_bytes(), EXAMPLE_HEADER_DATA[0:32])
10491065

10501066
def test_from_bytes_valid(self) -> None:
10511067
# Parse the serialized extended header.
1052-
eh = _ExtendedHeader.from_bytes(EXAMPLE_HEADER_DATA)
1068+
eh = _ExtendedHeader.from_bytes(EXAMPLE_HEADER_DATA[0:32])
10531069

10541070
# This is a valid header: good magic and length.
10551071
self.assertTrue(eh.is_valid())
@@ -1060,6 +1076,20 @@ def test_from_bytes_valid(self) -> None:
10601076
self.assertEqual(eh.segment_base_offset, EXAMPLE_SEGMENT_BASE_OFFSET)
10611077
self.assertEqual(eh.segment_data_size, EXAMPLE_SEGMENT_DATA_SIZE)
10621078

1079+
def test_from_bytes_minimum(self) -> None:
1080+
# Parse the serialized extended header.
1081+
eh = _ExtendedHeader.from_bytes(EXAMPLE_HEADER_DATA_MIN)
1082+
1083+
# This is a valid header: good magic and length.
1084+
self.assertTrue(eh.is_valid())
1085+
1086+
self.assertEqual(eh.magic, _ExtendedHeader.EXPECTED_MAGIC)
1087+
self.assertEqual(eh.length, _ExtendedHeader.MINIMUM_LENGTH)
1088+
self.assertEqual(eh.program_size, EXAMPLE_PROGRAM_SIZE)
1089+
self.assertEqual(eh.segment_base_offset, EXAMPLE_SEGMENT_BASE_OFFSET)
1090+
# Does not contain segment_data_size; should be 0
1091+
self.assertEqual(eh.segment_data_size, 0)
1092+
10631093
def test_from_bytes_with_more_data_than_necessary(self) -> None:
10641094
# Pass in more data than necessary to parse the header.
10651095
header_data_with_suffix = EXAMPLE_HEADER_DATA + b"\x55" * 16
@@ -1167,4 +1197,5 @@ def test_from_bytes_invalid_length(self) -> None:
11671197
self.assertEqual(eh.length, 16)
11681198
self.assertEqual(eh.program_size, EXAMPLE_PROGRAM_SIZE)
11691199
self.assertEqual(eh.segment_base_offset, EXAMPLE_SEGMENT_BASE_OFFSET)
1170-
self.assertEqual(eh.segment_data_size, EXAMPLE_SEGMENT_DATA_SIZE)
1200+
# Length cut short; segment_data_size parsed as 0.
1201+
self.assertEqual(eh.segment_data_size, 0)

schema/extended_header.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ namespace runtime {
2222
struct ExtendedHeader {
2323
/**
2424
* To find the header, callers should provide at least this many bytes of the
25-
* head of the serialized Program data.
25+
* head of the serialized Program data. Keep this in sync with NUM_HEAD_BYTES
26+
* in //executorch/exir/_serialize/_program.py
2627
*/
2728
static constexpr size_t kNumHeadBytes = 64;
2829

0 commit comments

Comments
 (0)