Skip to content

Commit fc72712

Browse files
fix(partition_table): Ignore UTF-8 BOM bytes in csv file
1 parent c06fa31 commit fc72712

File tree

7 files changed

+79
-15
lines changed

7 files changed

+79
-15
lines changed

components/partition_table/gen_esp32part.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# SPDX-License-Identifier: Apache-2.0
1212
import argparse
1313
import binascii
14+
import codecs
1415
import errno
1516
import hashlib
1617
import os
@@ -175,21 +176,36 @@ def critical(msg):
175176
sys.stderr.write('\n')
176177

177178

179+
def get_encoding(first_bytes):
    """Detect the text encoding of a byte string from its BOM (Byte Order Mark).

    :param first_bytes: the leading bytes (or the whole content) of a file.
    :return: the name of a Python codec that consumes the BOM while decoding
        ('utf-8-sig', 'utf-16' or 'utf-32'), or 'utf-8' when no BOM is found.
    """
    # Order matters: the UTF-32 LE BOM (FF FE 00 00) begins with the
    # UTF-16 LE BOM (FF FE), so the UTF-32 marks must be tested before the
    # UTF-16 ones or UTF-32 LE input would be misreported as UTF-16.
    # (A UTF-16 LE text starting with U+0000 is indistinguishable from
    # UTF-32 LE; a CSV file never legitimately starts with a NUL character.)
    boms = (
        (codecs.BOM_UTF8, 'utf-8-sig'),
        (codecs.BOM_UTF32_LE, 'utf-32'),
        (codecs.BOM_UTF32_BE, 'utf-32'),
        (codecs.BOM_UTF16_LE, 'utf-16'),
        (codecs.BOM_UTF16_BE, 'utf-16'),
    )
    for bom, encoding in boms:
        if first_bytes.startswith(bom):
            return encoding
    return 'utf-8'
192+
193+
178194
class PartitionTable(list):
179195
def __init__(self):
180196
super(PartitionTable, self).__init__(self)
181197

182198
@classmethod
183199
def from_file(cls, f):
184-
data = f.read()
185-
data_is_binary = data[0:2] == PartitionDefinition.MAGIC_BYTES
200+
bin_data = f.read()
201+
data_is_binary = bin_data[0:2] == PartitionDefinition.MAGIC_BYTES
186202
if data_is_binary:
187203
status('Parsing binary partition input...')
188-
return cls.from_binary(data), True
204+
return cls.from_binary(bin_data), True
189205

190-
data = data.decode()
206+
str_data = bin_data.decode(get_encoding(bin_data))
191207
status('Parsing CSV input...')
192-
return cls.from_csv(data), False
208+
return cls.from_csv(str_data), False
193209

194210
@classmethod
195211
def from_csv(cls, csv_contents):

components/partition_table/parttool.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def __init__(self, port=None, baud=None, partition_table_offset=PARTITION_TABLE_
6464
gen.offset_part_table = partition_table_offset
6565
gen.primary_bootloader_offset = primary_bootloader_offset
6666
gen.recovery_bootloader_offset = recovery_bootloader_offset
67+
gen.quiet = True
6768

6869
def parse_esptool_args(esptool_args):
6970
results = list()
@@ -84,17 +85,8 @@ def parse_esptool_args(esptool_args):
8485
self.esptool_erase_args = parse_esptool_args(esptool_erase_args)
8586

8687
if partition_table_file:
87-
partition_table = None
8888
with open(partition_table_file, 'rb') as f:
89-
input_is_binary = (f.read(2) == gen.PartitionDefinition.MAGIC_BYTES)
90-
f.seek(0)
91-
if input_is_binary:
92-
partition_table = gen.PartitionTable.from_binary(f.read())
93-
94-
if partition_table is None:
95-
with open(partition_table_file, 'r', encoding='utf-8') as f:
96-
f.seek(0)
97-
partition_table = gen.PartitionTable.from_csv(f.read())
89+
partition_table, _ = gen.PartitionTable.from_file(f)
9890
else:
9991
temp_file = tempfile.NamedTemporaryFile(delete=False)
10092
temp_file.close()

components/partition_table/test_gen_esp32part_host/gen_esp32part_tests.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,56 @@ def test_only_empty_subtype_is_not_0(self):
289289
self.assertEqual(t[7].subtype, 0x06)
290290

291291

292+
class UTFCodingTests(Py23TestCase):
    """Load partition tables whose names contain UTF-8 text, with and without a BOM.

    The fixtures are opened by relative path, so the tests expect to run from
    the directory that contains them.
    """

    # Second partition name used by every fixture; contains non-ASCII characters.
    UTF8_NAME = 'phy_инит_'

    def _load_table(self, path):
        """Parse the file at *path* with PartitionTable.from_file, verify and return the table."""
        with open(path, 'rb') as f:
            t, _ = gen_esp32part.PartitionTable.from_file(f)
            t.verify()
        return t

    def _assert_names(self, t, first_name='nvs'):
        """Check the three partition names; *first_name* is the expected name of entry 0."""
        self.assertEqual(t[0].name, first_name)
        self.assertEqual(t[1].name, self.UTF8_NAME)  # UTF-8 name is preserved
        self.assertEqual(t[2].name, 'factory')

    def _assert_matches_reference_binary(self, t):
        """Check that *t* serializes to the same bytes as partitions.bin (ignoring trailing 0xFF)."""
        with open('partitions.bin', 'rb') as bin_file:
            binary_content = bin_file.read()
        self.assertEqual(_strip_trailing_ffs(t.to_binary()), _strip_trailing_ffs(binary_content))

    def test_utf8_bom_csv_file(self):
        t = self._load_table('partitions-utf8-bom.csv')
        self._assert_names(t)  # 3 BOM bytes are not part of the first name
        self._assert_matches_reference_binary(t)

    def test_utf8_without_bom_csv_file(self):
        t = self._load_table('partitions-utf8_without-bom.csv')
        self._assert_names(t)
        self._assert_matches_reference_binary(t)

    def test_utf8_bin_file(self):
        t = self._load_table('partitions.bin')
        self._assert_names(t)
        gen = t.to_csv()
        self.assertIn('\nnvs,', gen)
        self.assertIn('\nphy_инит_,', gen)
        self.assertIn('\nfactory,', gen)

    def test_utf8_bom_bin_file(self):
        # This binary fixture carries the BOM inside the first partition name,
        # hence "bom" in the test name (it was previously misnamed "without_bom").
        t = self._load_table('partitions-utf8-bom.bin')
        # If the old tool grabbed the BOM bytes for the first name then
        # we do not change the name. User needs to fix the CSV file.
        self._assert_names(t, first_name='\ufeffnvs')
        gen = t.to_csv()
        self.assertIn('\ufeffnvs,', gen)
        self.assertIn('\nphy_инит_,', gen)
        self.assertIn('\nfactory,', gen)
340+
341+
292342
class BinaryParserTests(Py23TestCase):
293343
def test_parse_one_entry(self):
294344
# type 0x30, subtype 0xee,
Binary file not shown.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
nvs, data, nvs, 0x9000, 24K,
2+
phy_инит_, data, phy, 0xf000, 0x1000,
3+
factory, app, factory, 0x10000, 1M,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
nvs, data, nvs, 0x9000, 24K,
2+
phy_инит_, data, phy, 0xf000, 0x1000,
3+
factory, app, factory, 0x10000, 1M,
Binary file not shown.

0 commit comments

Comments
 (0)