Skip to content

Commit cb6f5a5

Browse files
committed
Fix a crash on RLE8 BMPs whose compressed pixel data is larger than the uncompressed image would be: the converter did not read enough bytes for them
1 parent 208062a commit cb6f5a5

File tree

1 file changed

+22
-11
lines changed

1 file changed

+22
-11
lines changed

Python/bmp_to_png.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,11 @@ def is_bmp_rle8_compressed(img):
2121

2222
def get_bmp_pixel_index_array_offset(img):
2323
img.seek(0xA)
24-
# Start of the rle8.bmp example's pixel array: 08 00 03 F8
2524
return int.from_bytes(img.read(4), byteorder="little")
2625

2726

28-
def get_bmp_pixel_index_array_byte_size(img):
29-
img.seek(0x22)
27+
def get_bmp_byte_size(img):
28+
img.seek(0x2)
3029
return int.from_bytes(img.read(4), byteorder="little")
3130

3231

@@ -150,22 +149,34 @@ def get_decoded_pixel_index_array(rle_bytes, width, height):
150149

151150

152151
def get_pixel_index_array_bytes(img):
152+
"""
153+
The img.read(bmp_byte_size - pixel_index_array_offset) here will often attempt to read a few bytes past the end of the pixel data,
154+
because the padding bytes at the end of the file are included in bmp_byte_size,
155+
and it isn't worth the effort to calculate how many of these padding bytes there are.
156+
157+
We can't go to img.seek(0x22) to read the image size, because that's always just the width * height of the image,
158+
which doesn't account for the fact that RLE8 compression typically causes there to be less pixel data.
159+
160+
Almost all RLE8 compressed BMPs are smaller than their uncompressed version would be,
161+
and in those cases img.read(image_size) would attempt to read past the end of the file,
162+
but .read() always stops reading at the end of any file.
163+
164+
*However*, in the case of particularly noisy RLE8 compressed BMPs,
165+
the RLE8 compression can cause the file to become *bigger* than its uncompressed version!
166+
167+
In these noisy RLE8 BMPs you *have* to read more bytes than just the image width * height,
168+
or the loop in get_decoded_pixel_index_array() would attempt to index past the end of the rle_bytes list, which'd crash the program.
169+
"""
153170
pixel_index_array_offset = get_bmp_pixel_index_array_offset(img)
154-
pixel_index_array_byte_size = get_bmp_pixel_index_array_byte_size(img)
171+
bmp_byte_size = get_bmp_byte_size(img)
155172

156173
img.seek(pixel_index_array_offset)
157-
return img.read(pixel_index_array_byte_size)
174+
return img.read(bmp_byte_size - pixel_index_array_offset)
158175

159176

160177
def get_pixel_array(img, width, height, palette):
161178
pixel_index_array_bytes = get_pixel_index_array_bytes(img)
162179
decoded_pixel_index_array = get_decoded_pixel_index_array(pixel_index_array_bytes, width, height)
163-
164-
# for index, row in enumerate(decoded_pixel_index_array):
165-
# print(index, len(row))
166-
# print("")
167-
# print(len(decoded_pixel_index_array[71]), decoded_pixel_index_array[71])
168-
# print("")
169180

170181
# TODO: Raise a custom ValueError exception for:
171182
# "ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (143,) + inhomogeneous part."

0 commit comments

Comments
 (0)