Skip to content

Commit ea88d9a

Browse files
committed
Fix BYTE_ARRAY use-after-free in page reader
1 parent 078b029 commit ea88d9a

File tree

3 files changed

+35
-6
lines changed

3 files changed

+35
-6
lines changed

src/reader/file_reader.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,7 @@ void carquet_column_reader_free(carquet_column_reader_t* reader) {
547547
if (!reader) return;
548548

549549
free(reader->page_buffer);
550+
free(reader->page_data_for_values);
550551
free(reader->dictionary_data);
551552
free(reader->dictionary_offsets);
552553

src/reader/page_reader.c

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -925,7 +925,17 @@ static carquet_status_t load_next_page_mmap(
925925
reader->decoded_def_levels, reader->decoded_rep_levels,
926926
&decoded_count, error);
927927

928-
free(decompressed);
928+
/* For BYTE_ARRAY PLAIN columns with compressed data, retain the
929+
* decompressed buffer since carquet_byte_array_t.data pointers
930+
* reference it. For uncompressed mmap, pointers go directly to mmap
931+
* which persists for the reader's lifetime, so no retention needed. */
932+
if (decompressed && reader->type == CARQUET_PHYSICAL_BYTE_ARRAY &&
933+
page_header.data_page_header.encoding == CARQUET_ENCODING_PLAIN) {
934+
free(reader->page_data_for_values);
935+
reader->page_data_for_values = decompressed;
936+
} else {
937+
free(decompressed);
938+
}
929939

930940
if (status != CARQUET_OK) {
931941
return status;
@@ -1097,11 +1107,26 @@ static carquet_status_t load_next_page_fread(
10971107
reader->decoded_def_levels, reader->decoded_rep_levels,
10981108
&decoded_count, error);
10991109

1100-
if (page_data != compressed) {
1101-
free(page_data);
1102-
}
1103-
if (compressed) {
1104-
free(compressed);
1110+
/* For BYTE_ARRAY PLAIN columns, the decoded carquet_byte_array_t structs
1111+
* have .data pointers into the page data buffer. Retain the buffer so
1112+
* these pointers remain valid until the next page is loaded. */
1113+
bool retain = (reader->type == CARQUET_PHYSICAL_BYTE_ARRAY &&
1114+
page_header.data_page_header.encoding == CARQUET_ENCODING_PLAIN);
1115+
1116+
if (retain) {
1117+
free(reader->page_data_for_values);
1118+
reader->page_data_for_values = page_data;
1119+
/* Free compressed buffer only if it's a separate allocation */
1120+
if (compressed && compressed != page_data) {
1121+
free(compressed);
1122+
}
1123+
} else {
1124+
if (page_data != compressed) {
1125+
free(page_data);
1126+
}
1127+
if (compressed) {
1128+
free(compressed);
1129+
}
11051130
}
11061131

11071132
if (status != CARQUET_OK) {

src/reader/reader_internal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ struct carquet_column_reader {
131131
int32_t dictionary_count;
132132
uint32_t* dictionary_offsets; /* Offset cache for O(1) BYTE_ARRAY lookup */
133133

134+
/* Retained page data for BYTE_ARRAY value pointers */
135+
uint8_t* page_data_for_values;
136+
134137
/* Current page state for partial reads */
135138
bool page_loaded; /* Is a page currently loaded? */
136139
int32_t page_num_values; /* Total values in current page */

0 commit comments

Comments
 (0)