@@ -925,7 +925,17 @@ static carquet_status_t load_next_page_mmap(
925925 reader -> decoded_def_levels , reader -> decoded_rep_levels ,
926926 & decoded_count , error );
927927
928- free (decompressed );
928+ /* For BYTE_ARRAY PLAIN columns with compressed data, retain the
929+ * decompressed buffer since carquet_byte_array_t.data pointers
930+ * reference it. For uncompressed mmap, pointers go directly to mmap
931+ * which persists for the reader's lifetime, so no retention needed. */
932+ if (decompressed && reader -> type == CARQUET_PHYSICAL_BYTE_ARRAY &&
933+ page_header .data_page_header .encoding == CARQUET_ENCODING_PLAIN ) {
934+ free (reader -> page_data_for_values );
935+ reader -> page_data_for_values = decompressed ;
936+ } else {
937+ free (decompressed );
938+ }
929939
930940 if (status != CARQUET_OK ) {
931941 return status ;
@@ -1097,11 +1107,26 @@ static carquet_status_t load_next_page_fread(
10971107 reader -> decoded_def_levels , reader -> decoded_rep_levels ,
10981108 & decoded_count , error );
10991109
1100- if (page_data != compressed ) {
1101- free (page_data );
1102- }
1103- if (compressed ) {
1104- free (compressed );
1110+ /* For BYTE_ARRAY PLAIN columns, the decoded carquet_byte_array_t structs
1111+ * have .data pointers into the page data buffer. Retain the buffer so
1112+ * these pointers remain valid until the next page is loaded. */
1113+ bool retain = (reader -> type == CARQUET_PHYSICAL_BYTE_ARRAY &&
1114+ page_header .data_page_header .encoding == CARQUET_ENCODING_PLAIN );
1115+
1116+ if (retain ) {
1117+ free (reader -> page_data_for_values );
1118+ reader -> page_data_for_values = page_data ;
1119+ /* Free compressed buffer only if it's a separate allocation */
1120+ if (compressed && compressed != page_data ) {
1121+ free (compressed );
1122+ }
1123+ } else {
1124+ if (page_data != compressed ) {
1125+ free (page_data );
1126+ }
1127+ if (compressed ) {
1128+ free (compressed );
1129+ }
11051130 }
11061131
11071132 if (status != CARQUET_OK ) {
0 commit comments