Skip to content

Commit 0a86015

Browse files
authored
GH-3249: Fix incorrect Bloom filter data when reading from ByteArrayInputStream by using readFully() (#3250)
1 parent 4aa2ea9 commit 0a86015

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1667,7 +1667,14 @@ public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException
1667 1667
byte[] bitset;
1668 1668
if (null == bloomFilterDecryptor) {
1669 1669
bitset = new byte[numBytes];
1670-
in.read(bitset);
1670+
// For negative bloomFilterLength (files from older versions), use readFully() instead of read().
1671+
// readFully() guarantees reading exactly numBytes bytes, while read() may read fewer bytes in a single
1672+
// call. This ensures the entire bitset is properly loaded.
1673+
if (bloomFilterLength < 0) {
1674+
f.readFully(bitset);
1675+
} else {
1676+
in.read(bitset);
1677+
}
1671 1678
} else {
1672 1679
bitset = bloomFilterDecryptor.decrypt(in, bloomFilterBitsetAAD);
1673 1680
if (bitset.length != numBytes) {

0 commit comments

Comments
 (0)