Skip to content

Commit bbf7a91

Browse files
authored
Temporary fix for SPARK-36990 (#43)
* Temporary fix for SPARK-36990 As we cannot differentiate between uint32 and int32, use LongIntegerUpdater by default, which converts an int32 to long using signed conversion
1 parent dc38be5 commit bbf7a91

File tree

4 files changed

+20
-4
lines changed

4 files changed

+20
-4
lines changed

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public ParquetVectorUpdater getUpdater(ColumnDescriptor descriptor, DataType spa
7373
if (sparkType == DataTypes.IntegerType || canReadAsIntDecimal(descriptor, sparkType)) {
7474
return new IntegerUpdater();
7575
} else if (sparkType == DataTypes.LongType) {
76-
return new IntegerUpdater();
76+
return new LongIntegerUpdater();
7777
} else if (sparkType == DataTypes.ByteType) {
7878
return new ByteUpdater();
7979
} else if (sparkType == DataTypes.ShortType) {
@@ -246,14 +246,14 @@ public void decodeSingleDictionaryId(
246246
}
247247
}
248248

249-
private static class UnsignedIntegerUpdater implements ParquetVectorUpdater {
249+
private static class LongIntegerUpdater implements ParquetVectorUpdater {
250250
@Override
251251
public void readValues(
252252
int total,
253253
int offset,
254254
WritableColumnVector values,
255255
VectorizedValuesReader valuesReader) {
256-
valuesReader.readUnsignedIntegers(total, values, offset);
256+
valuesReader.readIntegersAsLongs(total, values, offset);
257257
}
258258

259259
@Override
@@ -266,7 +266,7 @@ public void readValue(
266266
int offset,
267267
WritableColumnVector values,
268268
VectorizedValuesReader valuesReader) {
269-
values.putLong(offset, Integer.toUnsignedLong(valuesReader.readInteger()));
269+
values.putLong(offset, valuesReader.readInteger());
270270
}
271271

272272
@Override

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,16 @@ public void skipIntegers(int total) {
9797
in.skip(total * 4L);
9898
}
9999

100+
@Override
101+
public final void readIntegersAsLongs(int total, WritableColumnVector c, int rowId) {
102+
int requiredBytes = total * 4;
103+
ByteBuffer buffer = getBuffer(requiredBytes);
104+
for (int i = 0; i < total; i += 1) {
105+
c.putLong(rowId + i, buffer.getInt());
106+
}
107+
}
108+
109+
100110
@Override
101111
public final void readUnsignedIntegers(int total, WritableColumnVector c, int rowId) {
102112
int requiredBytes = total * 4;

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,11 @@ public void readIntegers(int total, WritableColumnVector c, int rowId) {
311311
}
312312
}
313313

314+
@Override
315+
public void readIntegersAsLongs(int total, WritableColumnVector c, int rowId) {
316+
throw new UnsupportedOperationException("only readInts is valid.");
317+
}
318+
314319
@Override
315320
public void readUnsignedIntegers(int total, WritableColumnVector c, int rowId) {
316321
throw new UnsupportedOperationException("only readInts is valid.");

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public interface VectorizedValuesReader {
4343
void readShorts(int total, WritableColumnVector c, int rowId);
4444
void readIntegers(int total, WritableColumnVector c, int rowId);
4545
void readIntegersWithRebase(int total, WritableColumnVector c, int rowId, boolean failIfRebase);
46+
void readIntegersAsLongs(int total, WritableColumnVector c, int rowId);
4647
void readUnsignedIntegers(int total, WritableColumnVector c, int rowId);
4748
void readUnsignedLongs(int total, WritableColumnVector c, int rowId);
4849
void readLongs(int total, WritableColumnVector c, int rowId);

0 commit comments

Comments
 (0)