Skip to content

Commit 091b1c6

Browse files
committed
Revert "Use MSB and LSB longs to represent UUID"
This reverts commit e7a2f97.
1 parent 6ac5ddb commit 091b1c6

File tree

8 files changed

+148
-123
lines changed

8 files changed

+148
-123
lines changed

vector/src/main/java/org/apache/arrow/vector/UuidVector.java

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
*/
1717
package org.apache.arrow.vector;
1818

19-
import static org.apache.arrow.memory.util.MemoryUtil.LITTLE_ENDIAN;
2019
import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH;
2120

2221
import java.nio.ByteBuffer;
@@ -30,6 +29,7 @@
3029
import org.apache.arrow.vector.complex.impl.UuidReaderImpl;
3130
import org.apache.arrow.vector.complex.reader.FieldReader;
3231
import org.apache.arrow.vector.extension.UuidType;
32+
import org.apache.arrow.vector.holders.ExtensionHolder;
3333
import org.apache.arrow.vector.holders.NullableUuidHolder;
3434
import org.apache.arrow.vector.holders.UuidHolder;
3535
import org.apache.arrow.vector.types.pojo.Field;
@@ -157,17 +157,12 @@ public int isSet(int index) {
157157
*/
158158
public void get(int index, UuidHolder holder) {
159159
Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector.");
160-
final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer();
161-
final long start = (long) index * UUID_BYTE_WIDTH;
162-
final long next = start + Long.BYTES;
163-
// UUIDs are stored in big-endian byte order in Arrow buffers.
164-
// ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems.
165-
if (LITTLE_ENDIAN) {
166-
holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(start));
167-
holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(next));
160+
if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) {
161+
holder.isSet = 0;
168162
} else {
169-
holder.mostSigBits = dataBuffer.getLong(start);
170-
holder.leastSigBits = dataBuffer.getLong(next);
163+
holder.isSet = 1;
164+
holder.buffer = getDataBuffer();
165+
holder.start = getStartOffset(index);
171166
}
172167
}
173168

@@ -183,17 +178,8 @@ public void get(int index, NullableUuidHolder holder) {
183178
holder.isSet = 0;
184179
} else {
185180
holder.isSet = 1;
186-
final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer();
187-
final long offset = (long) index * UUID_BYTE_WIDTH;
188-
// UUIDs are stored in big-endian byte order in Arrow buffers.
189-
// ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems.
190-
if (LITTLE_ENDIAN) {
191-
holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(offset));
192-
holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(offset + Long.BYTES));
193-
} else {
194-
holder.mostSigBits = dataBuffer.getLong(offset);
195-
holder.leastSigBits = dataBuffer.getLong(offset + Long.BYTES);
196-
}
181+
holder.buffer = getDataBuffer();
182+
holder.start = getStartOffset(index);
197183
}
198184
}
199185

@@ -228,7 +214,7 @@ public void set(int index, UUID value) {
228214
* @param holder the holder containing the UUID data
229215
*/
230216
public void set(int index, UuidHolder holder) {
231-
set(index, holder.getUuid());
217+
this.set(index, holder.buffer, holder.start);
232218
}
233219

234220
/**
@@ -241,7 +227,7 @@ public void set(int index, NullableUuidHolder holder) {
241227
if (holder.isSet == 0) {
242228
getUnderlyingVector().setNull(index);
243229
} else {
244-
set(index, holder.getUuid());
230+
this.set(index, holder.buffer, holder.start);
245231
}
246232
}
247233

@@ -257,8 +243,8 @@ public void set(int index, ArrowBuf source, int sourceOffset) {
257243

258244
BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index);
259245
getUnderlyingVector()
260-
.getDataBuffer()
261-
.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH);
246+
.getDataBuffer()
247+
.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH);
262248
}
263249

264250
/**
@@ -295,7 +281,7 @@ public void setSafe(int index, NullableUuidHolder holder) {
295281
if (holder == null || holder.isSet == 0) {
296282
getUnderlyingVector().setNull(index);
297283
} else {
298-
setSafe(index, holder.getUuid());
284+
this.setSafe(index, holder.buffer, holder.start);
299285
}
300286
}
301287

@@ -306,7 +292,7 @@ public void setSafe(int index, NullableUuidHolder holder) {
306292
* @param holder the holder containing the UUID data
307293
*/
308294
public void setSafe(int index, UuidHolder holder) {
309-
setSafe(index, holder.getUuid());
295+
this.setSafe(index, holder.buffer, holder.start);
310296
}
311297

312298
/**

vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.arrow.vector.holders.NullableUuidHolder;
2121
import org.apache.arrow.vector.holders.UuidHolder;
2222
import org.apache.arrow.vector.types.Types;
23+
import org.apache.arrow.vector.util.UuidUtility;
2324

2425
/**
2526
* Reader implementation for reading UUID values from a {@link NullableUuidHolder}.
@@ -80,13 +81,13 @@ public boolean isSet() {
8081
public void read(ExtensionHolder h) {
8182
if (h instanceof NullableUuidHolder) {
8283
NullableUuidHolder nullableHolder = (NullableUuidHolder) h;
83-
nullableHolder.mostSigBits = this.holder.mostSigBits;
84-
nullableHolder.leastSigBits = this.holder.leastSigBits;
84+
nullableHolder.buffer = this.holder.buffer;
8585
nullableHolder.isSet = this.holder.isSet;
86+
nullableHolder.start = this.holder.start;
8687
} else if (h instanceof UuidHolder) {
8788
UuidHolder uuidHolder = (UuidHolder) h;
88-
uuidHolder.mostSigBits = this.holder.mostSigBits;
89-
uuidHolder.leastSigBits = this.holder.leastSigBits;
89+
uuidHolder.buffer = this.holder.buffer;
90+
uuidHolder.start = this.holder.start;
9091
} else {
9192
throw new IllegalArgumentException(
9293
"Unsupported holder type: "
@@ -102,7 +103,22 @@ public Object readObject() {
102103
if (!isSet()) {
103104
return null;
104105
}
105-
// Convert UUID longs to Java UUID object
106-
return holder.getUuid();
106+
// Convert UUID bytes to Java UUID object
107+
try {
108+
return UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
109+
} catch (Exception e) {
110+
throw new RuntimeException(
111+
String.format(
112+
"Failed to read UUID from buffer. Invalid Arrow buffer state: "
113+
+ "capacity=%d, readableBytes=%d, readerIndex=%d, writerIndex=%d, refCnt=%d. "
114+
+ "The buffer must contain exactly 16 bytes of valid UUID data.",
115+
holder.buffer.capacity(),
116+
holder.buffer.readableBytes(),
117+
holder.buffer.readerIndex(),
118+
holder.buffer.writerIndex(),
119+
holder.buffer.refCnt()),
120+
e);
121+
}
107122
}
108123
}
124+

vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,37 +18,24 @@
1818

1919
import org.apache.arrow.vector.extension.UuidType;
2020
import org.apache.arrow.vector.types.pojo.ArrowType;
21-
import java.util.UUID;
21+
import org.apache.arrow.memory.ArrowBuf;
2222

2323
/**
2424
* Value holder for nullable UUID values.
2525
*
2626
* <p>The {@code isSet} field controls nullability: when {@code isSet = 1}, the holder contains a
27-
* valid UUID represented as two longs; when {@code isSet = 0}, the holder represents a null value
28-
* and the long fields should not be accessed.
27+
* valid UUID in {@code buffer}; when {@code isSet = 0}, the holder represents a null value and
28+
* {@code buffer} should not be accessed.
2929
*
3030
* @see UuidHolder
3131
* @see org.apache.arrow.vector.UuidVector
3232
* @see org.apache.arrow.vector.extension.UuidType
3333
*/
3434
public class NullableUuidHolder extends ExtensionHolder {
35-
/** The most significant 64 bits of the UUID. */
36-
public long mostSigBits;
37-
38-
/** The least significant 64 bits of the UUID. */
39-
public long leastSigBits;
40-
41-
/**
42-
* Converts the holder's two longs to a UUID object.
43-
*
44-
* @return the UUID represented by this holder, or null if isSet is 0
45-
*/
46-
public UUID getUuid() {
47-
if (this.isSet == 0) {
48-
return null;
49-
}
50-
return new UUID(mostSigBits, leastSigBits);
51-
}
35+
/** Buffer containing 16-byte UUID data. */
36+
public ArrowBuf buffer;
37+
/** Offset in the buffer where the UUID data starts. */
38+
public int start = 0;
5239

5340
@Override
5441
public ArrowType type() {

vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,22 @@
1919
import org.apache.arrow.memory.ArrowBuf;
2020
import org.apache.arrow.vector.extension.UuidType;
2121
import org.apache.arrow.vector.types.pojo.ArrowType;
22-
import java.util.UUID;
2322

2423
/**
2524
* Value holder for non-nullable UUID values.
2625
*
27-
* <p>Contains a 16-byte UUID represented as two longs with {@code isSet} always 1.
26+
* <p>Contains a 16-byte UUID in {@code buffer} with {@code isSet} always 1.
2827
*
2928
* @see NullableUuidHolder
3029
* @see org.apache.arrow.vector.UuidVector
3130
* @see org.apache.arrow.vector.extension.UuidType
3231
*/
3332
public class UuidHolder extends ExtensionHolder {
34-
/** The most significant 64 bits of the UUID. */
35-
public long mostSigBits;
33+
/** Buffer containing 16-byte UUID data. */
34+
public ArrowBuf buffer;
3635

37-
/** The least significant 64 bits of the UUID. */
38-
public long leastSigBits;
36+
/** Offset in the buffer where the UUID data starts. */
37+
public int start = 0;
3938

4039
/** Constructs a UuidHolder with isSet = 1. */
4140
public UuidHolder() {
@@ -45,12 +44,5 @@ public UuidHolder() {
4544
@Override
4645
public ArrowType type() {
4746
return UuidType.INSTANCE;
48-
/**
49-
* Converts the holder's two longs to a UUID object.
50-
*
51-
* @return the UUID represented by this holder
52-
*/
53-
public UUID getUuid() {
54-
return new UUID(mostSigBits, leastSigBits);
5547
}
5648
}

vector/src/test/java/org/apache/arrow/vector/TestListVector.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import org.apache.arrow.vector.types.pojo.Field;
4949
import org.apache.arrow.vector.types.pojo.FieldType;
5050
import org.apache.arrow.vector.util.TransferPair;
51+
import org.apache.arrow.vector.util.UuidUtility;
5152
import org.junit.jupiter.api.AfterEach;
5253
import org.junit.jupiter.api.BeforeEach;
5354
import org.junit.jupiter.api.Test;
@@ -1255,12 +1256,12 @@ public void testListVectorReaderForExtensionType() throws Exception {
12551256
FieldReader uuidReader = reader.reader();
12561257
UuidHolder holder = new UuidHolder();
12571258
uuidReader.read(holder);
1258-
UUID actualUuid = holder.getUuid();
1259+
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
12591260
assertEquals(u1, actualUuid);
12601261
reader.next();
12611262
uuidReader = reader.reader();
12621263
uuidReader.read(holder);
1263-
actualUuid = holder.getUuid();
1264+
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
12641265
assertEquals(u2, actualUuid);
12651266
}
12661267
}
@@ -1295,12 +1296,12 @@ public void testCopyFromForExtensionType() throws Exception {
12951296
FieldReader uuidReader = reader.reader();
12961297
UuidHolder holder = new UuidHolder();
12971298
uuidReader.read(holder);
1298-
UUID actualUuid = holder.getUuid();
1299+
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
12991300
assertEquals(u1, actualUuid);
13001301
reader.next();
13011302
uuidReader = reader.reader();
13021303
uuidReader.read(holder);
1303-
actualUuid = holder.getUuid();
1304+
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
13041305
assertEquals(u2, actualUuid);
13051306
}
13061307
}

vector/src/test/java/org/apache/arrow/vector/TestMapVector.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.apache.arrow.vector.types.pojo.FieldType;
5050
import org.apache.arrow.vector.util.JsonStringArrayList;
5151
import org.apache.arrow.vector.util.TransferPair;
52+
import org.apache.arrow.vector.util.UuidUtility;
5253
import org.junit.jupiter.api.AfterEach;
5354
import org.junit.jupiter.api.BeforeEach;
5455
import org.junit.jupiter.api.Test;
@@ -1300,12 +1301,12 @@ public void testMapVectorWithExtensionType() throws Exception {
13001301
FieldReader uuidReader = mapReader.value();
13011302
UuidHolder holder = new UuidHolder();
13021303
uuidReader.read(holder);
1303-
UUID actualUuid = holder.getUuid();
1304+
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
13041305
assertEquals(u1, actualUuid);
13051306
mapReader.next();
13061307
uuidReader = mapReader.value();
13071308
uuidReader.read(holder);
1308-
actualUuid = holder.getUuid();
1309+
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
13091310
assertEquals(u2, actualUuid);
13101311
}
13111312
}
@@ -1342,12 +1343,12 @@ public void testCopyFromForExtensionType() throws Exception {
13421343
FieldReader uuidReader = mapReader.value();
13431344
UuidHolder holder = new UuidHolder();
13441345
uuidReader.read(holder);
1345-
UUID actualUuid = holder.getUuid();
1346+
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
13461347
assertEquals(u1, actualUuid);
13471348
mapReader.next();
13481349
uuidReader = mapReader.value();
13491350
uuidReader.read(holder);
1350-
actualUuid = holder.getUuid();
1351+
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
13511352
assertEquals(u2, actualUuid);
13521353
}
13531354
}

0 commit comments

Comments
 (0)