Skip to content

Commit e2ecd17

Browse files
committed
Use MSB and LSB longs to represent UUID
Add a getUuid method to [Nullable]UuidHolder and read the UUID halves with ArrowBuf.getLong
1 parent 0016392 commit e2ecd17

File tree

8 files changed

+123
-148
lines changed

8 files changed

+123
-148
lines changed

vector/src/main/java/org/apache/arrow/vector/UuidVector.java

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.apache.arrow.vector;
1818

19+
import static org.apache.arrow.memory.util.MemoryUtil.LITTLE_ENDIAN;
1920
import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH;
2021

2122
import java.nio.ByteBuffer;
@@ -29,7 +30,6 @@
2930
import org.apache.arrow.vector.complex.impl.UuidReaderImpl;
3031
import org.apache.arrow.vector.complex.reader.FieldReader;
3132
import org.apache.arrow.vector.extension.UuidType;
32-
import org.apache.arrow.vector.holders.ExtensionHolder;
3333
import org.apache.arrow.vector.holders.NullableUuidHolder;
3434
import org.apache.arrow.vector.holders.UuidHolder;
3535
import org.apache.arrow.vector.types.pojo.Field;
@@ -157,12 +157,17 @@ public int isSet(int index) {
157157
*/
158158
public void get(int index, UuidHolder holder) {
159159
Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector.");
160-
if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) {
161-
holder.isSet = 0;
160+
final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer();
161+
final long start = (long) index * UUID_BYTE_WIDTH;
162+
final long next = start + Long.BYTES;
163+
// UUIDs are stored in big-endian byte order in Arrow buffers.
164+
// ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems.
165+
if (LITTLE_ENDIAN) {
166+
holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(start));
167+
holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(next));
162168
} else {
163-
holder.isSet = 1;
164-
holder.buffer = getDataBuffer();
165-
holder.start = getStartOffset(index);
169+
holder.mostSigBits = dataBuffer.getLong(start);
170+
holder.leastSigBits = dataBuffer.getLong(next);
166171
}
167172
}
168173

@@ -178,8 +183,17 @@ public void get(int index, NullableUuidHolder holder) {
178183
holder.isSet = 0;
179184
} else {
180185
holder.isSet = 1;
181-
holder.buffer = getDataBuffer();
182-
holder.start = getStartOffset(index);
186+
final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer();
187+
final long offset = (long) index * UUID_BYTE_WIDTH;
188+
// UUIDs are stored in big-endian byte order in Arrow buffers.
189+
// ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems.
190+
if (LITTLE_ENDIAN) {
191+
holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(offset));
192+
holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(offset + Long.BYTES));
193+
} else {
194+
holder.mostSigBits = dataBuffer.getLong(offset);
195+
holder.leastSigBits = dataBuffer.getLong(offset + Long.BYTES);
196+
}
183197
}
184198
}
185199

@@ -214,7 +228,7 @@ public void set(int index, UUID value) {
214228
* @param holder the holder containing the UUID data
215229
*/
216230
public void set(int index, UuidHolder holder) {
217-
this.set(index, holder.buffer, holder.start);
231+
set(index, holder.getUuid());
218232
}
219233

220234
/**
@@ -227,7 +241,7 @@ public void set(int index, NullableUuidHolder holder) {
227241
if (holder.isSet == 0) {
228242
getUnderlyingVector().setNull(index);
229243
} else {
230-
this.set(index, holder.buffer, holder.start);
244+
set(index, holder.getUuid());
231245
}
232246
}
233247

@@ -243,8 +257,8 @@ public void set(int index, ArrowBuf source, int sourceOffset) {
243257

244258
BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index);
245259
getUnderlyingVector()
246-
.getDataBuffer()
247-
.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH);
260+
.getDataBuffer()
261+
.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH);
248262
}
249263

250264
/**
@@ -281,7 +295,7 @@ public void setSafe(int index, NullableUuidHolder holder) {
281295
if (holder == null || holder.isSet == 0) {
282296
getUnderlyingVector().setNull(index);
283297
} else {
284-
this.setSafe(index, holder.buffer, holder.start);
298+
setSafe(index, holder.getUuid());
285299
}
286300
}
287301

@@ -292,7 +306,7 @@ public void setSafe(int index, NullableUuidHolder holder) {
292306
* @param holder the holder containing the UUID data
293307
*/
294308
public void setSafe(int index, UuidHolder holder) {
295-
this.setSafe(index, holder.buffer, holder.start);
309+
setSafe(index, holder.getUuid());
296310
}
297311

298312
/**

vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import org.apache.arrow.vector.holders.NullableUuidHolder;
2121
import org.apache.arrow.vector.holders.UuidHolder;
2222
import org.apache.arrow.vector.types.Types;
23-
import org.apache.arrow.vector.util.UuidUtility;
2423

2524
/**
2625
* Reader implementation for reading UUID values from a {@link NullableUuidHolder}.
@@ -81,13 +80,13 @@ public boolean isSet() {
8180
public void read(ExtensionHolder h) {
8281
if (h instanceof NullableUuidHolder) {
8382
NullableUuidHolder nullableHolder = (NullableUuidHolder) h;
84-
nullableHolder.buffer = this.holder.buffer;
83+
nullableHolder.mostSigBits = this.holder.mostSigBits;
84+
nullableHolder.leastSigBits = this.holder.leastSigBits;
8585
nullableHolder.isSet = this.holder.isSet;
86-
nullableHolder.start = this.holder.start;
8786
} else if (h instanceof UuidHolder) {
8887
UuidHolder uuidHolder = (UuidHolder) h;
89-
uuidHolder.buffer = this.holder.buffer;
90-
uuidHolder.start = this.holder.start;
88+
uuidHolder.mostSigBits = this.holder.mostSigBits;
89+
uuidHolder.leastSigBits = this.holder.leastSigBits;
9190
} else {
9291
throw new IllegalArgumentException(
9392
"Unsupported holder type: "
@@ -103,22 +102,7 @@ public Object readObject() {
103102
if (!isSet()) {
104103
return null;
105104
}
106-
// Convert UUID bytes to Java UUID object
107-
try {
108-
return UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
109-
} catch (Exception e) {
110-
throw new RuntimeException(
111-
String.format(
112-
"Failed to read UUID from buffer. Invalid Arrow buffer state: "
113-
+ "capacity=%d, readableBytes=%d, readerIndex=%d, writerIndex=%d, refCnt=%d. "
114-
+ "The buffer must contain exactly 16 bytes of valid UUID data.",
115-
holder.buffer.capacity(),
116-
holder.buffer.readableBytes(),
117-
holder.buffer.readerIndex(),
118-
holder.buffer.writerIndex(),
119-
holder.buffer.refCnt()),
120-
e);
121-
}
105+
// Convert UUID longs to Java UUID object
106+
return holder.getUuid();
122107
}
123108
}
124-

vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,29 +16,42 @@
1616
*/
1717
package org.apache.arrow.vector.holders;
1818

19-
import org.apache.arrow.memory.ArrowBuf;
2019
import org.apache.arrow.vector.extension.UuidType;
2120
import org.apache.arrow.vector.types.pojo.ArrowType;
21+
import java.util.UUID;
2222

2323
/**
2424
* Value holder for nullable UUID values.
2525
*
2626
* <p>The {@code isSet} field controls nullability: when {@code isSet = 1}, the holder contains a
27-
* valid UUID in {@code buffer}; when {@code isSet = 0}, the holder represents a null value and
28-
* {@code buffer} should not be accessed.
27+
* valid UUID represented as two longs; when {@code isSet = 0}, the holder represents a null value
28+
* and the long fields should not be accessed.
2929
*
3030
* @see UuidHolder
3131
* @see org.apache.arrow.vector.UuidVector
3232
* @see org.apache.arrow.vector.extension.UuidType
3333
*/
3434
public class NullableUuidHolder extends ExtensionHolder {
35-
/** Buffer containing 16-byte UUID data. */
36-
public ArrowBuf buffer;
35+
/** The most significant 64 bits of the UUID. */
36+
public long mostSigBits;
37+
38+
/** The least significant 64 bits of the UUID. */
39+
public long leastSigBits;
40+
41+
/**
42+
* Converts the holder's two longs to a UUID object.
43+
*
44+
* @return the UUID represented by this holder, or null if isSet is 0
45+
*/
46+
public UUID getUuid() {
47+
if (this.isSet == 0) {
48+
return null;
49+
}
50+
return new UUID(mostSigBits, leastSigBits);
51+
}
3752

3853
@Override
3954
public ArrowType type() {
4055
return UuidType.INSTANCE;
4156
}
42-
/** Offset in the buffer where the UUID data starts. */
43-
public int start = 0;
4457
}

vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,23 @@
1919
import org.apache.arrow.memory.ArrowBuf;
2020
import org.apache.arrow.vector.extension.UuidType;
2121
import org.apache.arrow.vector.types.pojo.ArrowType;
22+
import java.util.UUID;
2223

2324
/**
2425
* Value holder for non-nullable UUID values.
2526
*
26-
* <p>Contains a 16-byte UUID in {@code buffer} with {@code isSet} always 1.
27+
* <p>Contains a 16-byte UUID represented as two longs with {@code isSet} always 1.
2728
*
2829
* @see NullableUuidHolder
2930
* @see org.apache.arrow.vector.UuidVector
3031
* @see org.apache.arrow.vector.extension.UuidType
3132
*/
3233
public class UuidHolder extends ExtensionHolder {
33-
/** Buffer containing 16-byte UUID data. */
34-
public ArrowBuf buffer;
34+
/** The most significant 64 bits of the UUID. */
35+
public long mostSigBits;
3536

36-
/** Offset in the buffer where the UUID data starts. */
37-
public int start = 0;
37+
/** The least significant 64 bits of the UUID. */
38+
public long leastSigBits;
3839

3940
/** Constructs a UuidHolder with isSet = 1. */
4041
public UuidHolder() {
@@ -44,5 +45,12 @@ public UuidHolder() {
4445
@Override
4546
public ArrowType type() {
4647
return UuidType.INSTANCE;
48+
/**
49+
* Converts the holder's two longs to a UUID object.
50+
*
51+
* @return the UUID represented by this holder
52+
*/
53+
public UUID getUuid() {
54+
return new UUID(mostSigBits, leastSigBits);
4755
}
4856
}

vector/src/test/java/org/apache/arrow/vector/TestListVector.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import org.apache.arrow.vector.types.pojo.Field;
4949
import org.apache.arrow.vector.types.pojo.FieldType;
5050
import org.apache.arrow.vector.util.TransferPair;
51-
import org.apache.arrow.vector.util.UuidUtility;
5251
import org.junit.jupiter.api.AfterEach;
5352
import org.junit.jupiter.api.BeforeEach;
5453
import org.junit.jupiter.api.Test;
@@ -1256,12 +1255,12 @@ public void testListVectorReaderForExtensionType() throws Exception {
12561255
FieldReader uuidReader = reader.reader();
12571256
UuidHolder holder = new UuidHolder();
12581257
uuidReader.read(holder);
1259-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1258+
UUID actualUuid = holder.getUuid();
12601259
assertEquals(u1, actualUuid);
12611260
reader.next();
12621261
uuidReader = reader.reader();
12631262
uuidReader.read(holder);
1264-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1263+
actualUuid = holder.getUuid();
12651264
assertEquals(u2, actualUuid);
12661265
}
12671266
}
@@ -1296,12 +1295,12 @@ public void testCopyFromForExtensionType() throws Exception {
12961295
FieldReader uuidReader = reader.reader();
12971296
UuidHolder holder = new UuidHolder();
12981297
uuidReader.read(holder);
1299-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1298+
UUID actualUuid = holder.getUuid();
13001299
assertEquals(u1, actualUuid);
13011300
reader.next();
13021301
uuidReader = reader.reader();
13031302
uuidReader.read(holder);
1304-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1303+
actualUuid = holder.getUuid();
13051304
assertEquals(u2, actualUuid);
13061305
}
13071306
}

vector/src/test/java/org/apache/arrow/vector/TestMapVector.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
import org.apache.arrow.vector.types.pojo.FieldType;
5050
import org.apache.arrow.vector.util.JsonStringArrayList;
5151
import org.apache.arrow.vector.util.TransferPair;
52-
import org.apache.arrow.vector.util.UuidUtility;
5352
import org.junit.jupiter.api.AfterEach;
5453
import org.junit.jupiter.api.BeforeEach;
5554
import org.junit.jupiter.api.Test;
@@ -1301,12 +1300,12 @@ public void testMapVectorWithExtensionType() throws Exception {
13011300
FieldReader uuidReader = mapReader.value();
13021301
UuidHolder holder = new UuidHolder();
13031302
uuidReader.read(holder);
1304-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1303+
UUID actualUuid = holder.getUuid();
13051304
assertEquals(u1, actualUuid);
13061305
mapReader.next();
13071306
uuidReader = mapReader.value();
13081307
uuidReader.read(holder);
1309-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1308+
actualUuid = holder.getUuid();
13101309
assertEquals(u2, actualUuid);
13111310
}
13121311
}
@@ -1343,12 +1342,12 @@ public void testCopyFromForExtensionType() throws Exception {
13431342
FieldReader uuidReader = mapReader.value();
13441343
UuidHolder holder = new UuidHolder();
13451344
uuidReader.read(holder);
1346-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1345+
UUID actualUuid = holder.getUuid();
13471346
assertEquals(u1, actualUuid);
13481347
mapReader.next();
13491348
uuidReader = mapReader.value();
13501349
uuidReader.read(holder);
1351-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1350+
actualUuid = holder.getUuid();
13521351
assertEquals(u2, actualUuid);
13531352
}
13541353
}

0 commit comments

Comments
 (0)