Skip to content

Commit e7a2f97

Browse files
committed
Use MSB and LSB longs to represent UUID
Add a getUuid method to [Nullable]UuidHolder and read the UUID longs with ArrowBuf.getLong
1 parent e35ed7c commit e7a2f97

File tree

8 files changed

+124
-149
lines changed

8 files changed

+124
-149
lines changed

vector/src/main/java/org/apache/arrow/vector/UuidVector.java

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.apache.arrow.vector;
1818

19+
import static org.apache.arrow.memory.util.MemoryUtil.LITTLE_ENDIAN;
1920
import static org.apache.arrow.vector.extension.UuidType.UUID_BYTE_WIDTH;
2021

2122
import java.nio.ByteBuffer;
@@ -29,7 +30,6 @@
2930
import org.apache.arrow.vector.complex.impl.UuidReaderImpl;
3031
import org.apache.arrow.vector.complex.reader.FieldReader;
3132
import org.apache.arrow.vector.extension.UuidType;
32-
import org.apache.arrow.vector.holders.ExtensionHolder;
3333
import org.apache.arrow.vector.holders.NullableUuidHolder;
3434
import org.apache.arrow.vector.holders.UuidHolder;
3535
import org.apache.arrow.vector.types.pojo.Field;
@@ -157,12 +157,17 @@ public int isSet(int index) {
157157
*/
158158
public void get(int index, UuidHolder holder) {
159159
Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector.");
160-
if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) {
161-
holder.isSet = 0;
160+
final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer();
161+
final long start = (long) index * UUID_BYTE_WIDTH;
162+
final long next = start + Long.BYTES;
163+
// UUIDs are stored in big-endian byte order in Arrow buffers.
164+
// ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems.
165+
if (LITTLE_ENDIAN) {
166+
holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(start));
167+
holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(next));
162168
} else {
163-
holder.isSet = 1;
164-
holder.buffer = getDataBuffer();
165-
holder.start = getStartOffset(index);
169+
holder.mostSigBits = dataBuffer.getLong(start);
170+
holder.leastSigBits = dataBuffer.getLong(next);
166171
}
167172
}
168173

@@ -178,8 +183,17 @@ public void get(int index, NullableUuidHolder holder) {
178183
holder.isSet = 0;
179184
} else {
180185
holder.isSet = 1;
181-
holder.buffer = getDataBuffer();
182-
holder.start = getStartOffset(index);
186+
final ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer();
187+
final long offset = (long) index * UUID_BYTE_WIDTH;
188+
// UUIDs are stored in big-endian byte order in Arrow buffers.
189+
// ArrowBuf.getLong() reads in native byte order, so we need to reverse bytes on LE systems.
190+
if (LITTLE_ENDIAN) {
191+
holder.mostSigBits = Long.reverseBytes(dataBuffer.getLong(offset));
192+
holder.leastSigBits = Long.reverseBytes(dataBuffer.getLong(offset + Long.BYTES));
193+
} else {
194+
holder.mostSigBits = dataBuffer.getLong(offset);
195+
holder.leastSigBits = dataBuffer.getLong(offset + Long.BYTES);
196+
}
183197
}
184198
}
185199

@@ -214,7 +228,7 @@ public void set(int index, UUID value) {
214228
* @param holder the holder containing the UUID data
215229
*/
216230
public void set(int index, UuidHolder holder) {
217-
this.set(index, holder.buffer, holder.start);
231+
set(index, holder.getUuid());
218232
}
219233

220234
/**
@@ -227,7 +241,7 @@ public void set(int index, NullableUuidHolder holder) {
227241
if (holder.isSet == 0) {
228242
getUnderlyingVector().setNull(index);
229243
} else {
230-
this.set(index, holder.buffer, holder.start);
244+
set(index, holder.getUuid());
231245
}
232246
}
233247

@@ -243,8 +257,8 @@ public void set(int index, ArrowBuf source, int sourceOffset) {
243257

244258
BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index);
245259
getUnderlyingVector()
246-
.getDataBuffer()
247-
.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH);
260+
.getDataBuffer()
261+
.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH);
248262
}
249263

250264
/**
@@ -281,7 +295,7 @@ public void setSafe(int index, NullableUuidHolder holder) {
281295
if (holder == null || holder.isSet == 0) {
282296
getUnderlyingVector().setNull(index);
283297
} else {
284-
this.setSafe(index, holder.buffer, holder.start);
298+
setSafe(index, holder.getUuid());
285299
}
286300
}
287301

@@ -292,7 +306,7 @@ public void setSafe(int index, NullableUuidHolder holder) {
292306
* @param holder the holder containing the UUID data
293307
*/
294308
public void setSafe(int index, UuidHolder holder) {
295-
this.setSafe(index, holder.buffer, holder.start);
309+
setSafe(index, holder.getUuid());
296310
}
297311

298312
/**

vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import org.apache.arrow.vector.holders.NullableUuidHolder;
2121
import org.apache.arrow.vector.holders.UuidHolder;
2222
import org.apache.arrow.vector.types.Types;
23-
import org.apache.arrow.vector.util.UuidUtility;
2423

2524
/**
2625
* Reader implementation for reading UUID values from a {@link NullableUuidHolder}.
@@ -81,13 +80,13 @@ public boolean isSet() {
8180
public void read(ExtensionHolder h) {
8281
if (h instanceof NullableUuidHolder) {
8382
NullableUuidHolder nullableHolder = (NullableUuidHolder) h;
84-
nullableHolder.buffer = this.holder.buffer;
83+
nullableHolder.mostSigBits = this.holder.mostSigBits;
84+
nullableHolder.leastSigBits = this.holder.leastSigBits;
8585
nullableHolder.isSet = this.holder.isSet;
86-
nullableHolder.start = this.holder.start;
8786
} else if (h instanceof UuidHolder) {
8887
UuidHolder uuidHolder = (UuidHolder) h;
89-
uuidHolder.buffer = this.holder.buffer;
90-
uuidHolder.start = this.holder.start;
88+
uuidHolder.mostSigBits = this.holder.mostSigBits;
89+
uuidHolder.leastSigBits = this.holder.leastSigBits;
9190
} else {
9291
throw new IllegalArgumentException(
9392
"Unsupported holder type: "
@@ -103,22 +102,7 @@ public Object readObject() {
103102
if (!isSet()) {
104103
return null;
105104
}
106-
// Convert UUID bytes to Java UUID object
107-
try {
108-
return UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
109-
} catch (Exception e) {
110-
throw new RuntimeException(
111-
String.format(
112-
"Failed to read UUID from buffer. Invalid Arrow buffer state: "
113-
+ "capacity=%d, readableBytes=%d, readerIndex=%d, writerIndex=%d, refCnt=%d. "
114-
+ "The buffer must contain exactly 16 bytes of valid UUID data.",
115-
holder.buffer.capacity(),
116-
holder.buffer.readableBytes(),
117-
holder.buffer.readerIndex(),
118-
holder.buffer.writerIndex(),
119-
holder.buffer.refCnt()),
120-
e);
121-
}
105+
// Convert UUID longs to Java UUID object
106+
return holder.getUuid();
122107
}
123108
}
124-

vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,35 @@
1616
*/
1717
package org.apache.arrow.vector.holders;
1818

19-
import org.apache.arrow.memory.ArrowBuf;
19+
import java.util.UUID;
2020

2121
/**
2222
* Value holder for nullable UUID values.
2323
*
2424
* <p>The {@code isSet} field controls nullability: when {@code isSet = 1}, the holder contains a
25-
* valid UUID in {@code buffer}; when {@code isSet = 0}, the holder represents a null value and
26-
* {@code buffer} should not be accessed.
25+
* valid UUID represented as two longs; when {@code isSet = 0}, the holder represents a null value
26+
* and the long fields should not be accessed.
2727
*
2828
* @see UuidHolder
2929
* @see org.apache.arrow.vector.UuidVector
3030
* @see org.apache.arrow.vector.extension.UuidType
3131
*/
3232
public class NullableUuidHolder extends ExtensionHolder {
33-
/** Buffer containing 16-byte UUID data. */
34-
public ArrowBuf buffer;
33+
/** The most significant 64 bits of the UUID. */
34+
public long mostSigBits;
3535

36-
/** Offset in the buffer where the UUID data starts. */
37-
public int start = 0;
36+
/** The least significant 64 bits of the UUID. */
37+
public long leastSigBits;
38+
39+
/**
40+
* Converts the holder's two longs to a UUID object.
41+
*
42+
* @return the UUID represented by this holder, or null if isSet is 0
43+
*/
44+
public UUID getUuid() {
45+
if (this.isSet == 0) {
46+
return null;
47+
}
48+
return new UUID(mostSigBits, leastSigBits);
49+
}
3850
}

vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,35 @@
1616
*/
1717
package org.apache.arrow.vector.holders;
1818

19-
import org.apache.arrow.memory.ArrowBuf;
19+
import java.util.UUID;
2020

2121
/**
2222
* Value holder for non-nullable UUID values.
2323
*
24-
* <p>Contains a 16-byte UUID in {@code buffer} with {@code isSet} always 1.
24+
* <p>Contains a 16-byte UUID represented as two longs with {@code isSet} always 1.
2525
*
2626
* @see NullableUuidHolder
2727
* @see org.apache.arrow.vector.UuidVector
2828
* @see org.apache.arrow.vector.extension.UuidType
2929
*/
3030
public class UuidHolder extends ExtensionHolder {
31-
/** Buffer containing 16-byte UUID data. */
32-
public ArrowBuf buffer;
31+
/** The most significant 64 bits of the UUID. */
32+
public long mostSigBits;
3333

34-
/** Offset in the buffer where the UUID data starts. */
35-
public int start = 0;
34+
/** The least significant 64 bits of the UUID. */
35+
public long leastSigBits;
3636

3737
/** Constructs a UuidHolder with isSet = 1. */
3838
public UuidHolder() {
3939
this.isSet = 1;
4040
}
41+
42+
/**
43+
* Converts the holder's two longs to a UUID object.
44+
*
45+
* @return the UUID represented by this holder
46+
*/
47+
public UUID getUuid() {
48+
return new UUID(mostSigBits, leastSigBits);
49+
}
4150
}

vector/src/test/java/org/apache/arrow/vector/TestListVector.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
import org.apache.arrow.vector.types.pojo.Field;
5050
import org.apache.arrow.vector.types.pojo.FieldType;
5151
import org.apache.arrow.vector.util.TransferPair;
52-
import org.apache.arrow.vector.util.UuidUtility;
5352
import org.junit.jupiter.api.AfterEach;
5453
import org.junit.jupiter.api.BeforeEach;
5554
import org.junit.jupiter.api.Test;
@@ -1259,12 +1258,12 @@ public void testListVectorReaderForExtensionType() throws Exception {
12591258
FieldReader uuidReader = reader.reader();
12601259
UuidHolder holder = new UuidHolder();
12611260
uuidReader.read(holder);
1262-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1261+
UUID actualUuid = holder.getUuid();
12631262
assertEquals(u1, actualUuid);
12641263
reader.next();
12651264
uuidReader = reader.reader();
12661265
uuidReader.read(holder);
1267-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1266+
actualUuid = holder.getUuid();
12681267
assertEquals(u2, actualUuid);
12691268
}
12701269
}
@@ -1300,12 +1299,12 @@ public void testCopyFromForExtensionType() throws Exception {
13001299
FieldReader uuidReader = reader.reader();
13011300
UuidHolder holder = new UuidHolder();
13021301
uuidReader.read(holder);
1303-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1302+
UUID actualUuid = holder.getUuid();
13041303
assertEquals(u1, actualUuid);
13051304
reader.next();
13061305
uuidReader = reader.reader();
13071306
uuidReader.read(holder);
1308-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1307+
actualUuid = holder.getUuid();
13091308
assertEquals(u2, actualUuid);
13101309
}
13111310
}

vector/src/test/java/org/apache/arrow/vector/TestMapVector.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
import org.apache.arrow.vector.types.pojo.FieldType;
5151
import org.apache.arrow.vector.util.JsonStringArrayList;
5252
import org.apache.arrow.vector.util.TransferPair;
53-
import org.apache.arrow.vector.util.UuidUtility;
5453
import org.junit.jupiter.api.AfterEach;
5554
import org.junit.jupiter.api.BeforeEach;
5655
import org.junit.jupiter.api.Test;
@@ -1304,12 +1303,12 @@ public void testMapVectorWithExtensionType() throws Exception {
13041303
FieldReader uuidReader = mapReader.value();
13051304
UuidHolder holder = new UuidHolder();
13061305
uuidReader.read(holder);
1307-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1306+
UUID actualUuid = holder.getUuid();
13081307
assertEquals(u1, actualUuid);
13091308
mapReader.next();
13101309
uuidReader = mapReader.value();
13111310
uuidReader.read(holder);
1312-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1311+
actualUuid = holder.getUuid();
13131312
assertEquals(u2, actualUuid);
13141313
}
13151314
}
@@ -1349,12 +1348,12 @@ public void testCopyFromForExtensionType() throws Exception {
13491348
FieldReader uuidReader = mapReader.value();
13501349
UuidHolder holder = new UuidHolder();
13511350
uuidReader.read(holder);
1352-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1351+
UUID actualUuid = holder.getUuid();
13531352
assertEquals(u1, actualUuid);
13541353
mapReader.next();
13551354
uuidReader = mapReader.value();
13561355
uuidReader.read(holder);
1357-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
1356+
actualUuid = holder.getUuid();
13581357
assertEquals(u2, actualUuid);
13591358
}
13601359
}

0 commit comments

Comments
 (0)