Skip to content

Commit e35ed7c

Browse files
committed
GH-948: Use buffer indexing for UUID vector
1 parent b9e40fa commit e35ed7c

File tree

9 files changed

+507
-76
lines changed

9 files changed

+507
-76
lines changed

vector/src/main/java/org/apache/arrow/vector/UuidVector.java

Lines changed: 53 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,13 @@
2323
import org.apache.arrow.memory.ArrowBuf;
2424
import org.apache.arrow.memory.BufferAllocator;
2525
import org.apache.arrow.memory.util.ArrowBufPointer;
26+
import org.apache.arrow.memory.util.ByteFunctionHelpers;
2627
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
28+
import org.apache.arrow.util.Preconditions;
2729
import org.apache.arrow.vector.complex.impl.UuidReaderImpl;
2830
import org.apache.arrow.vector.complex.reader.FieldReader;
2931
import org.apache.arrow.vector.extension.UuidType;
32+
import org.apache.arrow.vector.holders.ExtensionHolder;
3033
import org.apache.arrow.vector.holders.NullableUuidHolder;
3134
import org.apache.arrow.vector.holders.UuidHolder;
3235
import org.apache.arrow.vector.types.pojo.Field;
@@ -132,7 +135,8 @@ public int hashCode(int index) {
132135

133136
@Override
134137
public int hashCode(int index, ArrowBufHasher hasher) {
135-
return getUnderlyingVector().hashCode(index, hasher);
138+
int start = this.getStartOffset(index);
139+
return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, start + UUID_BYTE_WIDTH);
136140
}
137141

138142
/**
@@ -146,17 +150,19 @@ public int isSet(int index) {
146150
}
147151

148152
/**
149-
* Gets the UUID value at the given index as an ArrowBuf.
153+
* Reads the UUID value at the given index into a UuidHolder.
150154
*
151-
* @param index the index to retrieve
152-
* @return a buffer slice containing the 16-byte UUID
153-
* @throws IllegalStateException if the value at the index is null and null checking is enabled
155+
* @param index the index to read from
156+
* @param holder the holder to populate with the UUID data
154157
*/
155-
public ArrowBuf get(int index) throws IllegalStateException {
158+
public void get(int index, UuidHolder holder) {
159+
Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector.");
156160
if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) {
157-
throw new IllegalStateException("Value at index is null");
161+
holder.isSet = 0;
158162
} else {
159-
return getBufferSlicePostNullCheck(index);
163+
holder.isSet = 1;
164+
holder.buffer = getDataBuffer();
165+
holder.start = getStartOffset(index);
160166
}
161167
}
162168

@@ -167,23 +173,24 @@ public ArrowBuf get(int index) throws IllegalStateException {
167173
* @param holder the holder to populate with the UUID data
168174
*/
169175
public void get(int index, NullableUuidHolder holder) {
176+
Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector.");
170177
if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) {
171178
holder.isSet = 0;
172179
} else {
173180
holder.isSet = 1;
174-
holder.buffer = getBufferSlicePostNullCheck(index);
181+
holder.buffer = getDataBuffer();
182+
holder.start = getStartOffset(index);
175183
}
176184
}
177185

178186
/**
179-
* Reads the UUID value at the given index into a UuidHolder.
187+
* Calculates the byte offset for a given index in the data buffer.
180188
*
181-
* @param index the index to read from
182-
* @param holder the holder to populate with the UUID data
189+
* @param index the index of the UUID value
190+
* @return the byte offset in the data buffer
183191
*/
184-
public void get(int index, UuidHolder holder) {
185-
holder.isSet = 1;
186-
holder.buffer = getBufferSlicePostNullCheck(index);
192+
public final int getStartOffset(int index) {
193+
return index * UUID_BYTE_WIDTH;
187194
}
188195

189196
/**
@@ -207,7 +214,7 @@ public void set(int index, UUID value) {
207214
* @param holder the holder containing the UUID data
208215
*/
209216
public void set(int index, UuidHolder holder) {
210-
this.set(index, holder.isSet, holder.buffer);
217+
this.set(index, holder.buffer, holder.start);
211218
}
212219

213220
/**
@@ -217,28 +224,11 @@ public void set(int index, UuidHolder holder) {
217224
* @param holder the holder containing the UUID data
218225
*/
219226
public void set(int index, NullableUuidHolder holder) {
220-
this.set(index, holder.isSet, holder.buffer);
221-
}
222-
223-
/**
224-
* Sets the UUID value at the given index with explicit null flag.
225-
*
226-
* @param index the index to set
227-
* @param isSet 1 if the value is set, 0 if null
228-
* @param buffer the buffer containing the 16-byte UUID data
229-
*/
230-
public void set(int index, int isSet, ArrowBuf buffer) {
231-
getUnderlyingVector().set(index, isSet, buffer);
232-
}
233-
234-
/**
235-
* Sets the UUID value at the given index from an ArrowBuf.
236-
*
237-
* @param index the index to set
238-
* @param value the buffer containing the 16-byte UUID data
239-
*/
240-
public void set(int index, ArrowBuf value) {
241-
getUnderlyingVector().set(index, value);
227+
if (holder.isSet == 0) {
228+
getUnderlyingVector().setNull(index);
229+
} else {
230+
this.set(index, holder.buffer, holder.start);
231+
}
242232
}
243233

244234
/**
@@ -249,10 +239,12 @@ public void set(int index, ArrowBuf value) {
249239
* @param sourceOffset the offset in the source buffer where the UUID data starts
250240
*/
251241
public void set(int index, ArrowBuf source, int sourceOffset) {
252-
// Copy bytes from source buffer to target vector data buffer
253-
ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer();
254-
dataBuffer.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH);
255-
getUnderlyingVector().setIndexDefined(index);
242+
Preconditions.checkNotNull(source, "Cannot set UUID vector, the source buffer is null.");
243+
244+
BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index);
245+
getUnderlyingVector()
246+
.getDataBuffer()
247+
.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH);
256248
}
257249

258250
/**
@@ -286,25 +278,34 @@ public void setSafe(int index, UUID value) {
286278
* @param holder the holder containing the UUID data, or null to set a null value
287279
*/
288280
public void setSafe(int index, NullableUuidHolder holder) {
289-
if (holder != null) {
290-
getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer);
291-
} else {
281+
if (holder == null || holder.isSet == 0) {
292282
getUnderlyingVector().setNull(index);
283+
} else {
284+
this.setSafe(index, holder.buffer, holder.start);
293285
}
294286
}
295287

296288
/**
297289
* Sets the UUID value at the given index from a UuidHolder, expanding capacity if needed.
298290
*
299291
* @param index the index to set
300-
* @param holder the holder containing the UUID data, or null to set a null value
292+
* @param holder the holder containing the UUID data
301293
*/
302294
public void setSafe(int index, UuidHolder holder) {
303-
if (holder != null) {
304-
getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer);
305-
} else {
306-
getUnderlyingVector().setNull(index);
307-
}
295+
this.setSafe(index, holder.buffer, holder.start);
296+
}
297+
298+
/**
299+
* Sets the UUID value at the given index by copying from a source buffer, expanding capacity if
300+
* needed.
301+
*
302+
* @param index the index to set
303+
* @param buffer the source buffer to copy from
304+
* @param start the offset in the source buffer where the UUID data starts
305+
*/
306+
public void setSafe(int index, ArrowBuf buffer, int start) {
307+
getUnderlyingVector().handleSafe(index);
308+
this.set(index, buffer, start);
308309
}
309310

310311
/**
@@ -400,15 +401,9 @@ public TransferPair getTransferPair(BufferAllocator allocator) {
400401
return getTransferPair(this.getField().getName(), allocator);
401402
}
402403

403-
private ArrowBuf getBufferSlicePostNullCheck(int index) {
404-
return getUnderlyingVector()
405-
.getDataBuffer()
406-
.slice((long) index * UUID_BYTE_WIDTH, UUID_BYTE_WIDTH);
407-
}
408-
409404
@Override
410405
public int getTypeWidth() {
411-
return getUnderlyingVector().getTypeWidth();
406+
return UUID_BYTE_WIDTH;
412407
}
413408

414409
/** {@link TransferPair} for {@link UuidVector}. */
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.complex.impl;
18+
19+
import org.apache.arrow.vector.holders.ExtensionHolder;
20+
import org.apache.arrow.vector.holders.NullableUuidHolder;
21+
import org.apache.arrow.vector.holders.UuidHolder;
22+
import org.apache.arrow.vector.types.Types;
23+
import org.apache.arrow.vector.util.UuidUtility;
24+
25+
/**
26+
* Reader implementation for reading UUID values from a {@link NullableUuidHolder}.
27+
*
28+
* <p>This reader wraps a single UUID holder value and provides methods to read from it. Unlike
29+
* {@link UuidReaderImpl} which reads from a vector, this reader operates on a holder instance.
30+
*
31+
* @see NullableUuidHolder
32+
* @see UuidReaderImpl
33+
*/
34+
public class NullableUuidHolderReaderImpl extends AbstractFieldReader {
35+
private final NullableUuidHolder holder;
36+
37+
/**
38+
* Constructs a reader for the given UUID holder.
39+
*
40+
* @param holder the UUID holder to read from
41+
*/
42+
public NullableUuidHolderReaderImpl(NullableUuidHolder holder) {
43+
this.holder = holder;
44+
}
45+
46+
@Override
47+
public int size() {
48+
throw new UnsupportedOperationException(
49+
"size() is not supported on NullableUuidHolderReaderImpl. "
50+
+ "This reader wraps a single UUID holder value, not a collection. "
51+
+ "Use UuidReaderImpl for vector-based UUID reading.");
52+
}
53+
54+
@Override
55+
public boolean next() {
56+
throw new UnsupportedOperationException(
57+
"next() is not supported on NullableUuidHolderReaderImpl. "
58+
+ "This reader wraps a single UUID holder value, not an iterator. "
59+
+ "Use UuidReaderImpl for vector-based UUID reading.");
60+
}
61+
62+
@Override
63+
public void setPosition(int index) {
64+
throw new UnsupportedOperationException(
65+
"setPosition() is not supported on NullableUuidHolderReaderImpl. "
66+
+ "This reader wraps a single UUID holder value, not a vector. "
67+
+ "Use UuidReaderImpl for vector-based UUID reading.");
68+
}
69+
70+
@Override
71+
public Types.MinorType getMinorType() {
72+
return Types.MinorType.EXTENSIONTYPE;
73+
}
74+
75+
@Override
76+
public boolean isSet() {
77+
return holder.isSet == 1;
78+
}
79+
80+
@Override
81+
public void read(ExtensionHolder h) {
82+
if (h instanceof NullableUuidHolder) {
83+
NullableUuidHolder nullableHolder = (NullableUuidHolder) h;
84+
nullableHolder.buffer = this.holder.buffer;
85+
nullableHolder.isSet = this.holder.isSet;
86+
nullableHolder.start = this.holder.start;
87+
} else if (h instanceof UuidHolder) {
88+
UuidHolder uuidHolder = (UuidHolder) h;
89+
uuidHolder.buffer = this.holder.buffer;
90+
uuidHolder.start = this.holder.start;
91+
} else {
92+
throw new IllegalArgumentException(
93+
"Unsupported holder type: "
94+
+ h.getClass().getName()
95+
+ ". "
96+
+ "Only NullableUuidHolder and UuidHolder are supported for UUID values. "
97+
+ "Provided holder type cannot be used to read UUID data.");
98+
}
99+
}
100+
101+
@Override
102+
public Object readObject() {
103+
if (!isSet()) {
104+
return null;
105+
}
106+
// Convert UUID bytes to Java UUID object
107+
try {
108+
return UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
109+
} catch (Exception e) {
110+
throw new RuntimeException(
111+
String.format(
112+
"Failed to read UUID from buffer. Invalid Arrow buffer state: "
113+
+ "capacity=%d, readableBytes=%d, readerIndex=%d, writerIndex=%d, refCnt=%d. "
114+
+ "The buffer must contain exactly 16 bytes of valid UUID data.",
115+
holder.buffer.capacity(),
116+
holder.buffer.readableBytes(),
117+
holder.buffer.readerIndex(),
118+
holder.buffer.writerIndex(),
119+
holder.buffer.refCnt()),
120+
e);
121+
}
122+
}
123+
}
124+

vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,15 @@ public void writeExtension(Object value) {
5050
vector.setSafe(getPosition(), (ArrowBuf) value);
5151
} else if (value instanceof java.util.UUID) {
5252
vector.setSafe(getPosition(), (java.util.UUID) value);
53+
} else if (value instanceof ExtensionHolder) {
54+
write((ExtensionHolder) value);
5355
} else {
54-
throw new IllegalArgumentException("Unsupported value type for UUID: " + value.getClass());
56+
throw new IllegalArgumentException(
57+
"Unsupported value type for UUID: "
58+
+ value.getClass().getName()
59+
+ ". "
60+
+ "Supported types are: byte[] (16 bytes), ArrowBuf (16 bytes), or java.util.UUID. "
61+
+ "Convert your value to one of these types before writing.");
5562
}
5663
vector.setValueCount(getPosition() + 1);
5764
}

vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,7 @@
3232
public class NullableUuidHolder extends ExtensionHolder {
3333
/** Buffer containing 16-byte UUID data. */
3434
public ArrowBuf buffer;
35+
36+
/** Offset in the buffer where the UUID data starts. */
37+
public int start = 0;
3538
}

vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ public class UuidHolder extends ExtensionHolder {
3131
/** Buffer containing 16-byte UUID data. */
3232
public ArrowBuf buffer;
3333

34+
/** Offset in the buffer where the UUID data starts. */
35+
public int start = 0;
36+
3437
/** Constructs a UuidHolder with isSet = 1. */
3538
public UuidHolder() {
3639
this.isSet = 1;

vector/src/test/java/org/apache/arrow/vector/TestListVector.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,12 +1259,12 @@ public void testListVectorReaderForExtensionType() throws Exception {
12591259
FieldReader uuidReader = reader.reader();
12601260
UuidHolder holder = new UuidHolder();
12611261
uuidReader.read(holder);
1262-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0);
1262+
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
12631263
assertEquals(u1, actualUuid);
12641264
reader.next();
12651265
uuidReader = reader.reader();
12661266
uuidReader.read(holder);
1267-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0);
1267+
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
12681268
assertEquals(u2, actualUuid);
12691269
}
12701270
}
@@ -1300,12 +1300,12 @@ public void testCopyFromForExtensionType() throws Exception {
13001300
FieldReader uuidReader = reader.reader();
13011301
UuidHolder holder = new UuidHolder();
13021302
uuidReader.read(holder);
1303-
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0);
1303+
UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
13041304
assertEquals(u1, actualUuid);
13051305
reader.next();
13061306
uuidReader = reader.reader();
13071307
uuidReader.read(holder);
1308-
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0);
1308+
actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start);
13091309
assertEquals(u2, actualUuid);
13101310
}
13111311
}

0 commit comments

Comments
 (0)