Skip to content

Commit 266dfc1

Browse files
authored
GH-586: Override fixedSizeBinary method for UnionMapWriter (#885)
## What's Changed `UnionMapWriter` will null out the entire map struct entry instead of setting the value to null in: ``` childWriter.value().fixedSizeBinary().writeNull(); ``` This PR overrides the `fixedSizeBinary()` method for the `UnionMapWriter`, resolving it. In addition, it introduces the `fixedSizeBinary(int byteWidth)` which needs to be used to initialize `key` and `value` writes in `UnionMapWriter` if they have not been initialized. Closes #586.
1 parent 8ecbea6 commit 266dfc1

File tree

2 files changed

+244
-0
lines changed

2 files changed

+244
-0
lines changed

vector/src/main/codegen/templates/UnionMapWriter.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,4 +243,27 @@ public ExtensionWriter extension(ArrowType type) {
243243
return super.extension(type);
244244
}
245245
}
246+
247+
public FixedSizeBinaryWriter fixedSizeBinary(int byteWidth) {
248+
switch (mode) {
249+
case KEY:
250+
return entryWriter.fixedSizeBinary(MapVector.KEY_NAME, byteWidth);
251+
case VALUE:
252+
return entryWriter.fixedSizeBinary(MapVector.VALUE_NAME, byteWidth);
253+
default:
254+
return this;
255+
}
256+
}
257+
258+
@Override
259+
public FixedSizeBinaryWriter fixedSizeBinary() {
260+
switch (mode) {
261+
case KEY:
262+
return entryWriter.fixedSizeBinary(MapVector.KEY_NAME);
263+
case VALUE:
264+
return entryWriter.fixedSizeBinary(MapVector.VALUE_NAME);
265+
default:
266+
return this;
267+
}
268+
}
246269
}

vector/src/test/java/org/apache/arrow/vector/TestMapVector.java

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616
*/
1717
package org.apache.arrow.vector;
1818

19+
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
1920
import static org.junit.jupiter.api.Assertions.assertEquals;
2021
import static org.junit.jupiter.api.Assertions.assertFalse;
2122
import static org.junit.jupiter.api.Assertions.assertNull;
2223
import static org.junit.jupiter.api.Assertions.assertSame;
24+
import static org.junit.jupiter.api.Assertions.assertThrows;
2325
import static org.junit.jupiter.api.Assertions.assertTrue;
2426

2527
import java.nio.ByteBuffer;
@@ -41,6 +43,7 @@
4143
import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
4244
import org.apache.arrow.vector.complex.writer.FieldWriter;
4345
import org.apache.arrow.vector.holder.UuidHolder;
46+
import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
4447
import org.apache.arrow.vector.types.Types.MinorType;
4548
import org.apache.arrow.vector.types.pojo.ArrowType;
4649
import org.apache.arrow.vector.types.pojo.Field;
@@ -1359,4 +1362,222 @@ public void testCopyFromForExtensionType() throws Exception {
13591362
assertEquals(u2, actualUuid);
13601363
}
13611364
}
1365+
1366+
private FixedSizeBinaryHolder getFixedSizeBinaryHolder(byte[] array) {
1367+
FixedSizeBinaryHolder holder = new FixedSizeBinaryHolder();
1368+
holder.byteWidth = array.length;
1369+
holder.buffer = allocator.buffer(array.length);
1370+
for (int i = 0; i < array.length; i++) {
1371+
holder.buffer.setByte(i, array[i]);
1372+
}
1373+
1374+
return holder;
1375+
}
1376+
1377+
/**
1378+
* Regression test for GH-586: UnionMapWriter.fixedSizeBinary() should properly delegate to the
1379+
* entry writer for both key and value paths.
1380+
*/
1381+
@Test
1382+
public void testFixedSizeBinaryWriter() {
1383+
try (MapVector mapVector = MapVector.empty("map_vector", allocator, false)) {
1384+
UnionMapWriter writer = mapVector.getWriter();
1385+
writer.allocate();
1386+
1387+
// populate input vector with the following records
1388+
// {[11, 22] -> [32, 21]}
1389+
// {1 -> [11, 22], 2 -> [32, 21]}
1390+
// null
1391+
// {[11, 22] -> 1, [32, 21] -> 2}
1392+
// {[11, 22] -> null}
1393+
// {null -> [32, 21]} - wrong "for a given entry, the "key" is non-nullable" - todo: it
1394+
// shouldn't work. Should it?
1395+
FixedSizeBinaryHolder holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
1396+
FixedSizeBinaryHolder holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});
1397+
1398+
writer.setPosition(0); // optional
1399+
writer.startMap();
1400+
writer.startEntry();
1401+
writer
1402+
.key()
1403+
.fixedSizeBinary(holder1.byteWidth)
1404+
.write(holder1); // need to initialize with byteWidth - NPE otherwise
1405+
writer.value().fixedSizeBinary(holder2.byteWidth).write(holder2);
1406+
writer.endEntry();
1407+
holder1.buffer.close();
1408+
holder2.buffer.close();
1409+
writer.endMap();
1410+
1411+
// {1 -> [11, 22], 2 -> [32, 21]}
1412+
holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
1413+
holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});
1414+
writer.setPosition(1);
1415+
writer.startMap();
1416+
writer.startEntry();
1417+
writer.key().bigInt().writeBigInt(1);
1418+
writer.value().fixedSizeBinary().write(holder1);
1419+
writer.endEntry();
1420+
holder1.buffer.close();
1421+
writer.startEntry();
1422+
writer.key().bigInt().writeBigInt(2);
1423+
writer.value().fixedSizeBinary().write(holder2);
1424+
writer.endEntry();
1425+
writer.endMap();
1426+
holder2.buffer.close();
1427+
1428+
// {[11, 22] -> 1, [32, 21] -> 2}
1429+
holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
1430+
holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});
1431+
writer.setPosition(3);
1432+
writer.startMap();
1433+
writer.startEntry();
1434+
writer.key().fixedSizeBinary().write(holder1);
1435+
writer.value().bigInt().writeBigInt(1);
1436+
writer.endEntry();
1437+
holder1.buffer.close();
1438+
writer.startEntry();
1439+
writer.key().fixedSizeBinary().write(holder2);
1440+
writer.value().bigInt().writeBigInt(2);
1441+
writer.endEntry();
1442+
writer.endMap();
1443+
holder2.buffer.close();
1444+
1445+
// {[11, 22] -> null}
1446+
holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
1447+
writer.setPosition(4);
1448+
writer.startMap();
1449+
writer.startEntry();
1450+
writer.key().fixedSizeBinary().write(holder1);
1451+
writer.endEntry();
1452+
writer.endMap();
1453+
holder1.buffer.close();
1454+
1455+
// {null -> [32, 21]}
1456+
holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});
1457+
writer.setPosition(5);
1458+
writer.startMap();
1459+
writer.startEntry();
1460+
writer.value().fixedSizeBinary().write(holder2);
1461+
writer.endEntry();
1462+
writer.endMap();
1463+
holder2.buffer.close();
1464+
1465+
writer.setValueCount(6);
1466+
1467+
// assert the output vector is correct
1468+
FieldReader reader = mapVector.getReader();
1469+
assertTrue(reader.isSet(), "shouldn't be null");
1470+
reader.setPosition(1);
1471+
assertTrue(reader.isSet(), "shouldn't be null");
1472+
reader.setPosition(2);
1473+
assertFalse(reader.isSet(), "should be null");
1474+
reader.setPosition(3);
1475+
assertTrue(reader.isSet(), "shouldn't be null");
1476+
reader.setPosition(4);
1477+
assertTrue(reader.isSet(), "shouldn't be null");
1478+
reader.setPosition(5);
1479+
assertTrue(reader.isSet(), "shouldn't be null");
1480+
1481+
/* index 0 */
1482+
Object result = mapVector.getObject(0);
1483+
ArrayList<?> resultSet = (ArrayList<?>) result;
1484+
assertEquals(1, resultSet.size());
1485+
Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
1486+
assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
1487+
assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
1488+
assertArrayEquals(new byte[] {11, 22}, (byte[]) resultStruct.get(MapVector.KEY_NAME));
1489+
assertArrayEquals(new byte[] {32, 21}, (byte[]) resultStruct.get(MapVector.VALUE_NAME));
1490+
1491+
/* index 1 */
1492+
result = mapVector.getObject(1);
1493+
resultSet = (ArrayList<?>) result;
1494+
assertEquals(2, resultSet.size());
1495+
resultStruct = (Map<?, ?>) resultSet.get(0);
1496+
assertEquals(1L, getResultKey(resultStruct));
1497+
assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
1498+
assertArrayEquals(new byte[] {11, 22}, (byte[]) resultStruct.get(MapVector.VALUE_NAME));
1499+
resultStruct = (Map<?, ?>) resultSet.get(1);
1500+
assertEquals(2L, getResultKey(resultStruct));
1501+
assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
1502+
assertArrayEquals(new byte[] {32, 21}, (byte[]) resultStruct.get(MapVector.VALUE_NAME));
1503+
1504+
/* index 2 */
1505+
result = mapVector.getObject(2);
1506+
assertNull(result);
1507+
1508+
/* index 3 */
1509+
result = mapVector.getObject(3);
1510+
resultSet = (ArrayList<?>) result;
1511+
assertEquals(2, resultSet.size());
1512+
resultStruct = (Map<?, ?>) resultSet.get(0);
1513+
assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
1514+
assertArrayEquals(new byte[] {11, 22}, (byte[]) resultStruct.get(MapVector.KEY_NAME));
1515+
assertEquals(1L, getResultValue(resultStruct));
1516+
resultStruct = (Map<?, ?>) resultSet.get(1);
1517+
assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
1518+
assertArrayEquals(new byte[] {32, 21}, (byte[]) resultStruct.get(MapVector.KEY_NAME));
1519+
assertEquals(2L, getResultValue(resultStruct));
1520+
1521+
/* index 4 */
1522+
result = mapVector.getObject(4);
1523+
resultSet = (ArrayList<?>) result;
1524+
assertEquals(1, resultSet.size());
1525+
resultStruct = (Map<?, ?>) resultSet.get(0);
1526+
assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
1527+
assertArrayEquals(new byte[] {11, 22}, (byte[]) resultStruct.get(MapVector.KEY_NAME));
1528+
assertFalse(resultStruct.containsKey(MapVector.VALUE_NAME));
1529+
1530+
/* index 5 */
1531+
result = mapVector.getObject(5);
1532+
resultSet = (ArrayList<?>) result;
1533+
assertEquals(1, resultSet.size());
1534+
resultStruct = (Map<?, ?>) resultSet.get(0);
1535+
assertFalse(resultStruct.containsKey(MapVector.KEY_NAME));
1536+
assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
1537+
assertArrayEquals(new byte[] {32, 21}, (byte[]) resultStruct.get(MapVector.VALUE_NAME));
1538+
}
1539+
}
1540+
1541+
@Test
1542+
public void testFixedSizeBinaryFirstInitialization() {
1543+
try (MapVector mapVector = MapVector.empty("map_vector", allocator, false)) {
1544+
UnionMapWriter writer = mapVector.getWriter();
1545+
writer.allocate();
1546+
1547+
// populate input vector with the following records
1548+
// {[11, 22] -> [32, 21]}
1549+
FixedSizeBinaryHolder holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
1550+
FixedSizeBinaryHolder holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});
1551+
1552+
writer.setPosition(0); // optional
1553+
writer.startMap();
1554+
writer.startEntry();
1555+
// require byteWidth parameter for first-time initialization of `key` or `value` writers
1556+
assertThrows(NullPointerException.class, () -> writer.key().fixedSizeBinary().write(holder1));
1557+
assertThrows(
1558+
NullPointerException.class, () -> writer.value().fixedSizeBinary().write(holder2));
1559+
writer.key().fixedSizeBinary(holder1.byteWidth).write(holder1);
1560+
writer.value().fixedSizeBinary(holder2.byteWidth).write(holder2);
1561+
writer.endEntry();
1562+
holder1.buffer.close();
1563+
holder2.buffer.close();
1564+
writer.endMap();
1565+
1566+
writer.setValueCount(1);
1567+
1568+
// assert the output vector is correct
1569+
FieldReader reader = mapVector.getReader();
1570+
assertTrue(reader.isSet(), "shouldn't be null");
1571+
1572+
/* index 0 */
1573+
Object result = mapVector.getObject(0);
1574+
ArrayList<?> resultSet = (ArrayList<?>) result;
1575+
assertEquals(1, resultSet.size());
1576+
Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
1577+
assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
1578+
assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
1579+
assertArrayEquals(new byte[] {11, 22}, (byte[]) resultStruct.get(MapVector.KEY_NAME));
1580+
assertArrayEquals(new byte[] {32, 21}, (byte[]) resultStruct.get(MapVector.VALUE_NAME));
1581+
}
1582+
}
13621583
}

0 commit comments

Comments
 (0)