Skip to content

Commit fa0d974

Browse files
authored
[core] Fix convert null row to column row vector (#6990)
1 parent 66fb0a0 commit fa0d974

File tree

3 files changed

+126
-0
lines changed

3 files changed

+126
-0
lines changed

paimon-common/src/main/java/org/apache/paimon/data/columnar/heap/HeapRowVector.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,14 @@ public void setFields(WritableColumnVector[] fields) {
6161
System.arraycopy(fields, 0, this.children, 0, fields.length);
6262
this.vectorizedColumnBatch = new VectorizedColumnBatch(children);
6363
}
64+
65+
@Override
66+
public void appendNull() {
67+
super.appendNull();
68+
for (ColumnVector child : children) {
69+
if (child instanceof WritableColumnVector) {
70+
((WritableColumnVector) child).appendNull();
71+
}
72+
}
73+
}
6474
}

paimon-common/src/test/java/org/apache/paimon/data/columnar/RowToColumnConverterTest.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,4 +458,53 @@ public void testConvertRowType() {
458458
assertThat(new String(nameVector.getBytes(2).getBytes())).isEqualTo("Charlie");
459459
assertThat(rowVector.getRow(2).getInt(0)).isEqualTo(3);
460460
}
461+
462+
@Test
463+
public void testConvertNullableRowType() {
464+
RowType rowType =
465+
RowType.of(
466+
new DataField(
467+
0,
468+
"f",
469+
DataTypes.ROW(
470+
DataTypes.FIELD(0, "id", DataTypes.INT()),
471+
DataTypes.FIELD(1, "name", DataTypes.STRING()))));
472+
RowToColumnConverter converter = new RowToColumnConverter(rowType);
473+
474+
// Test null row value
475+
GenericRow row1 = GenericRow.of((Object) null);
476+
477+
// Test row with null fields
478+
GenericRow rowValue2 = GenericRow.of(null, BinaryString.fromString("Bob"));
479+
GenericRow row2 = GenericRow.of(rowValue2);
480+
481+
// Test row with all null fields
482+
GenericRow rowValue3 = GenericRow.of(null, null);
483+
GenericRow row3 = GenericRow.of(rowValue3);
484+
485+
HeapIntVector idVector = new HeapIntVector(3);
486+
HeapBytesVector nameVector = new HeapBytesVector(3);
487+
HeapRowVector rowVector = new HeapRowVector(3, idVector, nameVector);
488+
489+
WritableColumnVector[] vectors = new WritableColumnVector[] {rowVector};
490+
491+
// Convert null row
492+
converter.convert(row1, vectors);
493+
assertThat(rowVector.isNullAt(0)).isTrue();
494+
assertThat(idVector.isNullAt(0)).isTrue();
495+
assertThat(nameVector.isNullAt(0)).isTrue();
496+
497+
// Convert row with null id field
498+
converter.convert(row2, vectors);
499+
assertThat(rowVector.isNullAt(1)).isFalse();
500+
assertThat(idVector.isNullAt(1)).isTrue();
501+
assertThat(nameVector.isNullAt(1)).isFalse();
502+
assertThat(new String(nameVector.getBytes(1).getBytes())).isEqualTo("Bob");
503+
504+
// Convert row with all null fields
505+
converter.convert(row3, vectors);
506+
assertThat(rowVector.isNullAt(2)).isFalse();
507+
assertThat(idVector.isNullAt(2)).isTrue();
508+
assertThat(nameVector.isNullAt(2)).isTrue();
509+
}
461510
}

paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/VariantTestBase.scala

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,4 +386,71 @@ abstract class VariantTestBase extends PaimonSparkTestBase {
386386
}
387387
}
388388
}
389+
390+
test("Paimon Variant: read and write variant with null value") {
391+
withTable("source_tbl", "target_tbl") {
392+
sql("CREATE TABLE source_tbl (id INT, js STRING) USING paimon")
393+
val n = 100
394+
val nullCount = 98
395+
val values = (1 to n)
396+
.map {
397+
i =>
398+
if (i <= nullCount) {
399+
s"($i, null)"
400+
} else {
401+
val jsonStr =
402+
s"""
403+
|'{
404+
| "id":$i,"name":"user$i","age":${20 + (i % 50)},
405+
| "tags":[{"type":"vip","level":$i},{"type":"premium","level":$i}],
406+
| "address":{"city":"city$i","street":"street$i"}
407+
|}'
408+
|""".stripMargin
409+
s"($i, $jsonStr)"
410+
}
411+
}
412+
.mkString(", ")
413+
sql(s"INSERT INTO source_tbl VALUES $values")
414+
415+
sql("CREATE TABLE target_tbl (id INT, v VARIANT) USING paimon")
416+
sql("INSERT INTO target_tbl SELECT id, parse_json(js) FROM source_tbl")
417+
418+
checkAnswer(
419+
sql("""
420+
|SELECT
421+
|variant_get(v, '$.name', 'string'),
422+
|variant_get(v, '$.tags', 'string'),
423+
|variant_get(v, '$.tags', 'array<string>'),
424+
|variant_get(v, '$.tags', 'array<struct<type string, level int>>'),
425+
|variant_get(v, '$.tags[0]', 'string'),
426+
|variant_get(v, '$.tags[0]', 'struct<type string, level int>'),
427+
|variant_get(v, '$.tags[1].type', 'string'),
428+
|variant_get(v, '$.address', 'string')
429+
|FROM target_tbl where v IS NOT NULL
430+
|""".stripMargin),
431+
Seq(
432+
Row(
433+
"user99",
434+
"[{\"level\":99,\"type\":\"vip\"},{\"level\":99,\"type\":\"premium\"}]",
435+
Array("{\"level\":99,\"type\":\"vip\"}", "{\"level\":99,\"type\":\"premium\"}"),
436+
Array(Row("vip", 99), Row("premium", 99)),
437+
"{\"level\":99,\"type\":\"vip\"}",
438+
Row("vip", 99),
439+
"premium",
440+
"{\"city\":\"city99\",\"street\":\"street99\"}"
441+
),
442+
Row(
443+
"user100",
444+
"[{\"level\":100,\"type\":\"vip\"},{\"level\":100,\"type\":\"premium\"}]",
445+
Array("{\"level\":100,\"type\":\"vip\"}", "{\"level\":100,\"type\":\"premium\"}"),
446+
Array(Row("vip", 100), Row("premium", 100)),
447+
"{\"level\":100,\"type\":\"vip\"}",
448+
Row("vip", 100),
449+
"premium",
450+
"{\"city\":\"city100\",\"street\":\"street100\"}"
451+
)
452+
)
453+
)
454+
}
455+
}
389456
}

0 commit comments

Comments
 (0)