huan233usc
diff --git a/‎spark/v2/src/main/java/io/delta/spark/internal/v2/utils/KernelRowToSparkRow.java‎
Lines changed: 183 additions & 0 deletions b/‎spark/v2/src/main/java/io/delta/spark/internal/v2/utils/KernelRowToSparkRow.java‎
Lines changed: 183 additions & 0 deletions
diff --git a/‎spark/v2/src/main/java/io/delta/spark/internal/v2/utils/SparkRowToKernelRow.java‎
Lines changed: 211 additions & 0 deletions b/‎spark/v2/src/main/java/io/delta/spark/internal/v2/utils/SparkRowToKernelRow.java‎
Lines changed: 211 additions & 0 deletions
@@ -0,0 +1,183 @@
+/*
+ * Copyright (2025) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.delta.spark.internal.v2.utils;
+
+import io.delta.kernel.data.ArrayValue;
+import io.delta.kernel.data.ColumnVector;
+import io.delta.kernel.data.MapValue;
+import io.delta.kernel.internal.data.StructRow;
+import io.delta.kernel.types.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.spark.sql.GenericRowWithSchema;
+import org.apache.spark.sql.Row;
+
+/**
+ * Zero-copy wrapper that presents a Kernel {@link io.delta.kernel.data.Row} as a Spark {@link Row}.
+ * Primitive field access delegates directly to the Kernel Row with no data copy. Complex types
+ * (Map, Array, Struct) are lazily converted on access.
+ */
+public class KernelRowToSparkRow implements Row {
+
+  private final io.delta.kernel.data.Row kernelRow;
+  private final StructType kernelSchema;
+  private final org.apache.spark.sql.types.StructType sparkSchema;
+
+  public KernelRowToSparkRow(io.delta.kernel.data.Row kernelRow) {
+    this(kernelRow, SchemaUtils.convertKernelSchemaToSparkSchema(kernelRow.getSchema()));
+  }
+
+  /**
+   * Constructor that accepts a pre-computed Spark schema to avoid redundant schema conversion when
+   * wrapping many rows that share the same schema.
+   */
+  public KernelRowToSparkRow(
+      io.delta.kernel.data.Row kernelRow, org.apache.spark.sql.types.StructType sparkSchema) {
+    this.kernelRow = kernelRow;
+    this.kernelSchema = kernelRow.getSchema();
+    this.sparkSchema = sparkSchema;
+  }
+
+  @Override
+  public int length() {
+    return kernelSchema.length();
+  }
+
+  @Override
+  public org.apache.spark.sql.types.StructType schema() {
+    return sparkSchema;
+  }
+
+  @Override
+  public Object get(int i) {
+    if (kernelRow.isNullAt(i)) {
+      return null;
+    }
+    return extractSparkValue(kernelRow, i, kernelSchema.at(i).getDataType());
+  }
+
+  @Override
+  public Row copy() {
+    Object[] values = new Object[length()];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = get(i);
+    }
+    return new GenericRowWithSchema(values, sparkSchema);
+  }
+
+  // ---- type dispatch: Kernel Row field -> Spark-compatible Object ----
+
+  private static Object extractSparkValue(io.delta.kernel.data.Row row, int ordinal, DataType dt) {
+    if (dt instanceof BooleanType) {
+      return row.getBoolean(ordinal);
+    } else if (dt instanceof ByteType) {
+      return row.getByte(ordinal);
+    } else if (dt instanceof ShortType) {
+      return row.getShort(ordinal);
+    } else if (dt instanceof IntegerType || dt instanceof DateType) {
+      return row.getInt(ordinal);
+    } else if (dt instanceof LongType
+        || dt instanceof TimestampType
+        || dt instanceof TimestampNTZType) {
+      return row.getLong(ordinal);
+    } else if (dt instanceof FloatType) {
+      return row.getFloat(ordinal);
+    } else if (dt instanceof DoubleType) {
+      return row.getDouble(ordinal);
+    } else if (dt instanceof StringType) {
+      return row.getString(ordinal);
+    } else if (dt instanceof DecimalType) {
+      return row.getDecimal(ordinal);
+    } else if (dt instanceof BinaryType) {
+      return row.getBinary(ordinal);
+    } else if (dt instanceof StructType) {
+      return new KernelRowToSparkRow(
+          row.getStruct(ordinal), SchemaUtils.convertKernelSchemaToSparkSchema((StructType) dt));
+    } else if (dt instanceof MapType) {
+      return mapValueToScalaMap(row.getMap(ordinal), (MapType) dt);
+    } else if (dt instanceof ArrayType) {
+      return arrayValueToScalaSeq(row.getArray(ordinal), (ArrayType) dt);
+    }
+    throw new UnsupportedOperationException("Unsupported Kernel DataType: " + dt);
+  }
+
+  // ---- Kernel ColumnVector element -> Spark-compatible Object ----
+
+  static Object vectorValueToSpark(ColumnVector cv, int rowId, DataType dt) {
+    if (cv.isNullAt(rowId)) {
+      return null;
+    }
+    if (dt instanceof BooleanType) {
+      return cv.getBoolean(rowId);
+    } else if (dt instanceof ByteType) {
+      return cv.getByte(rowId);
+    } else if (dt instanceof ShortType) {
+      return cv.getShort(rowId);
+    } else if (dt instanceof IntegerType || dt instanceof DateType) {
+      return cv.getInt(rowId);
+    } else if (dt instanceof LongType
+        || dt instanceof TimestampType
+        || dt instanceof TimestampNTZType) {
+      return cv.getLong(rowId);
+    } else if (dt instanceof FloatType) {
+      return cv.getFloat(rowId);
+    } else if (dt instanceof DoubleType) {
+      return cv.getDouble(rowId);
+    } else if (dt instanceof StringType) {
+      return cv.getString(rowId);
+    } else if (dt instanceof DecimalType) {
+      return cv.getDecimal(rowId);
+    } else if (dt instanceof BinaryType) {
+      return cv.getBinary(rowId);
+    } else if (dt instanceof StructType) {
+      return new KernelRowToSparkRow(
+          StructRow.fromStructVector(cv, rowId),
+          SchemaUtils.convertKernelSchemaToSparkSchema((StructType) dt));
+    } else if (dt instanceof MapType) {
+      return mapValueToScalaMap(cv.getMap(rowId), (MapType) dt);
+    } else if (dt instanceof ArrayType) {
+      return arrayValueToScalaSeq(cv.getArray(rowId), (ArrayType) dt);
+    }
+    throw new UnsupportedOperationException("Unsupported Kernel DataType: " + dt);
+  }
+
+  // ---- MapValue -> scala.collection.Map ----
+
+  static scala.collection.Map<Object, Object> mapValueToScalaMap(MapValue mv, MapType mt) {
+    ColumnVector keys = mv.getKeys();
+    ColumnVector values = mv.getValues();
+    Map<Object, Object> javaMap = new HashMap<>();
+    for (int i = 0; i < mv.getSize(); i++) {
+      Object key = vectorValueToSpark(keys, i, mt.getKeyType());
+      Object value = vectorValueToSpark(values, i, mt.getValueType());
+      javaMap.put(key, value);
+    }
+    return scala.jdk.javaapi.CollectionConverters.asScala(javaMap);
+  }
+
+  // ---- ArrayValue -> scala.collection.Seq ----
+
+  static scala.collection.Seq<Object> arrayValueToScalaSeq(ArrayValue av, ArrayType at) {
+    ColumnVector elements = av.getElements();
+    List<Object> javaList = new ArrayList<>();
+    for (int i = 0; i < av.getSize(); i++) {
+      javaList.add(vectorValueToSpark(elements, i, at.getElementType()));
+    }
+    return scala.jdk.javaapi.CollectionConverters.asScala(javaList).toList();
+  }
+}
@@ -0,0 +1,211 @@
+/*
+ * Copyright (2025) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.delta.spark.internal.v2.utils;
+
+import io.delta.kernel.data.ArrayValue;
+import io.delta.kernel.data.ColumnVector;
+import io.delta.kernel.data.MapValue;
+import io.delta.kernel.data.Row;
+import io.delta.kernel.internal.util.VectorUtils;
+import io.delta.kernel.types.*;
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Adapter that presents a Spark {@link org.apache.spark.sql.Row} through the Kernel {@link Row}
+ * interface without converting the whole row up front.
+ *
+ * <p>Scalar accessors delegate directly to the wrapped Spark row. Struct fields are returned as
+ * nested {@code SparkRowToKernelRow} wrappers; map and array fields are converted into Kernel
+ * {@link MapValue} / {@link ArrayValue} instances each time they are accessed (the result is not
+ * cached).
+ *
+ * <p>The Kernel schema passed to the constructor drives the conversion of complex fields, so it
+ * must describe the same field layout as the Spark row.
+ */
+public class SparkRowToKernelRow implements Row {
+
+  private final org.apache.spark.sql.Row sparkRow;
+  private final StructType kernelSchema;
+
+  /**
+   * @param sparkRow the Spark row to expose as a Kernel row
+   * @param kernelSchema the Kernel schema reported by {@link #getSchema()} and used to pick the
+   *     conversion for struct, map and array fields
+   */
+  public SparkRowToKernelRow(org.apache.spark.sql.Row sparkRow, StructType kernelSchema) {
+    this.sparkRow = sparkRow;
+    this.kernelSchema = kernelSchema;
+  }
+
+  @Override
+  public StructType getSchema() {
+    return kernelSchema;
+  }
+
+  @Override
+  public boolean isNullAt(int ordinal) {
+    return sparkRow.isNullAt(ordinal);
+  }
+
+  @Override
+  public boolean getBoolean(int ordinal) {
+    return sparkRow.getBoolean(ordinal);
+  }
+
+  @Override
+  public byte getByte(int ordinal) {
+    return sparkRow.getByte(ordinal);
+  }
+
+  @Override
+  public short getShort(int ordinal) {
+    return sparkRow.getShort(ordinal);
+  }
+
+  @Override
+  public int getInt(int ordinal) {
+    return sparkRow.getInt(ordinal);
+  }
+
+  @Override
+  public long getLong(int ordinal) {
+    return sparkRow.getLong(ordinal);
+  }
+
+  @Override
+  public float getFloat(int ordinal) {
+    return sparkRow.getFloat(ordinal);
+  }
+
+  @Override
+  public double getDouble(int ordinal) {
+    return sparkRow.getDouble(ordinal);
+  }
+
+  @Override
+  public String getString(int ordinal) {
+    return sparkRow.getString(ordinal);
+  }
+
+  @Override
+  public BigDecimal getDecimal(int ordinal) {
+    return sparkRow.getDecimal(ordinal);
+  }
+
+  /** Returns the field as a byte array by casting the object stored in the Spark row. */
+  @Override
+  public byte[] getBinary(int ordinal) {
+    return (byte[]) sparkRow.get(ordinal);
+  }
+
+  /**
+   * Returns the struct field at {@code ordinal} wrapped as a Kernel row, or {@code null} when the
+   * field is null.
+   */
+  @Override
+  public Row getStruct(int ordinal) {
+    // Guard first: otherwise a null struct would be wrapped and only fail later on first access.
+    if (sparkRow.isNullAt(ordinal)) {
+      return null;
+    }
+    org.apache.spark.sql.Row nested = sparkRow.getStruct(ordinal);
+    StructType nestedSchema = (StructType) kernelSchema.at(ordinal).getDataType();
+    return new SparkRowToKernelRow(nested, nestedSchema);
+  }
+
+  /**
+   * Returns the map field at {@code ordinal} converted to a Kernel {@link MapValue}, or {@code
+   * null} when the field is null.
+   */
+  @Override
+  public MapValue getMap(int ordinal) {
+    // Guard first so a null field is never handed to the Spark-side map conversion.
+    if (sparkRow.isNullAt(ordinal)) {
+      return null;
+    }
+    Map<?, ?> javaMap = sparkRow.getJavaMap(ordinal);
+    MapType mt = (MapType) kernelSchema.at(ordinal).getDataType();
+    return javaMapToKernelMapValue(javaMap, mt);
+  }
+
+  /**
+   * Returns the array field at {@code ordinal} converted to a Kernel {@link ArrayValue}, or {@code
+   * null} when the field is null.
+   */
+  @Override
+  public ArrayValue getArray(int ordinal) {
+    // Guard first so a null field is never handed to the Spark-side list conversion.
+    if (sparkRow.isNullAt(ordinal)) {
+      return null;
+    }
+    List<?> javaList = sparkRow.getList(ordinal);
+    ArrayType at = (ArrayType) kernelSchema.at(ordinal).getDataType();
+    return javaListToKernelArrayValue(javaList, at);
+  }
+
+  // ---- java.util.Map -> Kernel MapValue ----
+
+  /**
+   * Converts a Java map into a Kernel {@link MapValue}. Keys and values are converted with {@link
+   * #sparkValueToKernel} in the map's iteration order and stored in two parallel column vectors.
+   *
+   * @param javaMap the entries to convert
+   * @param mt the Kernel map type supplying the key and value data types
+   */
+  static MapValue javaMapToKernelMapValue(Map<?, ?> javaMap, MapType mt) {
+    DataType keyType = mt.getKeyType();
+    DataType valueType = mt.getValueType();
+    List<Object> keys = new ArrayList<>(javaMap.size());
+    List<Object> values = new ArrayList<>(javaMap.size());
+    for (Map.Entry<?, ?> entry : javaMap.entrySet()) {
+      keys.add(sparkValueToKernel(entry.getKey(), keyType));
+      values.add(sparkValueToKernel(entry.getValue(), valueType));
+    }
+    ColumnVector keyVector = VectorUtils.buildColumnVector(keys, keyType);
+    ColumnVector valueVector = VectorUtils.buildColumnVector(values, valueType);
+    return new MapValue() {
+      @Override
+      public int getSize() {
+        return keys.size();
+      }
+
+      @Override
+      public ColumnVector getKeys() {
+        return keyVector;
+      }
+
+      @Override
+      public ColumnVector getValues() {
+        return valueVector;
+      }
+    };
+  }
+
+  // ---- java.util.List -> Kernel ArrayValue ----
+
+  /**
+   * Converts a Java list into a Kernel {@link ArrayValue}, converting each element with {@link
+   * #sparkValueToKernel} and preserving element order.
+   *
+   * @param javaList the elements to convert
+   * @param at the Kernel array type supplying the element data type
+   */
+  static ArrayValue javaListToKernelArrayValue(List<?> javaList, ArrayType at) {
+    DataType elementType = at.getElementType();
+    List<Object> kernelValues = new ArrayList<>(javaList.size());
+    for (Object element : javaList) {
+      kernelValues.add(sparkValueToKernel(element, elementType));
+    }
+    return VectorUtils.buildArrayValue(kernelValues, elementType);
+  }
+
+  // ---- Spark value -> Kernel-compatible value ----
+
+  /**
+   * Converts a single Spark-side value into the object representation used on the Kernel side.
+   *
+   * <p>Scalar values (including dates, timestamps, decimals and binary) are passed through
+   * unchanged. Structs are wrapped in a {@code SparkRowToKernelRow}; maps and arrays are accepted
+   * either as Scala collections or as {@code java.util} collections and converted recursively.
+   *
+   * @param sparkValue the value to convert; may be {@code null}
+   * @param dt the Kernel data type describing {@code sparkValue}
+   * @return {@code null} if {@code sparkValue} is null, otherwise the Kernel-compatible value
+   * @throws UnsupportedOperationException if {@code dt} is not one of the handled types
+   */
+  @SuppressWarnings("unchecked")
+  static Object sparkValueToKernel(Object sparkValue, DataType dt) {
+    if (sparkValue == null) {
+      return null;
+    }
+    if (dt instanceof BooleanType
+        || dt instanceof ByteType
+        || dt instanceof ShortType
+        || dt instanceof IntegerType
+        || dt instanceof DateType
+        || dt instanceof LongType
+        || dt instanceof TimestampType
+        || dt instanceof TimestampNTZType
+        || dt instanceof FloatType
+        || dt instanceof DoubleType
+        || dt instanceof StringType
+        || dt instanceof BinaryType
+        || dt instanceof DecimalType) {
+      return sparkValue;
+    }
+    if (dt instanceof StructType) {
+      return new SparkRowToKernelRow((org.apache.spark.sql.Row) sparkValue, (StructType) dt);
+    }
+    if (dt instanceof MapType) {
+      Map<?, ?> javaMap;
+      if (sparkValue instanceof scala.collection.Map) {
+        javaMap =
+            scala.jdk.javaapi.CollectionConverters.asJava((scala.collection.Map<?, ?>) sparkValue);
+      } else {
+        javaMap = (Map<?, ?>) sparkValue;
+      }
+      return javaMapToKernelMapValue(javaMap, (MapType) dt);
+    }
+    if (dt instanceof ArrayType) {
+      List<?> javaList;
+      if (sparkValue instanceof scala.collection.Seq) {
+        javaList =
+            scala.jdk.javaapi.CollectionConverters.asJava((scala.collection.Seq<?>) sparkValue);
+      } else {
+        javaList = (List<?>) sparkValue;
+      }
+      return javaListToKernelArrayValue(javaList, (ArrayType) dt);
+    }
+    throw new UnsupportedOperationException("Unsupported Kernel DataType: " + dt);
+  }
+}