Skip to content
Draft
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
65e1ff5
from_protobuf
thirtiseven Feb 26, 2026
a225f9a
address self review comments
thirtiseven Feb 26, 2026
e517e43
fix schema projection
thirtiseven Feb 26, 2026
bc1bee7
address comments
thirtiseven Feb 28, 2026
62679af
bug fix and clean up
thirtiseven Mar 2, 2026
f7d9551
bug fix and clean up
thirtiseven Mar 3, 2026
802488d
address cc comments
thirtiseven Mar 3, 2026
030fdf8
codex review and address
thirtiseven Mar 3, 2026
73ce21b
verify and fix shim build error
thirtiseven Mar 3, 2026
685c885
Update copyright year in pom.xml
thirtiseven Mar 3, 2026
781e639
Update copyright year in pom.xml
thirtiseven Mar 3, 2026
43a4c08
clean up
thirtiseven Mar 3, 2026
47897e9
Merge branch 'main' into from_protobuf_nested
thirtiseven Mar 3, 2026
c1ef9fb
address comments
thirtiseven Mar 5, 2026
8a2c007
address comments
thirtiseven Mar 5, 2026
035fd1d
address comments
thirtiseven Mar 5, 2026
c812754
fix shim
thirtiseven Mar 5, 2026
4551f28
address comments
thirtiseven Mar 5, 2026
ca3b7c5
address comments
thirtiseven Mar 5, 2026
6860c91
address comments
thirtiseven Mar 5, 2026
c1c919b
address comments
thirtiseven Mar 5, 2026
d71f7a1
address comments
thirtiseven Mar 5, 2026
0c9385a
address comments
thirtiseven Mar 6, 2026
303b4f8
address comments
thirtiseven Mar 6, 2026
981b75c
address comments
thirtiseven Mar 6, 2026
539af83
Batch merging after schema projection
thirtiseven Mar 11, 2026
46861ec
Merge remote-tracking branch 'origin/main' into from_protobuf_nested
thirtiseven Mar 11, 2026
19eb983
fix the enum bug
thirtiseven Mar 11, 2026
3059a50
reflection refactor
thirtiseven Mar 12, 2026
2c47aee
equals and hashcode
thirtiseven Mar 12, 2026
cea7594
address comments
thirtiseven Mar 12, 2026
9467ebc
address comments
thirtiseven Mar 12, 2026
c647776
address comments
thirtiseven Mar 12, 2026
be7bdc6
address comments
thirtiseven Mar 12, 2026
16f2f6e
address commmit
thirtiseven Mar 12, 2026
0467d72
address comments
thirtiseven Mar 13, 2026
2d541b6
address comments
thirtiseven Mar 13, 2026
8d34376
address comments
thirtiseven Mar 13, 2026
a714cd5
address comments
thirtiseven Mar 13, 2026
e18f64c
address comments
thirtiseven Mar 13, 2026
627cd91
bugfix
thirtiseven Mar 13, 2026
f070a43
bugfix and address comments
thirtiseven Mar 14, 2026
60b0f65
bugfix
thirtiseven Mar 14, 2026
ce1555b
address comments
thirtiseven Mar 14, 2026
f8dd5bc
address comments
thirtiseven Mar 14, 2026
fc3bf46
comments
thirtiseven Mar 14, 2026
44ae86f
comments
thirtiseven Mar 15, 2026
e25f798
comments
thirtiseven Mar 15, 2026
a13cd5c
comments
thirtiseven Mar 16, 2026
a3d0e70
comments
thirtiseven Mar 16, 2026
99b899f
comments and style
thirtiseven Mar 16, 2026
3425256
backport integration test change
thirtiseven Mar 17, 2026
4b05958
apply jni refactor
thirtiseven Mar 19, 2026
a44aee1
address comments
thirtiseven Mar 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/supported_ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -8267,7 +8267,7 @@ are limited.
<td> </td>
<td> </td>
<td> </td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT, DAYTIME, YEARMONTH</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT, DAYTIME, YEARMONTH</em></td>
<td> </td>
<td> </td>
<td> </td>
Expand All @@ -8290,7 +8290,7 @@ are limited.
<td> </td>
<td> </td>
<td> </td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT, DAYTIME, YEARMONTH</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT, DAYTIME, YEARMONTH</em></td>
<td> </td>
<td> </td>
<td> </td>
Expand Down
67 changes: 65 additions & 2 deletions integration_tests/run_pyspark_from_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
# To run all tests, including Avro tests:
# INCLUDE_SPARK_AVRO_JAR=true ./run_pyspark_from_build.sh
#
# To run tests WITHOUT Protobuf tests (protobuf is included by default):
# INCLUDE_SPARK_PROTOBUF_JAR=false ./run_pyspark_from_build.sh
#
# To run a specific test:
# TEST=my_test ./run_pyspark_from_build.sh
#
Expand Down Expand Up @@ -141,9 +144,68 @@ else
AVRO_JARS=""
fi

# ALL_JARS includes dist.jar integration-test.jar avro.jar parquet.jar if they exist
# Protobuf support: Include spark-protobuf jar by default for protobuf_test.py
# Set INCLUDE_SPARK_PROTOBUF_JAR=false to disable
PROTOBUF_JARS=""
if [[ $( echo ${INCLUDE_SPARK_PROTOBUF_JAR} | tr '[:upper:]' '[:lower:]' ) != "false" ]];
then
export INCLUDE_SPARK_PROTOBUF_JAR=true
mkdir -p "${TARGET_DIR}/dependency"

# Download spark-protobuf jar if not already in target/dependency
PROTOBUF_JAR_NAME="spark-protobuf_${SCALA_VERSION}-${VERSION_STRING}.jar"
PROTOBUF_JAR_PATH="${TARGET_DIR}/dependency/${PROTOBUF_JAR_NAME}"

if [[ ! -f "$PROTOBUF_JAR_PATH" ]]; then
echo "Downloading spark-protobuf jar..."
PROTOBUF_MAVEN_URL="https://repo1.maven.org/maven2/org/apache/spark/spark-protobuf_${SCALA_VERSION}/${VERSION_STRING}/${PROTOBUF_JAR_NAME}"
if curl -sL -o "$PROTOBUF_JAR_PATH" "$PROTOBUF_MAVEN_URL"; then
echo "Downloaded spark-protobuf jar to $PROTOBUF_JAR_PATH"
else
echo "WARNING: Failed to download spark-protobuf jar from $PROTOBUF_MAVEN_URL"
rm -f "$PROTOBUF_JAR_PATH"
fi
fi

# Also download protobuf-java jar (required dependency)
# Spark 3.5.x uses protobuf-java 3.25.1
PROTOBUF_JAVA_VERSION="3.25.1"
PROTOBUF_JAVA_JAR_NAME="protobuf-java-${PROTOBUF_JAVA_VERSION}.jar"
PROTOBUF_JAVA_JAR_PATH="${TARGET_DIR}/dependency/${PROTOBUF_JAVA_JAR_NAME}"

if [[ ! -f "$PROTOBUF_JAVA_JAR_PATH" ]]; then
echo "Downloading protobuf-java jar..."
PROTOBUF_JAVA_MAVEN_URL="https://repo1.maven.org/maven2/com/google/protobuf/protobuf-java/${PROTOBUF_JAVA_VERSION}/${PROTOBUF_JAVA_JAR_NAME}"
if curl -sL -o "$PROTOBUF_JAVA_JAR_PATH" "$PROTOBUF_JAVA_MAVEN_URL"; then
echo "Downloaded protobuf-java jar to $PROTOBUF_JAVA_JAR_PATH"
else
echo "WARNING: Failed to download protobuf-java jar from $PROTOBUF_JAVA_MAVEN_URL"
rm -f "$PROTOBUF_JAVA_JAR_PATH"
fi
fi

if [[ -f "$PROTOBUF_JAR_PATH" ]]; then
PROTOBUF_JARS="$PROTOBUF_JAR_PATH"
echo "Including spark-protobuf jar: $PROTOBUF_JAR_PATH"
fi
if [[ -f "$PROTOBUF_JAVA_JAR_PATH" ]]; then
PROTOBUF_JARS="$PROTOBUF_JARS $PROTOBUF_JAVA_JAR_PATH"
echo "Including protobuf-java jar: $PROTOBUF_JAVA_JAR_PATH"
fi
# Also add protobuf jars to driver classpath for Class.forName() to work
# This is needed because --jars only adds to executor classpath
if [[ -n "$PROTOBUF_JARS" ]]; then
PROTOBUF_DRIVER_CP=$(echo $PROTOBUF_JARS | tr ' ' ':')
export PYSP_TEST_spark_driver_extraClassPath="${PYSP_TEST_spark_driver_extraClassPath:+${PYSP_TEST_spark_driver_extraClassPath}:}${PROTOBUF_DRIVER_CP}"
echo "Added protobuf jars to driver classpath"
fi
else
export INCLUDE_SPARK_PROTOBUF_JAR=false
fi

# ALL_JARS includes dist.jar integration-test.jar avro.jar parquet.jar protobuf.jar if they exist
# Remove non-existing paths and canonicalize the paths including get rid of links and `..`
ALL_JARS=$(readlink -e $PLUGIN_JAR $TEST_JARS $AVRO_JARS $PARQUET_HADOOP_TESTS || true)
ALL_JARS=$(readlink -e $PLUGIN_JAR $TEST_JARS $AVRO_JARS $PARQUET_HADOOP_TESTS $PROTOBUF_JARS || true)
# `:` separated jars
ALL_JARS="${ALL_JARS//$'\n'/:}"

Expand Down Expand Up @@ -411,6 +473,7 @@ else
export PYSP_TEST_spark_gluten_loadLibFromJar=true
fi


SPARK_SHELL_SMOKE_TEST="${SPARK_SHELL_SMOKE_TEST:-0}"
EXPLAIN_ONLY_CPU_SMOKE_TEST="${EXPLAIN_ONLY_CPU_SMOKE_TEST:-0}"
SPARK_CONNECT_SMOKE_TEST="${SPARK_CONNECT_SMOKE_TEST:-0}"
Expand Down
Loading
Loading