Commit f1d0879
chore: Add ignored tests for reading complex types from Parquet (apache#1167)
* Add ignored tests for reading structs from Parquet
* Add basic map test
* Add tests for Map and Array
1 parent 7db9aa6 commit f1d0879
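
Note: `ignore` in these tests is ScalaTest's keyword for registering a test without running it, so the new tests compile and stay visible in reports until the underlying feature lands. A minimal sketch of the mechanism (a hypothetical standalone suite, not Comet code):

import org.scalatest.funsuite.AnyFunSuite

// Hypothetical example suite; not part of the Comet codebase.
class IgnoreExampleSuite extends AnyFunSuite {
  // Registered and run as a normal test.
  test("enabled") {
    assert(1 + 1 == 2)
  }

  // Compiled and registered, but reported as ignored at run time;
  // flipping `ignore` to `test` enables it.
  ignore("disabled until the feature lands") {
    assert(1 + 1 == 2)
  }
}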

1 file changed: spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala (+127 additions, −0 deletions)
@@ -2195,6 +2195,133 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
+  ignore("get_struct_field - select primitive fields") {
+    withTempPath { dir =>
+      // create input file with Comet disabled
+      withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+        val df = spark
+          .range(5)
+          // Add both a null struct and null inner value
+          .select(when(col("id") > 1, struct(when(col("id") > 2, col("id")).alias("id")))
+            .alias("nested1"))
+
+        df.write.parquet(dir.toString())
+      }
+
+      Seq("", "parquet").foreach { v1List =>
+        withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+          val df = spark.read.parquet(dir.toString())
+          checkSparkAnswerAndOperator(df.select("nested1.id"))
+        }
+      }
+    }
+  }
+
+  ignore("get_struct_field - select subset of struct") {
+    withTempPath { dir =>
+      // create input file with Comet disabled
+      withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+        val df = spark
+          .range(5)
+          // Add both a null struct and null inner value
+          .select(
+            when(
+              col("id") > 1,
+              struct(
+                when(col("id") > 2, col("id")).alias("id"),
+                when(col("id") > 2, struct(when(col("id") > 3, col("id")).alias("id")))
+                  .as("nested2")))
+              .alias("nested1"))
+
+        df.write.parquet(dir.toString())
+      }
+
+      Seq("", "parquet").foreach { v1List =>
+        withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+          val df = spark.read.parquet(dir.toString())
+          checkSparkAnswerAndOperator(df.select("nested1.id"))
+          checkSparkAnswerAndOperator(df.select("nested1.nested2"))
+          checkSparkAnswerAndOperator(df.select("nested1.nested2.id"))
+          checkSparkAnswerAndOperator(df.select("nested1.id", "nested1.nested2.id"))
+        }
+      }
+    }
+  }
+
+  ignore("get_struct_field - read entire struct") {
+    withTempPath { dir =>
+      // create input file with Comet disabled
+      withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+        val df = spark
+          .range(5)
+          // Add both a null struct and null inner value
+          .select(
+            when(
+              col("id") > 1,
+              struct(
+                when(col("id") > 2, col("id")).alias("id"),
+                when(col("id") > 2, struct(when(col("id") > 3, col("id")).alias("id")))
+                  .as("nested2")))
+              .alias("nested1"))
+
+        df.write.parquet(dir.toString())
+      }
+
+      Seq("", "parquet").foreach { v1List =>
+        withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+          val df = spark.read.parquet(dir.toString())
+          checkSparkAnswerAndOperator(df.select("nested1"))
+        }
+      }
+    }
+  }
+
+  ignore("read map[int, int] from parquet") {
+    withTempPath { dir =>
+      // create input file with Comet disabled
+      withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+        val df = spark
+          .range(5)
+          // Spark does not allow null as a key but does allow null as a
+          // value, and the entire map may be null
+          .select(
+            when(col("id") > 1, map(col("id"), when(col("id") > 2, col("id")))).alias("map1"))
+        df.write.parquet(dir.toString())
+      }
+
+      Seq("", "parquet").foreach { v1List =>
+        withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+          val df = spark.read.parquet(dir.toString())
+          checkSparkAnswerAndOperator(df.select("map1"))
+          checkSparkAnswerAndOperator(df.select(map_keys(col("map1"))))
+          checkSparkAnswerAndOperator(df.select(map_values(col("map1"))))
+        }
+      }
+    }
+  }
+
+  ignore("read array[int] from parquet") {
+    withTempPath { dir =>
+      // create input file with Comet disabled
+      withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+        val df = spark
+          .range(5)
+          // The array values here are never null, but the entire
+          // array may be null
+          .select(when(col("id") > 1, sequence(lit(0), col("id") * 2)).alias("array1"))
+        df.write.parquet(dir.toString())
+      }
+
+      Seq("", "parquet").foreach { v1List =>
+        withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+          val df = spark.read.parquet(dir.toString())
+          checkSparkAnswerAndOperator(df.select("array1"))
+          checkSparkAnswerAndOperator(df.select(element_at(col("array1"), lit(1))))
+        }
+      }
+    }
+  }
+
   test("CreateArray") {
     Seq(true, false).foreach { dictionaryEnabled =>
       withTempDir { dir =>
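
For readers outside the suite: `checkSparkAnswerAndOperator`, `withTempPath`, and `withSQLConf` are Comet/Spark test helpers, so the tests above do not run standalone. A minimal sketch of the same write-then-read round trip using only public Spark APIs (object name and temp path are invented for illustration; Comet configs omitted):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

// Hypothetical standalone program mirroring the first struct test.
object StructRoundTripSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").getOrCreate()
    val path =
      java.nio.file.Files.createTempDirectory("nested").resolve("data").toString

    // Same shape as the first test above: the struct is null for id <= 1
    // and its inner field is null for id <= 2.
    spark
      .range(5)
      .select(when(col("id") > 1, struct(when(col("id") > 2, col("id")).alias("id")))
        .alias("nested1"))
      .write
      .parquet(path)

    // Selecting a primitive field out of the struct is what exercises
    // the get_struct_field expression under test.
    spark.read.parquet(path).select("nested1.id").show()

    spark.stop()
  }
}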
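Each test also loops the read over `SQLConf.USE_V1_SOURCE_LIST` so that both datasource code paths get covered. A minimal standalone illustration of that toggle (object name and temp path are invented; the conf key is Spark's `spark.sql.sources.useV1SourceList`):

import org.apache.spark.sql.SparkSession

// Hypothetical standalone program showing the v1/v2 read toggle.
object V1V2ReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").getOrCreate()
    val path =
      java.nio.file.Files.createTempDirectory("v1v2").resolve("data").toString

    spark.range(5).write.parquet(path)

    // "parquet" keeps parquet reads on the v1 FileFormat path; an empty
    // list routes the same read through the DataSource V2 API instead.
    Seq("parquet", "").foreach { v1List =>
      spark.conf.set("spark.sql.sources.useV1SourceList", v1List)
      spark.read.parquet(path).show()
    }

    spark.stop()
  }
}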