Skip to content

Commit 950841b

Browse files
committed
[learn] Init test
1 parent d81a205 commit 950841b

File tree

19 files changed

+1852
-5
lines changed

19 files changed

+1852
-5
lines changed

scalastyle-config.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ This file is divided into 3 sections:
7171

7272
<check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
7373

74-
<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
74+
<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="false"></check>
7575

76-
<check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
76+
<check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="false">
7777
<parameters>
7878
<parameter name="maxLineLength"><![CDATA[100]]></parameter>
7979
<parameter name="tabSize"><![CDATA[2]]></parameter>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging {
285285

286286
planChangeLogger.logBatch(batch.name, batchStartPlan, curPlan)
287287
}
288-
planChangeLogger.logMetrics(RuleExecutor.getCurrentMetrics() - beforeMetrics)
288+
// planChangeLogger.logMetrics(RuleExecutor.getCurrentMetrics() - beforeMetrics)
289289

290290
curPlan
291291
}

sql/core/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,12 @@
217217
<artifactId>htmlunit-driver</artifactId>
218218
<scope>test</scope>
219219
</dependency>
220+
<dependency>
221+
<groupId>org.apache.paimon</groupId>
222+
<artifactId>paimon-spark-3.5</artifactId>
223+
<version>1.0.1</version>
224+
<scope>test</scope>
225+
</dependency>
220226
</dependencies>
221227
<build>
222228
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"name":"Yin", "address":{"city":"Columbus","state":"Ohio"}}
2+
{"name":123456, "address":{"city":null, "state":"California", "extraMetadata":"test message"}}

sql/core/src/test/resources/log4j2.properties

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ appender.console.type = Console
2525
appender.console.name = STDOUT
2626
appender.console.target = SYSTEM_OUT
2727
appender.console.layout.type = PatternLayout
28-
appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %maxLen{%m}{512}%n%ex{8}%n
28+
appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %maxLen{%m}{5120}%n%ex{8}%n
2929
appender.console.filter.threshold.type = ThresholdFilter
30-
appender.console.filter.threshold.level = warn
30+
appender.console.filter.threshold.level = error
3131

3232
#File Appender
3333
appender.file.type = File
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.learn
19+
20+
import java.io.File
21+
22+
import org.apache.spark.SparkConf
23+
import org.apache.spark.sql.{DataFrame, QueryTest, SparkSession}
24+
import org.apache.spark.sql.test.SharedSparkSession
25+
import org.apache.spark.util.Utils
26+
27+
abstract class BaseTest extends QueryTest with SharedSparkSession {
28+
29+
val _spark: SparkSession = spark
30+
31+
protected lazy val tempDBDir: File = Utils.createTempDir()
32+
33+
protected val dbName0: String = "test"
34+
35+
override protected def sparkConf: SparkConf = {
36+
super.sparkConf
37+
.set("spark.sql.catalog.paimon", "org.apache.paimon.spark.SparkCatalog")
38+
.set("spark.sql.catalog.paimon.warehouse", tempDBDir.getCanonicalPath)
39+
.set("spark.sql.extensions",
40+
"org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions")
41+
.set("spark.sql.planChangeLog.level", "error")
42+
.set("spark.sql.planChangeLog.batches", "")
43+
.set("spark.sql.planChangeLog.rules", "")
44+
// .set("spark.default.parallelism", "4")
45+
.set("spark.eventLog.enabled", "false")
46+
.set("spark.eventLog.dir", "/Users/zxy/data/spark/history")
47+
}
48+
49+
override protected def beforeAll(): Unit = {
50+
super.beforeAll()
51+
spark.sql(s"USE paimon")
52+
spark.sql(s"CREATE DATABASE IF NOT EXISTS paimon.$dbName0")
53+
}
54+
55+
override protected def afterAll(): Unit = {
56+
try {
57+
spark.sql(s"USE paimon")
58+
// spark.sql(s"USE default")
59+
spark.sql(s"DROP DATABASE IF EXISTS paimon.$dbName0 CASCADE")
60+
} finally {
61+
super.afterAll()
62+
}
63+
}
64+
65+
def printPlan(df: DataFrame): Unit = {
66+
// scalastyle:off println
67+
println("=== Optimized Plan ===")
68+
val optimizedPlan = df.queryExecution.optimizedPlan
69+
println(optimizedPlan)
70+
71+
println("=== Executed Plan ===")
72+
val executedPlan = df.queryExecution.executedPlan
73+
println(executedPlan)
74+
// scalastyle:on println
75+
76+
df.show()
77+
}
78+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.learn.bugfix
19+
20+
import org.apache.spark.sql.functions.col
21+
import org.apache.spark.sql.learn.BaseTest
22+
23+
class RDDAndDataframe extends BaseTest {
24+
25+
import testImplicits._
26+
27+
test("RDD: test parallelize") {
28+
val tuples = spark.sparkContext.parallelize(1 to 1).map(x => (x, x)).collect()
29+
tuples
30+
}
31+
32+
test("Dataframe: test repartitionByCol") {
33+
withSQLConf("spark.sql.adaptive.enabled" -> "false") {
34+
withSQLConf("spark.sql.shuffle.partitions" -> "40") {
35+
val df = (1 to 20).toDF("id")
36+
val partitions = df.rdd.getNumPartitions
37+
val df2 = df.repartition(col("id"))
38+
val partitions2 = df2.rdd.getNumPartitions
39+
df2.collect()
40+
}
41+
42+
withSQLConf("spark.sql.shuffle.partitions" -> "10") {
43+
val df = (1 to 20).toDF("id")
44+
val partitions = df.rdd.getNumPartitions
45+
val df2 = df.repartition(col("id"))
46+
val partitions2 = df2.rdd.getNumPartitions
47+
df2.collect()
48+
}
49+
}
50+
}
51+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.learn.bugfix
19+
20+
import org.apache.spark.sql.Row
21+
import org.apache.spark.sql.learn.BaseTest
22+
23+
class WriteChar extends BaseTest {
24+
25+
test("Bug fix: test write char with parquet") {
26+
sql("use spark_catalog")
27+
sql(s"CREATE DATABASE test_db LOCATION '$tempDBDir'")
28+
sql(s"USE test_db")
29+
30+
withTable("target", "source") {
31+
sql(
32+
s"""
33+
|CREATE TABLE test_db.target (c char(6))
34+
|USING parquet
35+
|""".stripMargin)
36+
37+
sql(
38+
s"""
39+
|CREATE TABLE test_db.source (c char(6))
40+
|USING parquet
41+
|""".stripMargin)
42+
43+
withSQLConf("spark.sql.legacy.charVarcharAsString" -> "true") {
44+
sql("INSERT INTO test_db.source VALUES ('ab')")
45+
}
46+
47+
withSQLConf("spark.sql.readSideCharPadding" -> "false") {
48+
sql(s"INSERT INTO target SELECT * FROM source")
49+
checkAnswer(
50+
spark.sql("SELECT c FROM source"), Row("ab"))
51+
checkAnswer(
52+
spark.sql("SELECT c FROM target"), Row("ab "))
53+
}
54+
}
55+
sql(s"DROP DATABASE test_db")
56+
sql(s"USE paimon")
57+
}
58+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.learn.datasource
19+
20+
import org.apache.spark.sql.learn.BaseTest
21+
22+
class DataType extends BaseTest {
23+
24+
test("DataType: struct type") {
25+
withTable("students") {
26+
sql(
27+
"""
28+
|CREATE TABLE students (
29+
| name STRING,
30+
| age INT,
31+
| courses ARRAY<STRUCT<course_name: STRING, grade: DOUBLE>>
32+
|) USING paimon;
33+
|""".stripMargin)
34+
35+
sql(
36+
"""
37+
|INSERT INTO students VALUES
38+
|('Alice', 20, ARRAY(STRUCT('Math', 85.0), STRUCT('English', 88.0))),
39+
|('Bob', 22, ARRAY(STRUCT('Math', 90.0), STRUCT('Biology', 92.0))),
40+
|('Cathy', 21, ARRAY(STRUCT('History', 95.0)));
41+
|""".stripMargin)
42+
43+
sql(
44+
"""
45+
|SELECT
46+
| name,
47+
| age,
48+
| course.course_name,
49+
| course.grade
50+
|FROM
51+
| students
52+
|LATERAL VIEW explode(courses) AS course;
53+
|""".stripMargin).show()
54+
}
55+
}
56+
57+
test("DataType: struct type CTAS") {
58+
withTable("students") {
59+
sql(
60+
"""
61+
|CREATE TABLE students AS
62+
|SELECT *
63+
|FROM (VALUES ('Alice', 20, ARRAY(STRUCT('Math', 85.0), STRUCT('English', 88.0))),
64+
| ('Bob', 22, ARRAY(STRUCT('Math', 90.0), STRUCT('Biology', 92.0))),
65+
| ('Cathy', 21, ARRAY(STRUCT('History', 95.0)))) ;
66+
|""".stripMargin)
67+
68+
sql("desc table extended students").show(false)
69+
}
70+
}
71+
72+
test("DataType: struct type get") {
73+
withTable("students") {
74+
sql(
75+
"""
76+
|CREATE TABLE students (
77+
| name STRING,
78+
| age INT,
79+
| course STRUCT<course_name: STRING, grade: DOUBLE>
80+
|) USING paimon;
81+
|""".stripMargin)
82+
83+
sql(
84+
"""
85+
|INSERT INTO students VALUES
86+
|('Alice', 20, STRUCT('Math', 85.0)),
87+
|('Bob', 22, STRUCT('Biology', 92.0)),
88+
|('Cathy', 21, STRUCT('History', 95.0));
89+
|""".stripMargin)
90+
91+
sql(
92+
"""
93+
|SELECT
94+
| name,
95+
| age,
96+
| course.course_name,
97+
| course.grade
98+
|FROM
99+
| students;
100+
|""".stripMargin).show()
101+
}
102+
}
103+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.learn.datasource
19+
20+
import org.apache.spark.sql.learn.BaseTest
21+
22+
class Filter extends BaseTest {
23+
24+
test("Filter: test partition filter") {
25+
sql("use spark_catalog")
26+
sql(s"CREATE DATABASE test_db LOCATION '$tempDBDir'")
27+
sql(s"USE test_db")
28+
29+
withTable("t") {
30+
sql(
31+
s"""
32+
|CREATE TABLE test_db.t (id INT, p STRING)
33+
|USING parquet
34+
|PARTITIONED BY (p)
35+
|""".stripMargin)
36+
sql("INSERT INTO test_db.t VALUES (1, '1')")
37+
38+
printPlan(sql("SELECT * FROM test_db.t WHERE p = 1"))
39+
}
40+
41+
withTable("t") {
42+
sql(
43+
s"""
44+
|CREATE TABLE test_db.t (id INT, p INT)
45+
|USING parquet
46+
|PARTITIONED BY (p)
47+
|""".stripMargin)
48+
sql("INSERT INTO test_db.t VALUES (1, 1)")
49+
50+
printPlan(sql("SELECT * FROM test_db.t WHERE p = '1'"))
51+
}
52+
sql(s"DROP DATABASE test_db")
53+
sql(s"USE paimon")
54+
}
55+
}

0 commit comments

Comments
 (0)