@@ -107,6 +107,68 @@ abstract class OrcPartitionDiscoveryTest extends OrcTest {
107
107
}
108
108
}
109
109
110
+ test(" read partitioned table - with nulls" ) {
111
+ withTempDir { base =>
112
+ for {
113
+ // Must be `Integer` rather than `Int` here. `null.asInstanceOf[Int]` results in a zero...
114
+ pi <- Seq (1 , null .asInstanceOf [Integer ])
115
+ ps <- Seq (" foo" , null .asInstanceOf [String ])
116
+ } {
117
+ makeOrcFile(
118
+ (1 to 10 ).map(i => OrcParData (i, i.toString)),
119
+ makePartitionDir(base, defaultPartitionName, " pi" -> pi, " ps" -> ps))
120
+ }
121
+
122
+ spark.read
123
+ .option(" hive.exec.default.partition.name" , defaultPartitionName)
124
+ .orc(base.getCanonicalPath)
125
+ .createOrReplaceTempView(" t" )
126
+
127
+ withTempTable(" t" ) {
128
+ checkAnswer(
129
+ sql(" SELECT * FROM t" ),
130
+ for {
131
+ i <- 1 to 10
132
+ pi <- Seq (1 , null .asInstanceOf [Integer ])
133
+ ps <- Seq (" foo" , null .asInstanceOf [String ])
134
+ } yield Row (i, i.toString, pi, ps))
135
+
136
+ checkAnswer(
137
+ sql(" SELECT * FROM t WHERE pi IS NULL" ),
138
+ for {
139
+ i <- 1 to 10
140
+ ps <- Seq (" foo" , null .asInstanceOf [String ])
141
+ } yield Row (i, i.toString, null , ps))
142
+
143
+ checkAnswer(
144
+ sql(" SELECT * FROM t WHERE ps IS NULL" ),
145
+ for {
146
+ i <- 1 to 10
147
+ pi <- Seq (1 , null .asInstanceOf [Integer ])
148
+ } yield Row (i, i.toString, pi, null ))
149
+ }
150
+ }
151
+ }
152
+
153
+ test(" SPARK-27162: handle pathfilter configuration correctly" ) {
154
+ withTempPath { dir =>
155
+ val path = dir.getCanonicalPath
156
+
157
+ val df = spark.range(2 )
158
+ df.write.orc(path + " /p=1" )
159
+ df.write.orc(path + " /p=2" )
160
+ assert(spark.read.orc(path).count() === 4 )
161
+
162
+ val extraOptions = Map (
163
+ " mapred.input.pathFilter.class" -> classOf [TestFileFilter ].getName,
164
+ " mapreduce.input.pathFilter.class" -> classOf [TestFileFilter ].getName
165
+ )
166
+ assert(spark.read.options(extraOptions).orc(path).count() === 2 )
167
+ }
168
+ }
169
+ }
170
+
171
+ class OrcPartitionDiscoverySuite extends OrcPartitionDiscoveryTest with SharedSQLContext {
110
172
test(" read partitioned table - partition key included in orc file" ) {
111
173
withTempDir { base =>
112
174
for {
@@ -127,7 +189,7 @@ abstract class OrcPartitionDiscoveryTest extends OrcTest {
127
189
i <- 1 to 10
128
190
pi <- Seq (1 , 2 )
129
191
ps <- Seq (" foo" , " bar" )
130
- } yield Row (i, pi, i.toString, ps))
192
+ } yield Row (i, i.toString, pi , ps))
131
193
132
194
checkAnswer(
133
195
sql(" SELECT intField, pi FROM t" ),
@@ -142,28 +204,26 @@ abstract class OrcPartitionDiscoveryTest extends OrcTest {
142
204
for {
143
205
i <- 1 to 10
144
206
ps <- Seq (" foo" , " bar" )
145
- } yield Row (i, 1 , i.toString, ps))
207
+ } yield Row (i, i.toString, 1 , ps))
146
208
147
209
checkAnswer(
148
210
sql(" SELECT * FROM t WHERE ps = 'foo'" ),
149
211
for {
150
212
i <- 1 to 10
151
213
pi <- Seq (1 , 2 )
152
- } yield Row (i, pi, i.toString, " foo" ))
214
+ } yield Row (i, i.toString, pi , " foo" ))
153
215
}
154
216
}
155
217
}
156
218
157
-
158
- test(" read partitioned table - with nulls" ) {
219
+ test(" read partitioned table - with nulls and partition keys are included in Orc file" ) {
159
220
withTempDir { base =>
160
221
for {
161
- // Must be `Integer` rather than `Int` here. `null.asInstanceOf[Int]` results in a zero...
162
- pi <- Seq (1 , null .asInstanceOf [Integer ])
222
+ pi <- Seq (1 , 2 )
163
223
ps <- Seq (" foo" , null .asInstanceOf [String ])
164
224
} {
165
225
makeOrcFile(
166
- (1 to 10 ).map(i => OrcParData (i, i.toString)),
226
+ (1 to 10 ).map(i => OrcParDataWithKey (i, pi, i.toString, ps )),
167
227
makePartitionDir(base, defaultPartitionName, " pi" -> pi, " ps" -> ps))
168
228
}
169
229
@@ -177,23 +237,71 @@ abstract class OrcPartitionDiscoveryTest extends OrcTest {
177
237
sql(" SELECT * FROM t" ),
178
238
for {
179
239
i <- 1 to 10
180
- pi <- Seq (1 , null . asInstanceOf [ Integer ] )
240
+ pi <- Seq (1 , 2 )
181
241
ps <- Seq (" foo" , null .asInstanceOf [String ])
182
242
} yield Row (i, i.toString, pi, ps))
183
243
184
244
checkAnswer(
185
- sql(" SELECT * FROM t WHERE pi IS NULL" ),
245
+ sql(" SELECT * FROM t WHERE ps IS NULL" ),
186
246
for {
187
247
i <- 1 to 10
188
- ps <- Seq (" foo" , null .asInstanceOf [String ])
189
- } yield Row (i, i.toString, null , ps))
248
+ pi <- Seq (1 , 2 )
249
+ } yield Row (i, i.toString, pi, null ))
250
+ }
251
+ }
252
+ }
253
+ }
190
254
255
+ class OrcV1PartitionDiscoverySuite extends OrcPartitionDiscoveryTest with SharedSQLContext {
256
+ override protected def sparkConf : SparkConf =
257
+ super
258
+ .sparkConf
259
+ .set(SQLConf .USE_V1_SOURCE_READER_LIST , " orc" )
260
+ .set(SQLConf .USE_V1_SOURCE_WRITER_LIST , " orc" )
261
+
262
+ test(" read partitioned table - partition key included in orc file" ) {
263
+ withTempDir { base =>
264
+ for {
265
+ pi <- Seq (1 , 2 )
266
+ ps <- Seq (" foo" , " bar" )
267
+ } {
268
+ makeOrcFile(
269
+ (1 to 10 ).map(i => OrcParDataWithKey (i, pi, i.toString, ps)),
270
+ makePartitionDir(base, defaultPartitionName, " pi" -> pi, " ps" -> ps))
271
+ }
272
+
273
+ spark.read.orc(base.getCanonicalPath).createOrReplaceTempView(" t" )
274
+
275
+ withTempTable(" t" ) {
191
276
checkAnswer(
192
- sql(" SELECT * FROM t WHERE ps IS NULL " ),
277
+ sql(" SELECT * FROM t" ),
193
278
for {
194
279
i <- 1 to 10
195
- pi <- Seq (1 , null .asInstanceOf [Integer ])
196
- } yield Row (i, i.toString, pi, null ))
280
+ pi <- Seq (1 , 2 )
281
+ ps <- Seq (" foo" , " bar" )
282
+ } yield Row (i, pi, i.toString, ps))
283
+
284
+ checkAnswer(
285
+ sql(" SELECT intField, pi FROM t" ),
286
+ for {
287
+ i <- 1 to 10
288
+ pi <- Seq (1 , 2 )
289
+ _ <- Seq (" foo" , " bar" )
290
+ } yield Row (i, pi))
291
+
292
+ checkAnswer(
293
+ sql(" SELECT * FROM t WHERE pi = 1" ),
294
+ for {
295
+ i <- 1 to 10
296
+ ps <- Seq (" foo" , " bar" )
297
+ } yield Row (i, 1 , i.toString, ps))
298
+
299
+ checkAnswer(
300
+ sql(" SELECT * FROM t WHERE ps = 'foo'" ),
301
+ for {
302
+ i <- 1 to 10
303
+ pi <- Seq (1 , 2 )
304
+ } yield Row (i, pi, i.toString, " foo" ))
197
305
}
198
306
}
199
307
}
@@ -232,31 +340,4 @@ abstract class OrcPartitionDiscoveryTest extends OrcTest {
232
340
}
233
341
}
234
342
}
235
-
236
- test(" SPARK-27162: handle pathfilter configuration correctly" ) {
237
- withTempPath { dir =>
238
- val path = dir.getCanonicalPath
239
-
240
- val df = spark.range(2 )
241
- df.write.orc(path + " /p=1" )
242
- df.write.orc(path + " /p=2" )
243
- assert(spark.read.orc(path).count() === 4 )
244
-
245
- val extraOptions = Map (
246
- " mapred.input.pathFilter.class" -> classOf [TestFileFilter ].getName,
247
- " mapreduce.input.pathFilter.class" -> classOf [TestFileFilter ].getName
248
- )
249
- assert(spark.read.options(extraOptions).orc(path).count() === 2 )
250
- }
251
- }
252
- }
253
-
254
- class OrcPartitionDiscoverySuite extends OrcPartitionDiscoveryTest with SharedSQLContext
255
-
256
- class OrcV1PartitionDiscoverySuite extends OrcPartitionDiscoveryTest with SharedSQLContext {
257
- override protected def sparkConf : SparkConf =
258
- super
259
- .sparkConf
260
- .set(SQLConf .USE_V1_SOURCE_READER_LIST , " orc" )
261
- .set(SQLConf .USE_V1_SOURCE_WRITER_LIST , " orc" )
262
343
}
0 commit comments