@@ -59,12 +59,12 @@ class DistributionSuite extends SparkFunSuite {
59
59
true )
60
60
61
61
checkSatisfied(
62
- HashPartitioning (Seq (' a ), 10 ),
62
+ HashPartitioning (Seq ($ " a " ), 10 ),
63
63
UnspecifiedDistribution ,
64
64
true )
65
65
66
66
checkSatisfied(
67
- RangePartitioning (Seq (' a .asc), 10 ),
67
+ RangePartitioning (Seq ($ " a " .asc), 10 ),
68
68
UnspecifiedDistribution ,
69
69
true )
70
70
@@ -101,22 +101,22 @@ class DistributionSuite extends SparkFunSuite {
101
101
true )
102
102
103
103
checkSatisfied(
104
- HashPartitioning (Seq (' a ), 1 ),
104
+ HashPartitioning (Seq ($ " a " ), 1 ),
105
105
AllTuples ,
106
106
true )
107
107
108
108
checkSatisfied(
109
- HashPartitioning (Seq (' a ), 10 ),
109
+ HashPartitioning (Seq ($ " a " ), 10 ),
110
110
AllTuples ,
111
111
false )
112
112
113
113
checkSatisfied(
114
- RangePartitioning (Seq (' a .asc), 1 ),
114
+ RangePartitioning (Seq ($ " a " .asc), 1 ),
115
115
AllTuples ,
116
116
true )
117
117
118
118
checkSatisfied(
119
- RangePartitioning (Seq (' a .asc), 10 ),
119
+ RangePartitioning (Seq ($ " a " .asc), 10 ),
120
120
AllTuples ,
121
121
false )
122
122
@@ -130,17 +130,17 @@ class DistributionSuite extends SparkFunSuite {
130
130
// SinglePartition can satisfy all the distributions except `BroadcastDistribution`
131
131
checkSatisfied(
132
132
SinglePartition ,
133
- ClusteredDistribution (Seq (' a , ' b , ' c )),
133
+ ClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
134
134
true )
135
135
136
136
checkSatisfied(
137
137
SinglePartition ,
138
- HashClusteredDistribution (Seq (' a , ' b , ' c )),
138
+ HashClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
139
139
true )
140
140
141
141
checkSatisfied(
142
142
SinglePartition ,
143
- OrderedDistribution (Seq (' a .asc, ' b .asc, ' c .asc)),
143
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc)),
144
144
true )
145
145
146
146
checkSatisfied(
@@ -153,154 +153,154 @@ class DistributionSuite extends SparkFunSuite {
153
153
// HashPartitioning can satisfy ClusteredDistribution iff its hash expressions are a subset of
154
154
// the required clustering expressions.
155
155
checkSatisfied(
156
- HashPartitioning (Seq (' a , ' b , ' c ), 10 ),
157
- ClusteredDistribution (Seq (' a , ' b , ' c )),
156
+ HashPartitioning (Seq ($ " a " , $ " b " , $ " c " ), 10 ),
157
+ ClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
158
158
true )
159
159
160
160
checkSatisfied(
161
- HashPartitioning (Seq (' b , ' c ), 10 ),
162
- ClusteredDistribution (Seq (' a , ' b , ' c )),
161
+ HashPartitioning (Seq ($ " b " , $ " c " ), 10 ),
162
+ ClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
163
163
true )
164
164
165
165
checkSatisfied(
166
- HashPartitioning (Seq (' a , ' b , ' c ), 10 ),
167
- ClusteredDistribution (Seq (' b , ' c )),
166
+ HashPartitioning (Seq ($ " a " , $ " b " , $ " c " ), 10 ),
167
+ ClusteredDistribution (Seq ($ " b " , $ " c " )),
168
168
false )
169
169
170
170
checkSatisfied(
171
- HashPartitioning (Seq (' a , ' b , ' c ), 10 ),
172
- ClusteredDistribution (Seq (' d , ' e )),
171
+ HashPartitioning (Seq ($ " a " , $ " b " , $ " c " ), 10 ),
172
+ ClusteredDistribution (Seq ($ " d " , $ " e " )),
173
173
false )
174
174
175
175
// HashPartitioning can satisfy HashClusteredDistribution iff its hash expressions are exactly
176
176
// same with the required hash clustering expressions.
177
177
checkSatisfied(
178
- HashPartitioning (Seq (' a , ' b , ' c ), 10 ),
179
- HashClusteredDistribution (Seq (' a , ' b , ' c )),
178
+ HashPartitioning (Seq ($ " a " , $ " b " , $ " c " ), 10 ),
179
+ HashClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
180
180
true )
181
181
182
182
checkSatisfied(
183
- HashPartitioning (Seq (' c , ' b , ' a ), 10 ),
184
- HashClusteredDistribution (Seq (' a , ' b , ' c )),
183
+ HashPartitioning (Seq ($ " c " , $ " b " , $ " a " ), 10 ),
184
+ HashClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
185
185
false )
186
186
187
187
checkSatisfied(
188
- HashPartitioning (Seq (' a , ' b ), 10 ),
189
- HashClusteredDistribution (Seq (' a , ' b , ' c )),
188
+ HashPartitioning (Seq ($ " a " , $ " b " ), 10 ),
189
+ HashClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
190
190
false )
191
191
192
192
// HashPartitioning cannot satisfy OrderedDistribution
193
193
checkSatisfied(
194
- HashPartitioning (Seq (' a , ' b , ' c ), 10 ),
195
- OrderedDistribution (Seq (' a .asc, ' b .asc, ' c .asc)),
194
+ HashPartitioning (Seq ($ " a " , $ " b " , $ " c " ), 10 ),
195
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc)),
196
196
false )
197
197
198
198
checkSatisfied(
199
- HashPartitioning (Seq (' a , ' b , ' c ), 1 ),
200
- OrderedDistribution (Seq (' a .asc, ' b .asc, ' c .asc)),
199
+ HashPartitioning (Seq ($ " a " , $ " b " , $ " c " ), 1 ),
200
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc)),
201
201
false ) // TODO: this can be relaxed.
202
202
203
203
checkSatisfied(
204
- HashPartitioning (Seq (' b , ' c ), 10 ),
205
- OrderedDistribution (Seq (' a .asc, ' b .asc, ' c .asc)),
204
+ HashPartitioning (Seq ($ " b " , $ " c " ), 10 ),
205
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc)),
206
206
false )
207
207
}
208
208
209
209
test(" RangePartitioning is the output partitioning" ) {
210
210
// RangePartitioning can satisfy OrderedDistribution iff its ordering is a prefix
211
211
// of the required ordering, or the required ordering is a prefix of its ordering.
212
212
checkSatisfied(
213
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
214
- OrderedDistribution (Seq (' a .asc, ' b .asc, ' c .asc)),
213
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
214
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc)),
215
215
true )
216
216
217
217
checkSatisfied(
218
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
219
- OrderedDistribution (Seq (' a .asc, ' b .asc)),
218
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
219
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .asc)),
220
220
true )
221
221
222
222
checkSatisfied(
223
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
224
- OrderedDistribution (Seq (' a .asc, ' b .asc, ' c .asc, ' d .desc)),
223
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
224
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc, $ " d " .desc)),
225
225
true )
226
226
227
227
// TODO: We can have an optimization to first sort the dataset
228
228
// by a.asc and then sort b, and c in a partition. This optimization
229
229
// should tradeoff the benefit of a less number of Exchange operators
230
230
// and the parallelism.
231
231
checkSatisfied(
232
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
233
- OrderedDistribution (Seq (' a .asc, ' b .desc, ' c .asc)),
232
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
233
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .desc, $ " c " .asc)),
234
234
false )
235
235
236
236
checkSatisfied(
237
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
238
- OrderedDistribution (Seq (' b .asc, ' a .asc)),
237
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
238
+ OrderedDistribution (Seq ($ " b " .asc, $ " a " .asc)),
239
239
false )
240
240
241
241
checkSatisfied(
242
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
243
- OrderedDistribution (Seq (' a .asc, ' b .asc, ' d .desc)),
242
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
243
+ OrderedDistribution (Seq ($ " a " .asc, $ " b " .asc, $ " d " .desc)),
244
244
false )
245
245
246
246
// RangePartitioning can satisfy ClusteredDistribution iff its ordering expressions are a subset
247
247
// of the required clustering expressions.
248
248
checkSatisfied(
249
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
250
- ClusteredDistribution (Seq (' a , ' b , ' c )),
249
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
250
+ ClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
251
251
true )
252
252
253
253
checkSatisfied(
254
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
255
- ClusteredDistribution (Seq (' c , ' b , ' a )),
254
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
255
+ ClusteredDistribution (Seq ($ " c " , $ " b " , $ " a " )),
256
256
true )
257
257
258
258
checkSatisfied(
259
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
260
- ClusteredDistribution (Seq (' b , ' c , ' a , ' d )),
259
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
260
+ ClusteredDistribution (Seq ($ " b " , $ " c " , $ " a " , $ " d " )),
261
261
true )
262
262
263
263
checkSatisfied(
264
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
265
- ClusteredDistribution (Seq (' a , ' b )),
264
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
265
+ ClusteredDistribution (Seq ($ " a " , $ " b " )),
266
266
false )
267
267
268
268
checkSatisfied(
269
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
270
- ClusteredDistribution (Seq (' c , ' d )),
269
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
270
+ ClusteredDistribution (Seq ($ " c " , $ " d " )),
271
271
false )
272
272
273
273
// RangePartitioning cannot satisfy HashClusteredDistribution
274
274
checkSatisfied(
275
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
276
- HashClusteredDistribution (Seq (' a , ' b , ' c )),
275
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
276
+ HashClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " )),
277
277
false )
278
278
}
279
279
280
280
test(" Partitioning.numPartitions must match Distribution.requiredNumPartitions to satisfy it" ) {
281
281
checkSatisfied(
282
282
SinglePartition ,
283
- ClusteredDistribution (Seq (' a , ' b , ' c ), Some (10 )),
283
+ ClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " ), Some (10 )),
284
284
false )
285
285
286
286
checkSatisfied(
287
287
SinglePartition ,
288
- HashClusteredDistribution (Seq (' a , ' b , ' c ), Some (10 )),
288
+ HashClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " ), Some (10 )),
289
289
false )
290
290
291
291
checkSatisfied(
292
- HashPartitioning (Seq (' a , ' b , ' c ), 10 ),
293
- ClusteredDistribution (Seq (' a , ' b , ' c ), Some (5 )),
292
+ HashPartitioning (Seq ($ " a " , $ " b " , $ " c " ), 10 ),
293
+ ClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " ), Some (5 )),
294
294
false )
295
295
296
296
checkSatisfied(
297
- HashPartitioning (Seq (' a , ' b , ' c ), 10 ),
298
- HashClusteredDistribution (Seq (' a , ' b , ' c ), Some (5 )),
297
+ HashPartitioning (Seq ($ " a " , $ " b " , $ " c " ), 10 ),
298
+ HashClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " ), Some (5 )),
299
299
false )
300
300
301
301
checkSatisfied(
302
- RangePartitioning (Seq (' a .asc, ' b .asc, ' c .asc), 10 ),
303
- ClusteredDistribution (Seq (' a , ' b , ' c ), Some (5 )),
302
+ RangePartitioning (Seq ($ " a " .asc, $ " b " .asc, $ " c " .asc), 10 ),
303
+ ClusteredDistribution (Seq ($ " a " , $ " b " , $ " c " ), Some (5 )),
304
304
false )
305
305
}
306
306
}
0 commit comments