Skip to content

Commit fc29c91

Browse files
dchvn authored and srowen committed
[SPARK-35561][SQL] Remove leading zeros from empty static number type partition
### What changes were proposed in this pull request?

This PR removes leading zeros from static number-type partition values when we insert into a partitioned table with empty partitions.

Create the table:

    CREATE TABLE `table_int` (
      `id` INT,
      `c_string` STRING,
      `p_int` int)
    USING parquet PARTITIONED BY (p_int);

Insert a non-empty partition:

    insert overwrite table table_int partition (p_int='00011') select 1, 'c string' where true ;

|partition|
|---------|
|p_int=11|

Insert an empty partition (behavior before this change):

    insert overwrite table table_int partition (p_int='00012') select 1, 'c string' where false ;

|partition|
|---------|
|p_int=00012|

### Why are the changes needed?

This PR makes the result consistent whether the inserted partition is empty or non-empty.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Added a unit test.

Closes apache#33291 from dgd-contributor/35561_insert_integer_partition_fail_when_empty.

Authored-by: dgd-contributor <dgd_contributor@viettel.com.vn>
Signed-off-by: Sean Owen <srowen@gmail.com>
1 parent f61d599 commit fc29c91

File tree

2 files changed

+19
-3
lines changed

2 files changed

+19
-3
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -352,10 +352,19 @@ object PartitioningUtils {
352352
*/
353353
def getPathFragment(spec: TablePartitionSpec, partitionSchema: StructType): String = {
354354
partitionSchema.map { field =>
355-
escapePathName(field.name) + "=" + getPartitionValueString(spec(field.name))
355+
escapePathName(field.name) + "=" +
356+
getPartitionValueString(
357+
removeLeadingZerosFromNumberTypePartition(spec(field.name), field.dataType))
356358
}.mkString("/")
357359
}
358360

361+
/**
 * Normalizes a static partition value of a numeric column so that it no longer carries
 * leading zeros (e.g. "00012" becomes "12" for an integer column), by parsing the string
 * with `castPartValueToDesiredType` and rendering the parsed value back to a string.
 *
 * Values of non-numeric types are returned unchanged. A `null` value, or a value for which
 * the cast yields `null` (e.g. the default-partition placeholder), is also returned
 * unchanged instead of failing with a `NullPointerException`.
 *
 * @param value    the raw partition value as written in the partition spec
 * @param dataType the data type of the partition column
 * @return the normalized partition value, or `value` itself when no normalization applies
 */
def removeLeadingZerosFromNumberTypePartition(value: String, dataType: DataType): String =
  dataType match {
    // A null value cannot be parsed as a number; let it fall through untouched.
    case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType
        if value != null =>
      // The cast may return null rather than a number, so never call toString on it directly.
      Option(castPartValueToDesiredType(dataType, value, null)).fold(value)(_.toString)
    case _ => value
  }
367+
359368
/**
 * Overload of `getPathFragment` that accepts the partition columns as attributes.
 * The attributes are converted into a `StructType` and the call is delegated to the
 * schema-based variant.
 *
 * @param spec             the partition spec to render
 * @param partitionColumns the partition columns, in partitioning order
 * @return the path fragment produced by the schema-based `getPathFragment`
 */
def getPathFragment(spec: TablePartitionSpec, partitionColumns: Seq[Attribute]): String = {
  val partitionSchema = StructType.fromAttributes(partitionColumns)
  getPathFragment(spec, partitionSchema)
}
}
@@ -523,9 +532,9 @@ object PartitioningUtils {
523532
case _ if value == DEFAULT_PARTITION_NAME => null
524533
case NullType => null
525534
case StringType => UTF8String.fromString(unescapePathName(value))
526-
case IntegerType => Integer.parseInt(value)
535+
case ByteType | ShortType | IntegerType => Integer.parseInt(value)
527536
case LongType => JLong.parseLong(value)
528-
case DoubleType => JDouble.parseDouble(value)
537+
case FloatType | DoubleType => JDouble.parseDouble(value)
529538
case _: DecimalType => Literal(new JBigDecimal(value)).value
530539
case DateType =>
531540
Cast(Literal(value), DateType, Some(zoneId.getId)).eval()

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,13 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite {
12501250
.sparkConf
12511251
.set(SQLConf.USE_V1_SOURCE_LIST, "")
12521252

1253+
test("SPARK-35561: remove leading zeros from empty static number type partition") {
  // Numeric partition values written with leading zeros must be normalized in the path.
  val partitionSchema = new StructType()
    .add("p_int", "int")
    .add("p_float", "float")
  val partitionSpec = Map("p_int" -> "010", "p_float" -> "01.00")

  val fragment = PartitioningUtils.getPathFragment(partitionSpec, partitionSchema)

  assert("p_int=10/p_float=1.0" === fragment)
}
1259+
12531260
test("read partitioned table - partition key included in Parquet file") {
12541261
withTempDir { base =>
12551262
for {

0 commit comments

Comments
 (0)