Skip to content

Commit d820227

Browse files
committed
Added logic to throw an exception when the user tries to use -force-no-copy for a file (row group/block) that contains multiple partition values
1 parent 339d59d commit d820227

File tree

2 files changed

+31
-15
lines changed

2 files changed

+31
-15
lines changed

ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -423,17 +423,18 @@ private static List<DataFile> processFile(
423423

424424
PartitionKey partitionKey = null;
425425
if (partitionSpec.isPartitioned()) {
426-
partitionKey = Partitioning.inferPartitionKey(metadata, partitionSpec);
427-
if (partitionKey == null) {
426+
var inferResult = Partitioning.inferPartitionKey(metadata, partitionSpec);
427+
if (!inferResult.success()) {
428428
if (options.noCopy || options.s3CopyObject) {
429429
throw new BadRequestException(
430-
String.format(
431-
"Cannot infer partition key of %s from the metadata", inputFile.location()));
430+
String.format("%s: %s", inputFile.location(), inferResult.failureReason()));
432431
}
433432
logger.warn(
434-
"{} does not appear to be partitioned. Falling back to full scan (slow)",
435-
inputFile.location());
433+
"{}: {}. Falling back to full scan (slow)",
434+
inputFile.location(),
435+
inferResult.failureReason());
436436
} else {
437+
partitionKey = inferResult.partitionKey();
437438
logger.info("{}: using inferred partition key {}", file, partitionKey);
438439
}
439440
}

ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ public final class Partitioning {
4949

5050
private Partitioning() {}
5151

52+
public record InferPartitionKeyResult(
53+
@Nullable PartitionKey partitionKey, @Nullable String failureReason) {
54+
public boolean success() {
55+
return partitionKey != null;
56+
}
57+
}
58+
5259
public static PartitionSpec newPartitionSpec(Schema schema, List<Main.IcePartition> columns) {
5360
final PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
5461
if (!columns.isEmpty()) {
@@ -123,7 +130,7 @@ public static void apply(UpdatePartitionSpec op, List<Main.IcePartition> columns
123130
}
124131

125132
// TODO: fall back to path when statistics is not available
126-
public static @Nullable PartitionKey inferPartitionKey(
133+
public static InferPartitionKeyResult inferPartitionKey(
127134
ParquetMetadata metadata, PartitionSpec spec) {
128135
Schema schema = spec.schema();
129136

@@ -138,7 +145,7 @@ public static void apply(UpdatePartitionSpec op, List<Main.IcePartition> columns
138145

139146
Object value = null;
140147
Object valueTransformed = null;
141-
boolean same = true;
148+
String failureReason = null;
142149

143150
for (BlockMetaData block : blocks) {
144151
ColumnChunkMetaData columnMeta =
@@ -148,7 +155,7 @@ public static void apply(UpdatePartitionSpec op, List<Main.IcePartition> columns
148155
.orElse(null);
149156

150157
if (columnMeta == null) {
151-
same = false;
158+
failureReason = String.format("Column '%s' not found in file metadata", sourceName);
152159
break;
153160
}
154161

@@ -158,7 +165,7 @@ public static void apply(UpdatePartitionSpec op, List<Main.IcePartition> columns
158165
|| !stats.hasNonNullValue()
159166
|| stats.genericGetMin() == null
160167
|| stats.genericGetMax() == null) {
161-
same = false;
168+
failureReason = String.format("Column '%s' has no statistics", sourceName);
162169
break;
163170
}
164171

@@ -176,29 +183,37 @@ public static void apply(UpdatePartitionSpec op, List<Main.IcePartition> columns
176183
Object maxTransformed = boundTransform.apply(max);
177184

178185
if (!minTransformed.equals(maxTransformed)) {
179-
same = false;
186+
failureReason =
187+
String.format(
188+
"File contains multiple partition values for '%s' (min: %s, max: %s). "
189+
+ "In force-no-copy mode, each file must contain data for only one partition value",
190+
sourceName, minTransformed, maxTransformed);
180191
break;
181192
}
182193

183194
if (valueTransformed == null) {
184195
valueTransformed = minTransformed;
185196
value = min;
186197
} else if (!valueTransformed.equals(minTransformed)) {
187-
same = false;
198+
failureReason =
199+
String.format(
200+
"File contains multiple partition values for '%s' (e.g., %s and %s). "
201+
+ "In force-no-copy mode, each file must contain data for only one partition value",
202+
sourceName, valueTransformed, minTransformed);
188203
break;
189204
}
190205
}
191206

192-
if (same && value != null) {
207+
if (failureReason == null && value != null) {
193208
partitionRecord.setField(sourceName, decodeStatValue(value, type));
194209
} else {
195-
return null;
210+
return new InferPartitionKeyResult(null, failureReason);
196211
}
197212
}
198213

199214
PartitionKey partitionKey = new PartitionKey(spec, schema);
200215
partitionKey.wrap(partitionRecord);
201-
return partitionKey;
216+
return new InferPartitionKeyResult(partitionKey, null);
202217
}
203218

204219
// Copied from org.apache.iceberg.parquet.ParquetConversions.

0 commit comments

Comments
 (0)