Skip to content

Commit 5abac95

Browse files
authored
[Kernel][Clustering #2] add getPhysicalColumnNameAndDataType util in ColumnMapping (delta-io#4319)
<!-- Thanks for sending a pull request! Here are some tips for you: 1. If this is your first time, please read our contributor guidelines: https://github.com/delta-io/delta/blob/master/CONTRIBUTING.md 2. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP] Your PR title ...'. 3. Be sure to keep the PR description updated to reflect all changes. 4. Please write your PR title to summarize what this PR proposes. 5. If possible, provide a concise example to reproduce the issue for a faster review. 6. If applicable, include the corresponding issue number in the PR title and link it in the body. --> #### Which Delta project/connector is this regarding? <!-- Please add the component selected below to the beginning of the pull request title For example: [Spark] Title of my pull request --> - [ ] Spark - [ ] Standalone - [ ] Flink - [x] Kernel - [ ] Other (fill in here) ## Description <!-- - Describe what this PR changes. - Describe why we need the change. If this PR resolves an issue be sure to include "Resolves #XXX" to correctly link and close the issue upon merge. --> Split from the main PR delta-io#4265 for faster review. Add a util func `getPhysicalColumnNameAndDataType` in ColumnMapping to get the corresponding physical column name and data type for a logical column. ## How was this patch tested? <!-- If tests were added, say they were added here. Please make sure to test the changes thoroughly including negative and positive cases if possible. If the changes were tested in any way other than unit tests, please clarify how you tested step by step (ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future). If the changes were not tested, please explain why. --> Add unit test cases in ColumnMappingSuite.scala ## Does this PR introduce _any_ user-facing changes? 
<!-- If yes, please clarify the previous behavior and the change this PR proposes - provide the console output, description and/or an example to show the behavior difference if possible. If possible, please also clarify if this is a user-facing change compared to the released Delta Lake versions or within the unreleased branches such as master. If no, write 'No'. -->
1 parent 069a7cc commit 5abac95

File tree

3 files changed

+135
-1
lines changed

3 files changed

+135
-1
lines changed

kernel/kernel-api/src/main/java/io/delta/kernel/internal/DeltaErrors.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import static java.lang.String.format;
1919

2020
import io.delta.kernel.exceptions.*;
21+
import io.delta.kernel.expressions.Column;
2122
import io.delta.kernel.internal.actions.DomainMetadata;
2223
import io.delta.kernel.internal.tablefeatures.TableFeature;
2324
import io.delta.kernel.types.DataType;
@@ -251,6 +252,11 @@ public static KernelException statsTypeMismatch(
251252
return new KernelException(format(msgFormat, fieldName, expected, actual));
252253
}
253254

255+
public static KernelException columnNotFoundInSchema(Column column, StructType tableSchema) {
256+
return new KernelException(
257+
format("Column '%s' was not found in the table schema: %s", column, tableSchema));
258+
}
259+
254260
/// Start: icebergCompat exceptions
255261
public static KernelException icebergCompatMissingNumRecordsStats(
256262
String compatVersion, DataFileStatus dataFileStatus) {

kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/ColumnMapping.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
*/
1616
package io.delta.kernel.internal.util;
1717

18+
import static io.delta.kernel.internal.DeltaErrors.columnNotFoundInSchema;
1819
import static io.delta.kernel.internal.util.Preconditions.checkArgument;
1920
import static java.util.Collections.singletonMap;
2021

2122
import io.delta.kernel.exceptions.InvalidConfigurationValueException;
23+
import io.delta.kernel.expressions.Column;
2224
import io.delta.kernel.internal.TableConfig;
2325
import io.delta.kernel.internal.actions.Metadata;
2426
import io.delta.kernel.types.*;
@@ -157,6 +159,31 @@ public static Optional<Metadata> updateColumnMappingMetadataIfNeeded(
157159
}
158160
}
159161

162+
/** Returns the physical column and data type for a given logical column based on the schema. */
163+
public static Tuple2<Column, DataType> getPhysicalColumnNameAndDataType(
164+
StructType schema, Column logicalColumn) {
165+
List<String> physicalNameParts = new ArrayList<>();
166+
DataType currentType = schema;
167+
168+
// Traverse through each level of the logical name to resolve its corresponding physical name.
169+
for (String namePart : logicalColumn.getNames()) {
170+
if (!(currentType instanceof StructType)) {
171+
throw columnNotFoundInSchema(logicalColumn, schema);
172+
}
173+
174+
StructType structType = (StructType) currentType;
175+
// Find the field in the current structure that matches the given name
176+
StructField field =
177+
structType.fields().stream()
178+
.filter(f -> f.getName().equalsIgnoreCase(namePart))
179+
.findFirst()
180+
.orElseThrow(() -> columnNotFoundInSchema(logicalColumn, schema));
181+
physicalNameParts.add(ColumnMapping.getPhysicalName(field));
182+
currentType = field.getDataType();
183+
}
184+
return new Tuple2<>(new Column(physicalNameParts.toArray(new String[0])), currentType);
185+
}
186+
160187
////////////////////////////
161188
// Private Helper Methods //
162189
////////////////////////////

kernel/kernel-api/src/test/scala/io/delta/kernel/internal/util/ColumnMappingSuite.scala

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ package io.delta.kernel.internal.util
1717

1818
import java.util
1919

20+
import io.delta.kernel.exceptions.KernelException
21+
import io.delta.kernel.expressions.Column
2022
import io.delta.kernel.internal.actions.Metadata
2123
import io.delta.kernel.internal.util.ColumnMapping._
2224
import io.delta.kernel.internal.util.ColumnMapping.ColumnMappingMode._
23-
import io.delta.kernel.types.{ArrayType, FieldMetadata, IntegerType, MapType, StringType, StructField, StructType}
25+
import io.delta.kernel.types._
2426

2527
import org.assertj.core.api.Assertions.{assertThat, assertThatNoException, assertThatThrownBy}
2628
import org.assertj.core.util.Maps
@@ -199,6 +201,105 @@ class ColumnMappingSuite extends AnyFunSuite with ColumnMappingSuiteBase {
199201
assertThat(ColumnMapping.findMaxColumnId(schema)).isEqualTo(12)
200202
}
201203

204+
// Shared schema for the getPhysicalColumnNameAndDataType tests: top-level primitives ("a", "e"),
// a one-level nested struct ("b"), a two-level nested struct ("f" -> "g" -> "h"), plus a map ("i")
// and an array ("j") column.
private val testingSchema = {
  val structB = new StructType()
    .add("c", DoubleType.DOUBLE)
    .add("d", DateType.DATE)
  val structG = new StructType().add("h", TimestampNTZType.TIMESTAMP_NTZ)
  val structF = new StructType().add("g", structG)

  new StructType()
    .add("a", StringType.STRING)
    .add("b", structB)
    .add("e", FloatType.FLOAT)
    .add("f", structF)
    .add("i", new MapType(StringType.STRING, DoubleType.DOUBLE, false))
    .add("j", new ArrayType(StringType.STRING, false))
}
221+
222+
// Each case pairs a logical column path in `testingSchema` with the data type expected at the end
// of that path. One test is registered per case.
Seq(
  (Array("a"), StringType.STRING),
  (Array("b", "c"), DoubleType.DOUBLE),
  (Array("b", "d"), DateType.DATE),
  (Array("e"), FloatType.FLOAT),
  (Array("f", "g", "h"), TimestampNTZType.TIMESTAMP_NTZ),
  (Array("i"), new MapType(StringType.STRING, DoubleType.DOUBLE, false)),
  (Array("j"), new ArrayType(StringType.STRING, false))).foreach {
  case (columnName, expectedType) =>
    // Join the path explicitly: interpolating an Array directly puts its identity string
    // (e.g. "[Ljava.lang.String;@1b2c3d") into the test name instead of the column path.
    test(s"get physical column name and dataType for ${columnName.mkString(".")}") {
      // case 1: column mapping disabled -- the column returned equals the logical column
      val column = new Column(columnName)
      val resultTuple =
        ColumnMapping.getPhysicalColumnNameAndDataType(testingSchema, column)

      val actualColumn = resultTuple._1
      val actualType = resultTuple._2
      assert(actualColumn == column)
      assert(actualType == expectedType)

      // case 2: column mapping enabled ("id" mode) -- only the path depth and the data type
      // are checked here, not the physical names themselves
      val metadata: Metadata = updateColumnMappingMetadataIfNeeded(
        testMetadata(testingSchema).withColumnMappingEnabled("id"),
        true).orElseGet(() => fail("Metadata should not be empty"))

      val physicalResultTuple = ColumnMapping.getPhysicalColumnNameAndDataType(
        metadata.getSchema,
        column)
      val actualPhysicalColumn = physicalResultTuple._1
      val actualPhysicalType = physicalResultTuple._2
      assert(actualPhysicalColumn.getNames.length == columnName.length)
      assert(actualPhysicalType == expectedType)
    }
}
256+
257+
// Each case is (input path with different casing, path as declared in `testingSchema`, expected
// data type). The lookup must succeed regardless of input casing and return the schema's casing.
Seq(
  (Array("A"), Array("a"), StringType.STRING),
  (Array("B", "C"), Array("b", "c"), DoubleType.DOUBLE),
  (Array("B", "D"), Array("b", "d"), DateType.DATE),
  (Array("E"), Array("e"), FloatType.FLOAT),
  (Array("F", "G", "H"), Array("f", "g", "h"), TimestampNTZType.TIMESTAMP_NTZ),
  (Array("I"), Array("i"), new MapType(StringType.STRING, DoubleType.DOUBLE, false)),
  (Array("J"), Array("j"), new ArrayType(StringType.STRING, false))).foreach {
  case (inputColumnName, expectedColumnName, expectedType) =>
    // Join the path explicitly: interpolating an Array directly puts its identity string
    // (e.g. "[Ljava.lang.String;@1b2c3d") into the test name instead of the column path.
    test("get physical column name should respect case of table schema, " +
      inputColumnName.mkString(".")) {

      val column = new Column(inputColumnName)
      val resultTuple =
        ColumnMapping.getPhysicalColumnNameAndDataType(testingSchema, column)

      val actualColumn = resultTuple._1
      val actualType = resultTuple._2
      assert(actualColumn == new Column(expectedColumnName))
      assert(actualType == expectedType)
    }
}
278+
279+
test("getPhysicalColumnNameAndDataType: exception expected when column does not exist") {
  // Top-level lookup: no field of the flat schema is named "abc".
  val flatSchema = new StructType()
    .add("A", StringType.STRING)
    .add("b", IntegerType.INTEGER)
  val topLevelError = intercept[KernelException] {
    ColumnMapping.getPhysicalColumnNameAndDataType(flatSchema, new Column("abc"))
  }
  assert(
    topLevelError.getMessage.contains(
      "Column 'column(`abc`)' was not found in the table schema"))

  // Nested lookup: the first name part "Bbb" matches none of the top-level fields a, b, c, so
  // resolution fails before the nested struct is ever consulted.
  val nestedSchema = new StructType()
    .add("a", StringType.STRING)
    .add(
      "b",
      new StructType()
        .add("D", IntegerType.INTEGER)
        .add("e", IntegerType.INTEGER))
    .add("c", IntegerType.INTEGER)
  val nestedError = intercept[KernelException] {
    ColumnMapping.getPhysicalColumnNameAndDataType(nestedSchema, new Column(Array("Bbb", "d")))
  }
  assert(
    nestedError.getMessage.contains(
      "Column 'column(`Bbb`.`d`)' was not found in the table schema"))
}
302+
202303
Seq(true, false).foreach { isNewTable =>
203304
test(s"assign id and physical name to new table: $isNewTable") {
204305
val schema: StructType = new StructType()

0 commit comments

Comments
 (0)