Skip to content

Commit 8dccbab

Browse files
authored
[Kernel][Clean up] Add fromRow() method for Metadata and directly compare P&M in test (delta-io#4317)
<!-- Thanks for sending a pull request! Here are some tips for you: 1. If this is your first time, please read our contributor guidelines: https://github.com/delta-io/delta/blob/master/CONTRIBUTING.md 2. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP] Your PR title ...'. 3. Be sure to keep the PR description updated to reflect all changes. 4. Please write your PR title to summarize what this PR proposes. 5. If possible, provide a concise example to reproduce the issue for a faster review. 6. If applicable, include the corresponding issue number in the PR title and link it in the body. --> #### Which Delta project/connector is this regarding? <!-- Please add the component selected below to the beginning of the pull request title For example: [Spark] Title of my pull request --> - [ ] Spark - [ ] Standalone - [ ] Flink - [x] Kernel - [ ] Other (fill in here) ## Description <!-- - Describe what this PR changes. - Describe why we need the change. If this PR resolves an issue be sure to include "Resolves #XXX" to correctly link and close the issue upon merge. --> This PR address some leftover todos in the test by removing custom helper method for checking equality of P&M. As a part of this effort, added fromRow() method for Metadata which may address several different use cases. No functional changes ## How was this patch tested? <!-- If tests were added, say they were added here. Please make sure to test the changes thoroughly including negative and positive cases if possible. If the changes were tested in any way other than unit tests, please clarify how you tested step by step (ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future). If the changes were not tested, please explain why. --> Existing tests ## Does this PR introduce _any_ user-facing changes? <!-- If yes, please clarify the previous behavior and the change this PR proposes - provide the console output, description and/or an example to show the behavior difference if possible. If possible, please also clarify if this is a user-facing change compared to the released Delta Lake versions or within the unreleased branches such as master. If no, write 'No'. --> No
1 parent 062c2e9 commit 8dccbab

File tree

2 files changed

+21
-59
lines changed

2 files changed

+21
-59
lines changed

kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@
3030

3131
public class Metadata {
3232

33+
public static Metadata fromRow(Row row) {
34+
requireNonNull(row);
35+
checkArgument(FULL_SCHEMA.equals(row.getSchema()));
36+
return fromColumnVector(
37+
VectorUtils.buildColumnVector(Collections.singletonList(row), FULL_SCHEMA), /* rowId */ 0);
38+
}
39+
3340
public static Metadata fromColumnVector(ColumnVector vector, int rowId) {
3441
if (vector.isNullAt(rowId)) {
3542
return null;

kernel/kernel-api/src/test/scala/io/delta/kernel/internal/checksum/ChecksumWriterSuite.scala

Lines changed: 14 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ import io.delta.kernel.internal.actions.{Format, Metadata, Protocol}
2323
import io.delta.kernel.internal.checksum.CRCInfo.CRC_FILE_SCHEMA
2424
import io.delta.kernel.internal.data.GenericRow
2525
import io.delta.kernel.internal.fs.Path
26+
import io.delta.kernel.internal.types.DataTypeJsonSerDe
2627
import io.delta.kernel.internal.util.VectorUtils
27-
import io.delta.kernel.internal.util.VectorUtils.{buildArrayValue, stringStringMapValue}
28+
import io.delta.kernel.internal.util.VectorUtils.{buildArrayValue, buildColumnVector, stringStringMapValue}
2829
import io.delta.kernel.test.{BaseMockJsonHandler, MockEngineUtils}
2930
import io.delta.kernel.types.{StringType, StructType}
3031
import io.delta.kernel.utils.CloseableIterator
@@ -63,8 +64,13 @@ class ChecksumWriterSuite extends AnyFunSuite with MockEngineUtils {
6364
new CRCInfo(version, metadata, protocol, tableSizeBytes, numFiles, txn))
6465

6566
verifyChecksumFile(jsonHandler, version)
66-
verifyChecksumContent(jsonHandler.capturedCrcRow.get, tableSizeBytes, numFiles, txn)
67-
verifyMetadataAndProtocol(jsonHandler.capturedCrcRow.get, metadata, protocol)
67+
verifyChecksumContent(
68+
jsonHandler.capturedCrcRow.get,
69+
tableSizeBytes,
70+
numFiles,
71+
metadata,
72+
protocol,
73+
txn)
6874
}
6975

7076
// Test with and without transaction ID
@@ -82,6 +88,8 @@ class ChecksumWriterSuite extends AnyFunSuite with MockEngineUtils {
8288
actualCheckSumRow: Row,
8389
expectedTableSizeBytes: Long,
8490
expectedNumFiles: Long,
91+
expectedMetadata: Metadata,
92+
expectedProtocol: Protocol,
8593
expectedTxnId: Optional[String]): Unit = {
8694
assert(!actualCheckSumRow.isNullAt(TABLE_SIZE_BYTES_IDX) && actualCheckSumRow.getLong(
8795
TABLE_SIZE_BYTES_IDX) == expectedTableSizeBytes)
@@ -91,6 +99,8 @@ class ChecksumWriterSuite extends AnyFunSuite with MockEngineUtils {
9199
NUM_METADATA_IDX) && actualCheckSumRow.getLong(NUM_METADATA_IDX) == 1L)
92100
assert(!actualCheckSumRow.isNullAt(
93101
NUM_PROTOCOL_IDX) && actualCheckSumRow.getLong(NUM_PROTOCOL_IDX) == 1L)
102+
assert(expectedProtocol === Protocol.fromRow(actualCheckSumRow.getStruct(PROTOCOL_IDX)))
103+
assert(expectedMetadata === Metadata.fromRow(actualCheckSumRow.getStruct(METADATA_IDX)))
94104

95105
if (expectedTxnId.isPresent) {
96106
assert(actualCheckSumRow.getString(TXN_ID_IDX) == expectedTxnId.get())
@@ -99,68 +109,13 @@ class ChecksumWriterSuite extends AnyFunSuite with MockEngineUtils {
99109
}
100110
}
101111

102-
private def verifyMetadataAndProtocol(
103-
actualRow: Row,
104-
expectedMetadata: Metadata,
105-
expectedProtocol: Protocol): Unit = {
106-
checkMetadata(expectedMetadata, actualRow.getStruct(METADATA_IDX))
107-
checkProtocol(expectedProtocol, actualRow.getStruct(PROTOCOL_IDX))
108-
}
109-
110-
// TODO: implement compare in Metadata and remove this method
111-
private def checkMetadata(expectedMetadata: Metadata, actualMetadataRow: Row): Unit = {
112-
assert(actualMetadataRow.getSchema == Metadata.FULL_SCHEMA)
113-
114-
def getOptionalString(field: String): Optional[String] =
115-
Optional.ofNullable(actualMetadataRow.getString(Metadata.FULL_SCHEMA.indexOf(field)))
116-
117-
assert(
118-
actualMetadataRow.getString(Metadata.FULL_SCHEMA.indexOf("id")) == expectedMetadata.getId)
119-
assert(getOptionalString("name") == expectedMetadata.getName)
120-
assert(getOptionalString("description") == expectedMetadata.getDescription)
121-
122-
val formatRow = actualMetadataRow.getStruct(Metadata.FULL_SCHEMA.indexOf("format"))
123-
assert(
124-
formatRow
125-
.getString(
126-
Format.FULL_SCHEMA.indexOf("provider")) == expectedMetadata.getFormat.getProvider)
127-
128-
assert(
129-
actualMetadataRow
130-
.getString(
131-
Metadata.FULL_SCHEMA.indexOf("schemaString")) == expectedMetadata.getSchemaString)
132-
assert(
133-
actualMetadataRow
134-
.getArray(Metadata.FULL_SCHEMA.indexOf("partitionColumns"))
135-
== expectedMetadata.getPartitionColumns)
136-
assert(
137-
Optional
138-
.ofNullable(actualMetadataRow.getLong(Metadata.FULL_SCHEMA.indexOf("createdTime")))
139-
== expectedMetadata.getCreatedTime)
140-
assert(
141-
VectorUtils
142-
.toJavaMap(actualMetadataRow.getMap(Metadata.FULL_SCHEMA.indexOf("configuration")))
143-
== expectedMetadata.getConfiguration)
144-
}
145-
146-
// TODO: implement compare in Protocol and remove this method
147-
private def checkProtocol(expectedProtocol: Protocol, actualProtocolRow: Row): Unit = {
148-
assert(actualProtocolRow.getSchema == Protocol.FULL_SCHEMA)
149-
assert(
150-
expectedProtocol.getMinReaderVersion == actualProtocolRow
151-
.getInt(Protocol.FULL_SCHEMA.indexOf("minReaderVersion")))
152-
assert(
153-
expectedProtocol.getMinWriterVersion == actualProtocolRow
154-
.getInt(Protocol.FULL_SCHEMA.indexOf("minWriterVersion")))
155-
}
156-
157112
private def createTestMetadata(): Metadata = {
158113
new Metadata(
159114
"id",
160115
Optional.of("name"),
161116
Optional.of("description"),
162117
new Format("parquet", Collections.emptyMap()),
163-
"schemaString",
118+
DataTypeJsonSerDe.serializeDataType(new StructType()),
164119
new StructType(),
165120
buildArrayValue(util.Arrays.asList("c3"), StringType.STRING),
166121
Optional.of(123),

0 commit comments

Comments
 (0)