|
| 1 | +/* |
| 2 | + * Copyright (2024) The Delta Lake Project Authors. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package io.delta.kernel.coordinatedcommits; |
| 18 | + |
| 19 | +import io.delta.kernel.TableIdentifier; |
| 20 | +import io.delta.kernel.annotation.Evolving; |
| 21 | +import io.delta.kernel.data.Row; |
| 22 | +import io.delta.kernel.engine.Engine; |
| 23 | +import io.delta.kernel.engine.coordinatedcommits.CommitFailedException; |
| 24 | +import io.delta.kernel.engine.coordinatedcommits.CommitResponse; |
| 25 | +import io.delta.kernel.engine.coordinatedcommits.GetCommitsResponse; |
| 26 | +import io.delta.kernel.engine.coordinatedcommits.UpdatedActions; |
| 27 | +import io.delta.kernel.engine.coordinatedcommits.actions.AbstractMetadata; |
| 28 | +import io.delta.kernel.engine.coordinatedcommits.actions.AbstractProtocol; |
| 29 | +import io.delta.kernel.utils.CloseableIterator; |
| 30 | +import java.io.IOException; |
| 31 | +import java.util.Map; |
| 32 | +import java.util.Optional; |
| 33 | + |
| 34 | +/** |
| 35 | + * The CommitCoordinatorClient is responsible for communicating with the commit coordinator and |
| 36 | + * backfilling commits. Besides {@link #semanticEquals}, it has four main APIs to implement: |
| 37 | + * |
| 38 | + * <ul> |
| 39 | + * <li>{@link #registerTable}: Determine the table config during commit coordinator registration. |
| 40 | + * <li>{@link #commit}: Commit a new version of the table. |
| 41 | + * <li>{@link #getCommits}: Tracks and returns unbackfilled commits. |
| 42 | + * <li>{@link #backfillToVersion}: Ensure that commits are backfilled if/when needed. |
| 43 | + * </ul> |
| 44 | + * |
| 45 | + * @since 3.3.0 |
| 46 | + */ |
| 47 | +@Evolving |
| 48 | +public interface CommitCoordinatorClient { |
| 49 | + |
| 50 | + /** |
| 51 | + * Register the table represented by the given {@code logPath} at the provided {@code |
| 52 | + * currentVersion} with the commit coordinator this commit coordinator client represents. |
| 53 | + * |
| 54 | + * <p>This API is called when the table is being converted from an existing file system table to a |
| 55 | + * coordinated-commit table. |
| 56 | + * |
| 57 | + * <p>When a new coordinated-commit table is being created, the {@code currentVersion} will be -1 |
| 58 | + * and the upgrade commit needs to be a file system commit which will write the backfilled file |
| 59 | + * directly. |
| 60 | + * |
| 61 | + * @param engine The {@link Engine} instance to use, if needed. |
| 62 | + * @param logPath The path to the delta log of the table that should be converted. |
| 63 | + * @param tableIdentifier The table identifier for the table, or {@link Optional#empty()} if the |
| 64 | + * table doesn't use any identifier (i.e. it is path-based). |
| 65 | + * @param currentVersion The version of the table just before conversion. currentVersion + 1 |
| 66 | + * represents the commit that will do the conversion. This must be backfilled atomically. |
| 67 | + * currentVersion + 2 represents the first commit after conversion. This will go through the |
| 68 | + * CommitCoordinatorClient and the client is free to choose when it wants to backfill this |
| 69 | + * commit. |
| 70 | + * @param currentMetadata The metadata of the table at {@code currentVersion}. |
| 71 | + * @param currentProtocol The protocol of the table at {@code currentVersion}. |
| 72 | + * @return A map of key-value pairs which is issued by the commit coordinator to uniquely identify |
| 73 | + * the table. This should be stored in the table's metadata for table property {@link |
| 74 | + * io.delta.kernel.internal.TableConfig#COORDINATED_COMMITS_TABLE_CONF}. This information |
| 75 | + * needs to be passed to the {@link #commit}, {@link #getCommits}, and {@link |
| 76 | + * #backfillToVersion} APIs to identify the table. |
| 77 | + */ |
| 78 | + Map<String, String> registerTable( |
| 79 | + Engine engine, |
| 80 | + String logPath, |
| 81 | + Optional<TableIdentifier> tableIdentifier, |
| 82 | + long currentVersion, |
| 83 | + AbstractMetadata currentMetadata, |
| 84 | + AbstractProtocol currentProtocol); |
| 85 | + |
| 86 | + /** |
| 87 | + * Commit the given set of actions to the table represented by {@code tableDescriptor}. |
| 88 | + * |
| 89 | + * @param engine The {@link Engine} instance to use. This gives client implementations access to |
| 90 | + * {@link io.delta.kernel.engine.JsonHandler#writeJsonFileAtomically} in order to write the |
| 91 | + * given set of actions to an unbackfilled Delta file. |
| 92 | + * @param tableDescriptor The descriptor for the table. |
| 93 | + * @param commitVersion The version of the commit that is being committed. |
| 94 | + * @param actions The set of actions to be committed. |
| 95 | + * @param updatedActions Additional information for the commit, including: |
| 96 | + * <ul> |
| 97 | + * <li>Commit info |
| 98 | + * <li>Metadata changes |
| 99 | + * <li>Protocol changes |
| 100 | + * </ul> |
| 101 | + * |
| 102 | + * @return {@link CommitResponse} containing the file status of the committed file. Note: If the |
| 103 | + * commit is already backfilled, the file status may be omitted, and the client can retrieve |
| 104 | + * this information independently. |
| 105 | + * @throws CommitFailedException if the commit operation fails. |
| 106 | + */ |
| 107 | + CommitResponse commit( |
| 108 | + Engine engine, |
| 109 | + TableDescriptor tableDescriptor, |
| 110 | + long commitVersion, |
| 111 | + CloseableIterator<Row> actions, |
| 112 | + UpdatedActions updatedActions) |
| 113 | + throws CommitFailedException; |
| 114 | + |
| 115 | + /** |
| 116 | + * Get the unbackfilled commits for the table represented by the given {@code tableDescriptor}. |
| 117 | + * Commits older than {@code startVersion} (if given) or newer than {@code endVersion} (if given) |
| 118 | + * are ignored. The returned commits are contiguous and in ascending version order. |
| 119 | + * |
| 120 | + * <p>Note that the first version returned by this API may not be equal to startVersion. This |
| 121 | + * happens when some versions starting from startVersion have already been backfilled and so the |
| 122 | + * commit coordinator may have stopped tracking them. |
| 123 | + * |
| 124 | + * <p>The returned latestTableVersion is the maximum commit version ratified by the commit |
| 125 | + * coordinator. Note that returning latestTableVersion as -1 is acceptable only if the commit |
| 126 | + * coordinator never ratified any version, i.e. it never accepted any unbackfilled commit. |
| 127 | + * |
| 128 | + * @param engine The {@link Engine} instance to use, if needed. |
| 129 | + * @param tableDescriptor The descriptor for the table. |
| 130 | + * @param startVersion The minimum version of the commit that should be returned, or {@link |
| 131 | + * Optional#empty()} if there is no minimum. |
| 132 | + * @param endVersion The maximum version of the commit that should be returned, or {@link |
| 133 | + * Optional#empty()} if there is no maximum. |
| 134 | + * @return {@link GetCommitsResponse} which has a list of {@link |
| 135 | + * io.delta.kernel.engine.coordinatedcommits.Commit}s and the latestTableVersion which is |
| 136 | + * tracked by the {@link CommitCoordinatorClient}. |
| 137 | + */ |
| 138 | + GetCommitsResponse getCommits( |
| 139 | + Engine engine, |
| 140 | + TableDescriptor tableDescriptor, |
| 141 | + Optional<Long> startVersion, |
| 142 | + Optional<Long> endVersion); |
| 143 | + |
| 144 | + /** |
| 145 | + * Backfill all commits up to {@code version} and notify the commit coordinator. |
| 146 | + * |
| 147 | + * <p>If this API returns successfully, that means the backfill must have been completed, although |
| 148 | + * the commit coordinator may not be aware of it yet. |
| 149 | + * |
| 150 | + * @param engine The {@link Engine} instance to use, if needed. |
| 151 | + * @param tableDescriptor The descriptor for the table. |
| 152 | + * @param version The version until which the commit coordinator client should backfill. |
| 153 | + * @param lastKnownBackfilledVersion The last known version that was backfilled before this API |
| 154 | + * was called. If it is {@link Optional#empty()}, then the commit coordinator client should |
| 155 | + * backfill from the beginning of the table. |
| 156 | + * @throws IOException if there is an IO error while backfilling the commits. |
| 157 | + */ |
| 158 | + void backfillToVersion( |
| 159 | + Engine engine, |
| 160 | + TableDescriptor tableDescriptor, |
| 161 | + long version, |
| 162 | + Optional<Long> lastKnownBackfilledVersion) |
| 163 | + throws IOException; |
| 164 | + |
| 165 | + /** |
| 166 | + * Checks whether this CommitCoordinatorClient is semantically equal to {@code other}; returns |
| 167 | + * {@code true} if so. NOTE(review): the criteria for semantic equality are not stated here. |
| 168 | + */ |
| 169 | + boolean semanticEquals(CommitCoordinatorClient other); |
| 170 | +} |
0 commit comments