-
Notifications
You must be signed in to change notification settings - Fork 1.3k
[core][rest] Add branch merge support for append-only tables #7882
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,6 +48,7 @@ | |
| import org.apache.paimon.rest.requests.ForwardBranchRequest; | ||
| import org.apache.paimon.rest.requests.ListPartitionsByNamesRequest; | ||
| import org.apache.paimon.rest.requests.MarkDonePartitionsRequest; | ||
| import org.apache.paimon.rest.requests.MergeBranchRequest; | ||
| import org.apache.paimon.rest.requests.RegisterTableRequest; | ||
| import org.apache.paimon.rest.requests.RenameBranchRequest; | ||
| import org.apache.paimon.rest.requests.RenameTableRequest; | ||
|
|
@@ -1006,6 +1007,25 @@ public void fastForward(Identifier identifier, String branch) { | |
| restAuthFunction); | ||
| } | ||
|
|
||
| /** | ||
| * Merge branch for table. | ||
| * | ||
| * <p>Builds a {@link MergeBranchRequest} from the two branch names and POSTs it to the | ||
| * merge-branch resource path of the given table. | ||
| * | ||
| * @param identifier database name and table name. | ||
| * @param sourceBranch source branch name | ||
| * @param targetBranch target branch name | ||
| * @throws NoSuchResourceException thrown on HTTP 404, meaning the branch or table does not | ||
| * exist | ||
| * @throws ForbiddenException thrown on HTTP 403, meaning the request is not permitted for | ||
| * this table | ||
| */ | ||
| public void mergeBranch(Identifier identifier, String sourceBranch, String targetBranch) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need it as a REST API? Does it feel like just creating a new commit?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the question. Yes, it creates a new commit on the target branch. I added it for REST catalog because branch merge needs to read and validate metadata from both branches before committing. It is also aligned with fastForward, which is already a REST branch operation. |
||
| MergeBranchRequest request = new MergeBranchRequest(sourceBranch, targetBranch); | ||
| client.post( | ||
| resourcePaths.mergeBranch(identifier.getDatabaseName(), identifier.getObjectName()), | ||
| request, | ||
| restAuthFunction); | ||
| } | ||
|
|
||
| /** | ||
| * List branches for table. | ||
| * | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.paimon.rest.requests; | ||
|
|
||
| import org.apache.paimon.rest.RESTRequest; | ||
|
|
||
| import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator; | ||
| import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonGetter; | ||
| import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; | ||
| import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonProperty; | ||
|
|
||
| /** | ||
| * Request for merging branch. | ||
| * | ||
| * <p>Carries the source and target branch names, serialized under the JSON properties | ||
| * {@code sourceBranch} and {@code targetBranch}. Unknown JSON properties are ignored. | ||
| */ | ||
| @JsonIgnoreProperties(ignoreUnknown = true) | ||
| public class MergeBranchRequest implements RESTRequest { | ||
|
|
||
| private static final String FIELD_SOURCE_BRANCH = "sourceBranch"; | ||
| private static final String FIELD_TARGET_BRANCH = "targetBranch"; | ||
|
|
||
| @JsonProperty(FIELD_SOURCE_BRANCH) | ||
| private final String sourceBranch; | ||
|
|
||
| @JsonProperty(FIELD_TARGET_BRANCH) | ||
| private final String targetBranch; | ||
|
|
||
| /** | ||
| * Creates a merge request; also used as the JSON creator for deserialization. | ||
| * | ||
| * @param sourceBranch source branch name, stored as given | ||
| * @param targetBranch target branch name, stored as given | ||
| */ | ||
| @JsonCreator | ||
| public MergeBranchRequest( | ||
| @JsonProperty(FIELD_SOURCE_BRANCH) String sourceBranch, | ||
| @JsonProperty(FIELD_TARGET_BRANCH) String targetBranch) { | ||
| this.sourceBranch = sourceBranch; | ||
| this.targetBranch = targetBranch; | ||
| } | ||
|
|
||
| /** Returns the source branch name passed to the constructor. */ | ||
| @JsonGetter(FIELD_SOURCE_BRANCH) | ||
| public String sourceBranch() { | ||
| return sourceBranch; | ||
| } | ||
|
|
||
| /** Returns the target branch name passed to the constructor. */ | ||
| @JsonGetter(FIELD_TARGET_BRANCH) | ||
| public String targetBranch() { | ||
| return targetBranch; | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like we don't need this option; just throwing an exception during branch merging is OK.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the review. I added this option mainly to keep a clear invariant for branch merge: the table history must be pure-append.
My concern is that checking this only when mergeBranch is called may not be reliable, because old snapshots can expire. In that case, we may no longer be able to tell whether compaction or INSERT OVERWRITE happened before, and file-level merge could become unsafe.
So the option is intended to make this an explicit opt-in behavior and keep the table merge-safe from the beginning by rejecting compaction / INSERT OVERWRITE.
Another possible approach could be to persist some state indicating whether unsafe operations have ever happened, but that seems a bit heavier to me.
Please let me know what you think.