-
Notifications
You must be signed in to change notification settings - Fork 722
[#9647]feat(catalog-lakehouse-generic): Add external Delta table support for generic lakehouse catalog #9678
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.gravitino.catalog.lakehouse.delta; | ||
|
|
||
| /** | ||
| * Constants for Delta Lake table format support in Gravitino lakehouse catalog. | ||
| * | ||
| * <p>This class defines constants used for managing external Delta tables, including table format | ||
| * identifiers. | ||
| */ | ||
| public class DeltaConstants { | ||
|
|
||
| /** The table format identifier for Delta Lake tables. */ | ||
| public static final String DELTA_TABLE_FORMAT = "delta"; | ||
|
|
||
| private DeltaConstants() { | ||
| // Prevent instantiation | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.gravitino.catalog.lakehouse.delta; | ||
|
|
||
| import static org.apache.gravitino.catalog.lakehouse.delta.DeltaConstants.DELTA_TABLE_FORMAT; | ||
|
|
||
| import java.util.Collections; | ||
| import java.util.List; | ||
| import org.apache.gravitino.EntityStore; | ||
| import org.apache.gravitino.catalog.ManagedSchemaOperations; | ||
| import org.apache.gravitino.catalog.ManagedTableOperations; | ||
| import org.apache.gravitino.catalog.lakehouse.generic.LakehouseTableDelegator; | ||
| import org.apache.gravitino.connector.PropertyEntry; | ||
| import org.apache.gravitino.rel.Table; | ||
| import org.apache.gravitino.storage.IdGenerator; | ||
|
|
||
| /** | ||
| * Delegator for Delta Lake table format in Gravitino lakehouse catalog. | ||
| * | ||
| * <p>This delegator provides table operations specific to external Delta Lake tables. It enables | ||
| * Gravitino to register and manage metadata for existing Delta tables without creating or modifying | ||
| * the underlying Delta table data. | ||
| * | ||
| * <p>Key features: | ||
| * | ||
| * <ul> | ||
| * <li>Supports external Delta table registration (requires {@code external=true} property) | ||
| * <li>Schema is provided by user in CREATE TABLE request (not read from Delta log) | ||
| * <li>Uses standard table properties ({@code location}, {@code external}) - no Delta-specific | ||
| * properties needed | ||
| * <li>Drop operations only remove metadata, preserving the Delta table data | ||
| * <li>No ALTER TABLE or PURGE support (external tables should be modified directly) | ||
| * </ul> | ||
| */ | ||
| public class DeltaTableDelegator implements LakehouseTableDelegator { | ||
|
|
||
| @Override | ||
| public String tableFormat() { | ||
| return DELTA_TABLE_FORMAT; | ||
| } | ||
|
|
||
| /** | ||
| * Returns Delta-specific table property entries. | ||
| * | ||
| * <p>Delta tables use standard table properties ({@link Table#PROPERTY_LOCATION} and {@link | ||
| * Table#PROPERTY_EXTERNAL}), so no Delta-specific properties are needed. | ||
| * | ||
| * @return an empty list since all required properties are standard table properties | ||
| */ | ||
| @Override | ||
| public List<PropertyEntry<?>> tablePropertyEntries() { | ||
| return Collections.emptyList(); | ||
| } | ||
|
|
||
| @Override | ||
| public ManagedTableOperations createTableOps( | ||
| EntityStore store, ManagedSchemaOperations schemaOps, IdGenerator idGenerator) { | ||
| return new DeltaTableOperations(store, schemaOps, idGenerator); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,248 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.gravitino.catalog.lakehouse.delta; | ||
|
|
||
| import com.google.common.base.Preconditions; | ||
| import java.util.Collections; | ||
| import java.util.Map; | ||
| import org.apache.commons.lang3.StringUtils; | ||
| import org.apache.gravitino.EntityStore; | ||
| import org.apache.gravitino.NameIdentifier; | ||
| import org.apache.gravitino.catalog.ManagedSchemaOperations; | ||
| import org.apache.gravitino.catalog.ManagedTableOperations; | ||
| import org.apache.gravitino.connector.SupportsSchemas; | ||
| import org.apache.gravitino.exceptions.NoSuchSchemaException; | ||
| import org.apache.gravitino.exceptions.NoSuchTableException; | ||
| import org.apache.gravitino.exceptions.TableAlreadyExistsException; | ||
| import org.apache.gravitino.rel.Column; | ||
| import org.apache.gravitino.rel.Table; | ||
| import org.apache.gravitino.rel.TableChange; | ||
| import org.apache.gravitino.rel.expressions.distributions.Distribution; | ||
| import org.apache.gravitino.rel.expressions.distributions.Distributions; | ||
| import org.apache.gravitino.rel.expressions.sorts.SortOrder; | ||
| import org.apache.gravitino.rel.expressions.sorts.SortOrders; | ||
| import org.apache.gravitino.rel.expressions.transforms.Transform; | ||
| import org.apache.gravitino.rel.expressions.transforms.Transforms; | ||
| import org.apache.gravitino.rel.indexes.Index; | ||
| import org.apache.gravitino.rel.indexes.Indexes; | ||
| import org.apache.gravitino.storage.IdGenerator; | ||
|
|
||
| /** | ||
| * Table operations for Delta Lake tables in Gravitino lakehouse catalog. | ||
| * | ||
| * <p>This class handles the lifecycle of external Delta Lake table metadata in Gravitino. It | ||
| * focuses on metadata management only and does not interact with the actual Delta table data or | ||
| * Delta log files. | ||
| * | ||
| * <p><b>Supported Operations:</b> | ||
| * | ||
| * <ul> | ||
| * <li><b>Create Table:</b> Registers an external Delta table by storing its schema and location | ||
| * in Gravitino's metadata store. Requires {@code external=true} property. | ||
| * <li><b>Load Table:</b> Retrieves table metadata from Gravitino's metadata store | ||
| * <li><b>Drop Table:</b> Removes metadata only, preserving the physical Delta table data | ||
| * </ul> | ||
| * | ||
| * <p><b>Unsupported Operations:</b> | ||
| * | ||
| * <ul> | ||
| * <li><b>Alter Table:</b> Not supported; users should modify the Delta table directly using Delta | ||
| * Lake APIs, then optionally recreate the catalog entry with updated schema | ||
| * <li><b>Purge Table:</b> Not supported for external tables; data lifecycle is managed externally | ||
| * </ul> | ||
| * | ||
| * <p><b>Design Decisions:</b> | ||
| * | ||
| * <ul> | ||
| * <li>Only supports external tables ({@code external=true} must be explicitly set) | ||
| * <li>Schema comes from CREATE TABLE request (not validated against Delta log) | ||
| * <li>User is responsible for ensuring schema accuracy matches the actual Delta table | ||
| * <li>Partitions, distribution, sort orders, and indexes must not be specified (throws | ||
| * IllegalArgumentException) | ||
| * </ul> | ||
| */ | ||
| public class DeltaTableOperations extends ManagedTableOperations { | ||
|
|
||
| private final EntityStore store; | ||
| private final ManagedSchemaOperations schemaOps; | ||
| private final IdGenerator idGenerator; | ||
|
|
||
| public DeltaTableOperations( | ||
| EntityStore store, ManagedSchemaOperations schemaOps, IdGenerator idGenerator) { | ||
| this.store = store; | ||
| this.schemaOps = schemaOps; | ||
| this.idGenerator = idGenerator; | ||
| } | ||
|
|
||
| @Override | ||
| protected EntityStore store() { | ||
| return store; | ||
| } | ||
|
|
||
| @Override | ||
| protected SupportsSchemas schemas() { | ||
| return schemaOps; | ||
| } | ||
|
|
||
| @Override | ||
| protected IdGenerator idGenerator() { | ||
| return idGenerator; | ||
| } | ||
|
|
||
| /** | ||
| * Creates an external Delta table by registering its metadata in Gravitino. | ||
| * | ||
| * <p>This method validates that the table is explicitly marked as external and has a valid | ||
| * location, then stores the metadata in Gravitino's entity store. It does not create or modify | ||
| * the actual Delta table data. | ||
| * | ||
| * <p><b>Required Properties:</b> | ||
| * | ||
| * <ul> | ||
| * <li>{@code external=true} - Must be explicitly set to create external Delta tables | ||
| * <li>{@code location} - Storage path of the existing Delta table | ||
| * </ul> | ||
| * | ||
| * <p><b>Disallowed Parameters:</b> | ||
| * | ||
| * <ul> | ||
| * <li>Partitions - Delta table partitioning is managed in the Delta log, not Gravitino | ||
| * <li>Distribution - Not applicable for external Delta tables | ||
| * <li>Sort orders - Not applicable for external Delta tables | ||
| * <li>Indexes - Not applicable for external Delta tables | ||
| * </ul> | ||
| * | ||
| * @param ident the table identifier | ||
| * @param columns the table columns (schema provided by user) | ||
| * @param comment the table comment | ||
| * @param properties the table properties (must include {@code external=true} and {@code | ||
| * location}) | ||
| * @param partitions the partitioning (must be empty or null) | ||
| * @param distribution the distribution (must be NONE or null) | ||
| * @param sortOrders the sort orders (must be empty or null) | ||
| * @param indexes the indexes (must be empty or null) | ||
| * @return the created table metadata | ||
| * @throws NoSuchSchemaException if the schema does not exist | ||
| * @throws TableAlreadyExistsException if the table already exists | ||
| * @throws IllegalArgumentException if {@code external=true} is not set, location is missing, or | ||
| * any partitions, distribution, sort orders, or indexes are specified | ||
| */ | ||
| @Override | ||
| public Table createTable( | ||
| NameIdentifier ident, | ||
| Column[] columns, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will the columns be different from the underlying location?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it is possible. But since the query engine only honors the location to read the table, so this inconsistency only affects the display. |
||
| String comment, | ||
| Map<String, String> properties, | ||
| Transform[] partitions, | ||
| Distribution distribution, | ||
| SortOrder[] sortOrders, | ||
| Index[] indexes) | ||
| throws NoSuchSchemaException, TableAlreadyExistsException { | ||
| Map<String, String> copiedProperties = properties == null ? Collections.emptyMap() : properties; | ||
jerryshao marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| // Validate that the table is explicitly marked as external | ||
| Preconditions.checkArgument( | ||
| copiedProperties.containsKey(Table.PROPERTY_EXTERNAL) | ||
| && "true".equalsIgnoreCase(copiedProperties.get(Table.PROPERTY_EXTERNAL)), | ||
| "Gravitino only supports creating external Delta tables" | ||
| + " for now. Please set property 'external=true' when creating Delta tables."); | ||
|
|
||
| // Validate required location property | ||
| String location = copiedProperties.get(Table.PROPERTY_LOCATION); | ||
| Preconditions.checkArgument( | ||
| StringUtils.isNotBlank(location), | ||
| "Property '%s' is required for external Delta tables. Please specify the" | ||
| + " Delta table location.", | ||
| Table.PROPERTY_LOCATION); | ||
|
|
||
| // Validate that partitioning, distribution, sort orders, and indexes are not specified | ||
| Preconditions.checkArgument( | ||
| partitions == null || partitions.length == 0, | ||
| "Delta table doesn't support specifying partitioning in CREATE TABLE. " | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Partition is an import concept in delta, what if we support specifying the partition when creating table, is there something bad effect?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I refer to UC that they don't support partitioned table, so I didn't add this support for now. I can add a TODO and create an issue to follow the partition support thing.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I updated the code to support identity partitioning. |
||
| + "Partitioning is managed in the Delta transaction log."); | ||
|
|
||
| Preconditions.checkArgument( | ||
| distribution == null || distribution.equals(Distributions.NONE), | ||
| "Delta table doesn't support specifying distribution in CREATE TABLE. " | ||
| + "Distribution is not applicable for external Delta tables."); | ||
|
|
||
| Preconditions.checkArgument( | ||
| sortOrders == null || sortOrders.length == 0, | ||
| "Delta table doesn't support specifying sort orders in CREATE TABLE. " | ||
| + "Sort orders are not applicable for external Delta tables."); | ||
|
|
||
| Preconditions.checkArgument( | ||
| indexes == null || indexes.length == 0, | ||
| "Delta table doesn't support specifying indexes in CREATE TABLE. " | ||
| + "Indexes are not applicable for external Delta tables."); | ||
|
|
||
| // Store metadata in entity store (schema from user request) | ||
| return super.createTable( | ||
| ident, | ||
| columns, | ||
| comment, | ||
| copiedProperties, | ||
| Transforms.EMPTY_TRANSFORM, | ||
| Distributions.NONE, | ||
| SortOrders.NONE, | ||
| Indexes.EMPTY_INDEXES); | ||
| } | ||
|
|
||
| /** | ||
| * Alters a Delta table. | ||
| * | ||
| * <p>This operation is not supported for external Delta tables. Users should modify the Delta | ||
| * table directly using Delta Lake APIs or tools. If the schema changes, the table entry in | ||
| * Gravitino can be dropped and recreated with the updated schema. | ||
| * | ||
| * @param ident the table identifier | ||
| * @param changes the table changes to apply | ||
| * @return never returns (always throws exception) | ||
| * @throws UnsupportedOperationException always thrown as ALTER is not supported | ||
| */ | ||
| @Override | ||
| public Table alterTable(NameIdentifier ident, TableChange... changes) | ||
| throws NoSuchTableException, IllegalArgumentException { | ||
| throw new UnsupportedOperationException( | ||
| "ALTER TABLE operations are not supported for external Delta tables. " | ||
| + "Please modify the Delta table directly using Delta Lake APIs or tools, " | ||
| + "then DROP and recreate the table entry in Gravitino with the updated schema if" | ||
| + " needed."); | ||
| } | ||
|
|
||
| /** | ||
| * Purges a Delta table (removes both metadata and data). | ||
| * | ||
| * <p>This operation is not supported for external Delta tables. External table data is managed | ||
| * outside Gravitino, so purging is not applicable. Use {@link #dropTable(NameIdentifier)} to | ||
| * remove only the metadata, leaving the Delta table data intact. | ||
| * | ||
| * @param ident the table identifier | ||
| * @return never returns (always throws exception) | ||
| * @throws UnsupportedOperationException always thrown as PURGE is not supported for external | ||
| * tables | ||
| */ | ||
| @Override | ||
| public boolean purgeTable(NameIdentifier ident) { | ||
| throw new UnsupportedOperationException( | ||
| "Purge operation is not supported for external Delta tables. " | ||
| + "External table data is managed outside of Gravitino. " | ||
| + "Use dropTable() to remove metadata only, preserving the Delta table data."); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -40,7 +40,10 @@ public class GenericTablePropertiesMetadata extends BasePropertiesMetadata { | |||||
| ImmutableList.of( | ||||||
| stringOptionalPropertyEntry( | ||||||
| Table.PROPERTY_LOCATION, | ||||||
| "The root directory of the generic table.", | ||||||
| "The directory of the table. For managed table, if this is not specified" | ||||||
| + " in the table property, it will use the one in catalog / schema level and " | ||||||
| + "concatenate with the table name. For external table, this property is" | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems to contradict the configuration for the Lance table. An external Lance table can still use the concat policy and does not have such a limitation. Do you need to change Lance along the way?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does Lance REST support create/register table without location provided?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean, we can set the properties Currently, all table register/created by Lance REST are external tables, and the table property |
||||||
| + "required.", | ||||||
|
||||||
| + "required.", | |
| + " required.", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
seems PROPERTY_EXTERNAL is not handled in the
GenericTablePropertiesMetadatagravitino/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/generic/GenericTablePropertiesMetadata.java
Lines 39 to 51 in cc039f8