Skip to content
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions extensions/federation/hadoop/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

// Build plugins applied to the Hadoop federation extension module.
plugins {
// Polaris convention plugin, defined elsewhere in the build.
// NOTE(review): presumably applies the shared "client" module conventions — confirm.
id("polaris-client")
// Jandex plugin resolved from the version catalog.
// NOTE(review): presumably builds a bean index so the CDI-annotated factory in this module can
// be discovered at runtime — confirm.
alias(libs.plugins.jandex)
}

// Dependencies of the Hadoop federation extension. Everything is declared as `implementation`,
// so none of these are exposed on the compile classpath of consumers of this module.
dependencies {
// Polaris dependencies
implementation(project(":polaris-core"))

// Iceberg artifacts are declared without versions; the versions come from the Iceberg BOM
// imported as a platform on the next line.
implementation(platform(libs.iceberg.bom))
implementation("org.apache.iceberg:iceberg-api")
implementation("org.apache.iceberg:iceberg-core")
implementation("org.apache.iceberg:iceberg-common")

// Hadoop dependencies (for Hadoop catalog support)
// A number of hadoop-common's transitive dependencies are excluded below.
// NOTE(review): the grouping rationale in the comments inside this block is an assumption —
// confirm against the project's dependency policy.
implementation(libs.hadoop.common) {
// Logging bindings/backends (presumably excluded so they do not clash with the logging
// backend provided by the runtime).
exclude("org.slf4j", "slf4j-reload4j")
exclude("org.slf4j", "slf4j-log4j12")
exclude("ch.qos.reload4j", "reload4j")
exclude("log4j", "log4j")
// Server-side / coordination libraries (presumably not needed for catalog access).
exclude("org.apache.zookeeper", "zookeeper")
exclude("org.apache.hadoop.thirdparty", "hadoop-shaded-protobuf_3_25")
// Jersey 1.x artifacts (presumably excluded to avoid conflicts with the JAX-RS stack used by
// the server).
exclude("com.github.pjfanning", "jersey-json")
exclude("com.sun.jersey", "jersey-core")
exclude("com.sun.jersey", "jersey-server")
exclude("com.sun.jersey", "jersey-servlet")
exclude("io.dropwizard.metrics", "metrics-core")
}
// Hadoop client API and runtime artifacts, taken from the version catalog.
implementation(libs.hadoop.client.api)
implementation(libs.hadoop.client.runtime)

// CDI dependencies for runtime discovery
// (the factory class in this module is annotated with @ApplicationScoped and @Identifier)
implementation(libs.jakarta.enterprise.cdi.api)
implementation(libs.smallrye.common.annotation)

// Logging
implementation(libs.slf4j.api)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.extensions.federation.hadoop;

import io.smallrye.common.annotation.Identifier;
import jakarta.enterprise.context.ApplicationScoped;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.polaris.core.catalog.ExternalCatalogFactory;
import org.apache.polaris.core.connection.AuthenticationParametersDpo;
import org.apache.polaris.core.connection.AuthenticationType;
import org.apache.polaris.core.connection.ConnectionConfigInfoDpo;
import org.apache.polaris.core.connection.ConnectionType;
import org.apache.polaris.core.connection.hadoop.HadoopConnectionConfigInfoDpo;
import org.apache.polaris.core.secrets.UserSecretsManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Factory class for creating a Hadoop catalog handle based on connection configuration.
 *
 * <p>The factory is an application-scoped CDI bean qualified with {@link
 * ConnectionType#HADOOP_FACTORY_IDENTIFIER}, which is the identifier callers use to select it
 * among the available {@link ExternalCatalogFactory} implementations.
 */
@ApplicationScoped
@Identifier(ConnectionType.HADOOP_FACTORY_IDENTIFIER)
public class HadoopFederatedCatalogFactory implements ExternalCatalogFactory {
  private static final Logger LOGGER = LoggerFactory.getLogger(HadoopFederatedCatalogFactory.class);

  /**
   * Creates and initializes a {@link HadoopCatalog} for the given connection configuration.
   *
   * @param connectionConfigInfoDpo the connection configuration; must be a {@link
   *     HadoopConnectionConfigInfoDpo} that uses IMPLICIT authentication
   * @param userSecretsManager the secrets manager handed to the connection configuration when it
   *     builds the Iceberg catalog properties
   * @return the initialized Hadoop catalog
   * @throws IllegalStateException if the configuration does not use IMPLICIT authentication or is
   *     not a Hadoop connection configuration
   */
  @Override
  public Catalog createCatalog(
      ConnectionConfigInfoDpo connectionConfigInfoDpo, UserSecretsManager userSecretsManager) {
    // Currently, Polaris supports Hadoop federation only via IMPLICIT authentication.
    // Hence, prior to initializing the configuration, ensure that the catalog uses
    // IMPLICIT authentication. Missing authentication parameters are rejected the same way
    // instead of surfacing as a NullPointerException.
    AuthenticationParametersDpo authenticationParametersDpo =
        connectionConfigInfoDpo.getAuthenticationParameters();
    if (authenticationParametersDpo == null
        || authenticationParametersDpo.getAuthenticationTypeCode()
            != AuthenticationType.IMPLICIT.getCode()) {
      throw new IllegalStateException("Hadoop federation only supports IMPLICIT authentication.");
    }

    // Validate the concrete type explicitly so that a mismatched configuration is reported as
    // the documented IllegalStateException rather than a ClassCastException.
    if (!(connectionConfigInfoDpo instanceof HadoopConnectionConfigInfoDpo)) {
      throw new IllegalStateException(
          "Hadoop federation requires a Hadoop connection configuration, but got: "
              + connectionConfigInfoDpo.getClass().getName());
    }
    HadoopConnectionConfigInfoDpo hadoopConnectionConfig =
        (HadoopConnectionConfigInfoDpo) connectionConfigInfoDpo;

    String warehouse = hadoopConnectionConfig.getWarehouse();
    LOGGER.debug("Creating federated Hadoop catalog for warehouse {}", warehouse);

    // Use the no-arg constructor and initialize exactly once. The (conf, warehouse) constructor
    // already performs an initialization of its own, so following it with initialize(...) would
    // set the catalog up twice and drop the resources created by the first pass.
    HadoopCatalog hadoopCatalog = new HadoopCatalog();
    hadoopCatalog.setConf(new Configuration());
    hadoopCatalog.initialize(
        warehouse, hadoopConnectionConfig.asIcebergCatalogProperties(userSecretsManager));
    return hadoopCatalog;
  }
}
1 change: 1 addition & 0 deletions gradle/projects.main.properties
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ polaris-minio-testcontainer=tools/minio-testcontainer
polaris-version=tools/version
polaris-misc-types=tools/misc-types
polaris-persistence-varint=nosql/persistence/varint
polaris-extensions-federation-hadoop=extensions/federation/hadoop

polaris-config-docs-annotations=tools/config-docs/annotations
polaris-config-docs-generator=tools/config-docs/generator
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.catalog;

import org.apache.iceberg.catalog.Catalog;
import org.apache.polaris.core.connection.ConnectionConfigInfoDpo;
import org.apache.polaris.core.secrets.UserSecretsManager;

/**
 * Factory interface for creating external catalog handles based on connection configuration.
 *
 * <p>Implementations should be annotated with CDI annotations and use the {@code @Identifier}
 * annotation to specify which connection type they support. The identifier value is expected to
 * match one of the factory identifier constants declared on {@code ConnectionType} (for example
 * {@code ConnectionType.HADOOP_FACTORY_IDENTIFIER}).
 */
public interface ExternalCatalogFactory {

/**
 * Creates a catalog handle for the given connection configuration.
 *
 * @param connectionConfig the connection configuration describing the external catalog
 * @param userSecretsManager the user secrets manager for handling credentials
 * @return the initialized catalog
 * @throws IllegalStateException if the connection configuration is invalid
 */
Catalog createCatalog(
ConnectionConfigInfoDpo connectionConfig, UserSecretsManager userSecretsManager);
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ public enum ConnectionType {
HADOOP(2),
;

public static final String ICEBERG_REST_FACTORY_IDENTIFIER = "ICEBERG_REST";
public static final String HADOOP_FACTORY_IDENTIFIER = "HADOOP";

private static final ConnectionType[] REVERSE_MAPPING_ARRAY;

static {
Expand Down Expand Up @@ -77,4 +80,22 @@ public enum ConnectionType {
public int getCode() {
return this.code;
}

/**
 * Resolves the identifier string under which the ExternalCatalogFactory for this connection type
 * is looked up via CDI.
 *
 * @return the factory identifier string for this connection type
 * @throws UnsupportedOperationException if this connection type has no factory identifier
 */
public String getFactoryIdentifier() {
  if (this == ICEBERG_REST) {
    return ICEBERG_REST_FACTORY_IDENTIFIER;
  }
  if (this == HADOOP) {
    return HADOOP_FACTORY_IDENTIFIER;
  }
  // Any other connection type has no registered factory.
  throw new UnsupportedOperationException("No factory identifier for connection type: " + this);
}
}
3 changes: 3 additions & 0 deletions runtime/service/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ dependencies {
implementation(project(":polaris-api-management-service"))
implementation(project(":polaris-api-iceberg-service"))
implementation(project(":polaris-api-catalog-service"))
if ((project.findProperty("NonRESTCatalogs") as String?)?.contains("HADOOP") == true) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a breaking change to Polaris that effectively removes functionality in the Polaris server.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC the functionality is still there, but the way you need to build Polaris to use that functionality does change (across versions).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In any case, shouldn't this block rather be declared in runtime/server?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding runtime/server module: We reference the ExternalCatalogFactory in the IcebergCatalogHandler which is in runtime/service module. So isn't this the correct location?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But afaict ExternalCatalogFactory is an interface in polaris-core. We don't reference HadoopFederatedCatalogFactory directly in code.

In runtime/server we already have other similar runtimeOnly declarations:

  runtimeOnly(project(":polaris-eclipselink"))
  runtimeOnly("org.postgresql:postgresql")
  runtimeOnly(project(":polaris-relational-jdbc"))
  runtimeOnly("io.quarkus:quarkus-jdbc-postgresql")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had that until c73966c

I can do either. From the Polaris OSS sync last week (the discussion regarding Hive federation), my takeaway was that we wanted to avoid having the default Polaris JAR depend on anything Hadoop-related.

However, if you'd prefer compiling it each time (and only loading if necessary), I can revert that change. I will send out a separate PR without dynamic compilation and update a README.md for this PR. Please pick the option that's best suited according to you.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to adding something to the README/getting-started or similar to better make it an easy copy/paste command to decide whether to compile with or without the extended dependencies.

My understanding is indeed that this check was directly to address the concern others had about having Hadoop (or Hive in the future) compile-time dependencies be always present for all Polaris builds.

Personally I don't feel too strongly either way, so I'm okay with or without the additional compilation property.

Copy link
Contributor

@dennishuo dennishuo Aug 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, per offline discussion, +1 to what @adutra said about putting the Hadoop extension into runtime/server if it works correctly for Quarkus finding it in the runtime assembly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

runtimeOnly(project(":polaris-extensions-federation-hadoop"))
}

runtimeOnly(project(":polaris-relational-jdbc"))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import jakarta.enterprise.context.RequestScoped;
import jakarta.enterprise.inject.Any;
import jakarta.enterprise.inject.Instance;
import jakarta.inject.Inject;
import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.HttpHeaders;
Expand Down Expand Up @@ -61,6 +63,7 @@
import org.apache.iceberg.rest.responses.LoadTableResponse;
import org.apache.polaris.core.auth.AuthenticatedPolarisPrincipal;
import org.apache.polaris.core.auth.PolarisAuthorizer;
import org.apache.polaris.core.catalog.ExternalCatalogFactory;
import org.apache.polaris.core.context.CallContext;
import org.apache.polaris.core.context.RealmContext;
import org.apache.polaris.core.entity.PolarisEntity;
Expand Down Expand Up @@ -144,6 +147,7 @@ public class IcebergCatalogAdapter
private final CatalogPrefixParser prefixParser;
private final ReservedProperties reservedProperties;
private final CatalogHandlerUtils catalogHandlerUtils;
private final Instance<ExternalCatalogFactory> externalCatalogFactories;

@Inject
public IcebergCatalogAdapter(
Expand All @@ -157,7 +161,8 @@ public IcebergCatalogAdapter(
PolarisAuthorizer polarisAuthorizer,
CatalogPrefixParser prefixParser,
ReservedProperties reservedProperties,
CatalogHandlerUtils catalogHandlerUtils) {
CatalogHandlerUtils catalogHandlerUtils,
@Any Instance<ExternalCatalogFactory> externalCatalogFactories) {
this.realmContext = realmContext;
this.callContext = callContext;
this.catalogFactory = catalogFactory;
Expand All @@ -169,6 +174,7 @@ public IcebergCatalogAdapter(
this.prefixParser = prefixParser;
this.reservedProperties = reservedProperties;
this.catalogHandlerUtils = catalogHandlerUtils;
this.externalCatalogFactories = externalCatalogFactories;
}

/**
Expand Down Expand Up @@ -205,7 +211,8 @@ IcebergCatalogHandler newHandlerWrapper(SecurityContext securityContext, String
catalogName,
polarisAuthorizer,
reservedProperties,
catalogHandlerUtils);
catalogHandlerUtils,
externalCatalogFactories);
}

@Override
Expand Down
Loading