@@ -88,7 +88,7 @@ public Set<CompactionInfo> findPotentialCompactions(long lastChecked, ShowCompac
Set<String> skipDBs, Set<String> skipTables) {
Set<CompactionInfo> compactionTargets = Sets.newHashSet();

getTables().stream()
getTableNames().stream()
.filter(table -> !skipDBs.contains(table.getDb()))
.filter(table -> !skipTables.contains(table.getNotEmptyDbTable()))
.map(table -> {
@@ -126,9 +126,9 @@ public Set<CompactionInfo> findPotentialCompactions(long lastChecked, ShowCompac
return compactionTargets;
}

private List<org.apache.hadoop.hive.common.TableName> getTables() {
private List<org.apache.hadoop.hive.common.TableName> getTableNames() {
try {
return IcebergTableUtil.getTableFetcher(client, null, "*", null).getTables();
return IcebergTableUtil.getTableFetcher(client, null, "*", null).getTableNames();
} catch (Exception e) {
throw new RuntimeMetaException(e, "Error getting table names");
}
@@ -29,15 +29,13 @@
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetastoreTaskThread;
import org.apache.hadoop.hive.metastore.api.GetTableRequest;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.txn.NoMutex;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.iceberg.ExpireSnapshots;
import org.apache.iceberg.Table;
import org.apache.iceberg.mr.hive.IcebergTableUtil;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -79,35 +77,21 @@ public void run() {

private void expireTables(String catalogName, String dbPattern, String tablePattern) {
try (IMetaStoreClient msc = new HiveMetaStoreClient(conf)) {
// TODO: HIVE-28952 – modify TableFetcher to return HMS Table API objects directly,
// avoiding the need for subsequent msc.getTable calls to fetch each matched table individually
List<TableName> tables = IcebergTableUtil.getTableFetcher(msc, catalogName, dbPattern, tablePattern).getTables();

int maxBatchSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
List<org.apache.hadoop.hive.metastore.api.Table> tables =
IcebergTableUtil.getTableFetcher(msc, catalogName, dbPattern, tablePattern).getTables(maxBatchSize);
LOG.debug("{} candidate tables found", tables.size());

for (TableName table : tables) {
try {
expireSnapshotsForTable(getIcebergTable(table, msc));
} catch (Exception e) {
LOG.error("Exception while running iceberg expiry service on catalog/db/table: {}/{}/{}",
catalogName, dbPattern, tablePattern, e);
}
for (org.apache.hadoop.hive.metastore.api.Table table : tables) {
expireSnapshotsForTable(getIcebergTable(table));
@okumin (Contributor), Aug 16, 2025:

I recalled that we would like to retain the try-catch. We intentionally added it to avoid skipping everything when a single expiration fails.
See also: #5786 (comment)
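
A minimal sketch (not part of this diff) of how the per-table try-catch could be restored around the new loop, reusing the error handling from the removed code above:

    for (org.apache.hadoop.hive.metastore.api.Table table : tables) {
      try {
        expireSnapshotsForTable(getIcebergTable(table));
      } catch (Exception e) {
        // Log and continue, so a single failing table does not abort snapshot expiry for the rest.
        LOG.error("Exception while running iceberg expiry service on table {}.{}",
            table.getDbName(), table.getTableName(), e);
      }
    }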

}
} catch (Exception e) {
throw new RuntimeException("Error while getting tables from metastore", e);
}
}

private Table getIcebergTable(TableName tableName, IMetaStoreClient msc) {
return tableCache.get(tableName, key -> {
LOG.debug("Getting iceberg table from metastore as it's not present in table cache: {}", tableName);
GetTableRequest request = new GetTableRequest(tableName.getDb(), tableName.getTable());
try {
return IcebergTableUtil.getTable(conf, msc.getTable(request));
} catch (TException e) {
throw new RuntimeException(e);
}
});
private Table getIcebergTable(org.apache.hadoop.hive.metastore.api.Table table) {
TableName tableName = TableName.fromString(table.getTableName(), table.getCatName(), table.getDbName());
return tableCache.get(tableName, key -> IcebergTableUtil.getTable(conf, table));
}

/**
@@ -23,11 +23,11 @@
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.GetTableRequest;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.utils.TableFetcher;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Table;
@@ -69,8 +69,11 @@ public void testIcebergTableFetched() throws Exception {

TableFetcher tableFetcher = IcebergTableUtil.getTableFetcher(db.getMSC(), null, "default", "*");

List<TableName> tables = tableFetcher.getTables();
Assert.assertEquals(new TableName("hive", "default", "iceberg_table"), tables.get(0));
int maxBatchSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
List<org.apache.hadoop.hive.metastore.api.Table> tables = tableFetcher.getTables(maxBatchSize);
Assert.assertEquals("hive", tables.get(0).getCatName());
Assert.assertEquals("default", tables.get(0).getDbName());
Assert.assertEquals("iceberg_table", tables.get(0).getTableName());
}

@Test
@@ -20,9 +20,11 @@
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableIterable;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -90,7 +92,7 @@ private void buildTableFilter(String tablePattern, List<String> conditions) {
this.tableFilter = String.join(" and ", conditions);
}

public List<TableName> getTables() throws Exception {
public List<TableName> getTableNames() throws Exception {
List<TableName> candidates = new ArrayList<>();

// if tableTypes is empty, then a list with single empty string has to specified to scan no tables.
@@ -102,21 +104,47 @@ public List<TableName> getTables() throws Exception {
List<String> databases = client.getDatabases(catalogName, dbPattern);

for (String db : databases) {
Database database = client.getDatabase(catalogName, db);
if (MetaStoreUtils.checkIfDbNeedsToBeSkipped(database)) {
LOG.debug("Skipping table under database: {}", db);
continue;
}
if (MetaStoreUtils.isDbBeingPlannedFailedOver(database)) {
LOG.info("Skipping table that belongs to database {} being failed over.", db);
continue;
}
List<String> tablesNames = client.listTableNamesByFilter(catalogName, db, tableFilter, -1);
List<String> tablesNames = getTableNamesForDatabase(catalogName, db);
tablesNames.forEach(tablesName -> candidates.add(TableName.fromString(tablesName, catalogName, db)));
}
return candidates;
}

public List<Table> getTables(int maxBatchSize) throws Exception {
List<Table> candidates = new ArrayList<>();

// if tableTypes is empty, then a list with single empty string has to specified to scan no tables.
if (tableTypes.isEmpty()) {
LOG.info("Table fetcher returns empty list as no table types specified");
return candidates;
}

List<String> databases = client.getDatabases(catalogName, dbPattern);

for (String db : databases) {
List<String> tablesNames = getTableNamesForDatabase(catalogName, db);
@deniskuzZ (Member), Aug 15, 2025:

@Neer393 I don't understand what you have optimized here.
You are still making multiple calls: one to get the table names and another to get the table objects. Why not get the table objects directly?

Also, have you considered the memory impact of loading everything into the heap? You could have iterated over TableIterable instead. I don't think this is a robust solution; it can potentially lead to OOM.
cc @dengzhhu653, @wecharyu

@Neer393 (Contributor, Author), Aug 15, 2025:

The earlier implementation made one msc call to get the table names and then one msc call per table name to fetch the corresponding HMS table object.

The new implementation reduces the msc calls: a single call fetches all table names, and then, via TableIterable, the number of msc calls for fetching table objects becomes number of tables / BATCH_RETRIEVE_MAX (default 300).

So in the older implementation the number of msc calls = 1 + number of tables, whereas in the newer implementation the number of msc calls = 1 + (number of tables / BATCH_RETRIEVE_MAX).
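
As a rough illustration (the table count is assumed, not taken from this PR): with 30,000 matching tables and the default BATCH_RETRIEVE_MAX of 300, the old path issues about 1 + 30,000 = 30,001 msc calls, while the batched path issues about 1 + 30,000 / 300 = 101.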

@Neer393 (Contributor, Author):

In my earlier implementation I had the same proposal of fetching table objects directly, for which I implemented a dedicated HMS API endpoint along the lines of listTableNamesByFilter, but the idea was dropped by @vikramahuja1001.

@deniskuzZ (Member), Aug 15, 2025:

In order to use batching you need to have the table list to fetch - that's ok. However, instead of working with the batches, you load everything into memory.
Could you refactor to use Iterable (i.e. make getTables return Iterable<Table>)?
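
A rough sketch (not part of this PR) of that suggestion, assuming the TableFetcher fields used in this diff (client, catalogName, dbPattern), the getTableNamesForDatabase helper added below, and Guava's Iterables.concat to chain the per-database batch iterables lazily:

    public Iterable<Table> getTables(int maxBatchSize) throws Exception {
      List<Iterable<Table>> perDb = new ArrayList<>();
      for (String db : client.getDatabases(catalogName, dbPattern)) {
        List<String> tableNames = getTableNamesForDatabase(catalogName, db);
        // TableIterable fetches table objects in batches of maxBatchSize,
        // so callers hold at most one batch in memory while iterating.
        perDb.add(new TableIterable(client, db, tableNames, maxBatchSize));
      }
      return com.google.common.collect.Iterables.concat(perDb);
    }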

@Neer393 (Contributor, Author):

Okay, so for this fix should I create a new JIRA, or, since I am already working on https://issues.apache.org/jira/browse/HIVE-28974, which relates only to IcebergHouseKeeperService, should I attach the fix to that JIRA?
Whatever you suggest is fine with me.

Contributor:

+1. Using the TableIterable is more reasonable, to avoid a possible OOM.

Contributor:

Let me summarize the points.

  • This PR reduces the number of API calls from O(num-tables) to O(num-tables / batch size). It's neat 👍
  • This PR requires O(num-tables * tbl-size) space. We'd like to reduce it.
  • Regardless of this PR, we retain the O(num-tables * length-of-table-name) space. Is that OK, or should we optimize it?

@Neer393 (Contributor, Author):

AFAIK, it's a tradeoff between the number of msc calls and the space: if we try to decrease the number of tables held in memory, we increase the number of msc calls, and then there would be no point to this JIRA.
Please correct me if I am wrong, but this is what I think.
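
For a rough sense of scale (sizes assumed, not measured): if an HMS Table object averages a few KB, holding 30,000 of them at once is on the order of 100 MB of heap, whereas iterating batch-by-batch keeps at most BATCH_RETRIEVE_MAX (300 by default) table objects in memory at a time, with the O(num-tables) list of names retained in both cases.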

@deniskuzZ (Member), Aug 19, 2025:

My concern was related to the fetch logic, where we load all Hive table objects into memory instead of using the batch iterator.

We also make O(num-tables) calls to load the Iceberg tables. Can we optimize here? We then put those into a separate cache; maybe we could use CachingCatalog instead?

tableCache.get(tableName, key -> IcebergTableUtil.getTable(conf, table))

for (Table table : new TableIterable(client, db, tablesNames, maxBatchSize)) {
candidates.add(table);
}
}
return candidates;
}

private List<String> getTableNamesForDatabase(String catalogName, String dbName) throws Exception {
List<String> tableNames = new ArrayList<>();
Database database = client.getDatabase(catalogName, dbName);
if (MetaStoreUtils.checkIfDbNeedsToBeSkipped(database)) {
LOG.debug("Skipping table under database: {}", dbName);
return tableNames;
}
if (MetaStoreUtils.isDbBeingPlannedFailedOver(database)) {
LOG.info("Skipping table that belongs to database {} being failed over.", dbName);
return tableNames;
}
tableNames = client.listTableNamesByFilter(catalogName, dbName, tableFilter, -1);
return tableNames;
}

public static class Builder {
private final IMetaStoreClient client;
private final String catalogName;
@@ -101,7 +101,7 @@ public void run() {
.tableCondition(
hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS + "discover__partitions like \"true\" ")
.build()
.getTables();
.getTableNames();

if (candidates.isEmpty()) {
LOG.info("Got empty table list in catalog: {}, dbPattern: {}", catalogName, dbPattern);