diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index b3977b8c9578..402f8288611e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -4701,7 +4701,7 @@ List getPartitionsWithSpecsByNames(Table tbl, List partNames, } try { - Batchable.runBatched(batchSize, partNames, new Batchable() { + new Batchable() { @Override public List run(List list) throws Exception { req.getFilterSpec().setFilters(list); @@ -4712,7 +4712,7 @@ public List run(List list) throws Exception { } return Collections.emptyList(); } - }); + }.runBatched(batchSize, partNames); } catch (Exception e) { throw new HiveException(e); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java similarity index 63% rename from standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java rename to standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java index 571d6bdbd1dc..1f13f6cf9695 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java @@ -21,68 +21,36 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import javax.jdo.Query; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** - * Base class to add the batch process for DirectSQL or RawStore queries. + * Base class to add the batch process. * 1. Provide the implementation of run() to process one batch - * 2. Call Batchable.runBatched() to process the whole dataset + * 2. 
Call runBatched() to process the whole dataset * * I: input type, R: result type */ public abstract class Batchable { - private static final Logger LOG = LoggerFactory.getLogger(Batchable.class); public static final int NO_BATCHING = -1; - private List queries = null; public abstract List run(List input) throws Exception; - public void addQueryAfterUse(Query query) { - if (queries == null) { - queries = new ArrayList(1); - } - queries.add(query); - } - protected void addQueryAfterUse(Batchable b) { - if (b.queries == null) { - return; - } - if (queries == null) { - queries = new ArrayList<>(b.queries); - } else { - queries.addAll(b.queries); - } - } - public void closeAllQueries() { - for (Query q : queries) { - try { - q.closeAll(); - } catch (Throwable t) { - LOG.error("Failed to close a query", t); - } - } - } - - public static List runBatched( + public final List runBatched( final int batchSize, - List input, - Batchable runnable) throws MetaException { + List input) throws MetaException { if (input == null || input.isEmpty()) { return Collections.emptyList(); } try { if (batchSize == NO_BATCHING || batchSize >= input.size()) { - return runnable.run(input); + return run(input); } List result = new ArrayList<>(input.size()); for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) { toIndex = Math.min(fromIndex + batchSize, input.size()); List batchedInput = input.subList(fromIndex, toIndex); - List batchedOutput = runnable.run(batchedInput); + List batchedOutput = run(batchedInput); if (batchedOutput != null) { result.addAll(batchedOutput); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ExceptionHandler.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ExceptionHandler.java similarity index 99% rename from standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ExceptionHandler.java rename to standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ExceptionHandler.java index 4e4950f99f16..c066cbd53dcb 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ExceptionHandler.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ExceptionHandler.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.metastore.utils.JavaUtils; import org.apache.thrift.TException; + import static java.util.Objects.requireNonNull; /** diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/BatchableQuery.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/BatchableQuery.java new file mode 100644 index 000000000000..34cd11c31120 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/BatchableQuery.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.List; +import javax.jdo.Query; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Base class to add the batch process for DirectSQL or RawStore queries. + */ +public abstract class BatchableQuery extends Batchable { + private static final Logger LOG = LoggerFactory.getLogger(Batchable.class); + + private List queries = null; + + public void addQueryAfterUse(Query query) { + if (queries == null) { + queries = new ArrayList(1); + } + queries.add(query); + } + protected void addQueryAfterUse(BatchableQuery b) { + if (b.queries == null) { + return; + } + if (queries == null) { + queries = new ArrayList<>(b.queries); + } else { + queries.addAll(b.queries); + } + } + public void closeAllQueries() { + for (Query q : queries) { + try { + q.closeAll(); + } catch (Throwable t) { + LOG.error("Failed to close a query", t); + } + } + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java index 159243969322..7063967a8840 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DirectSqlUpdatePart.java @@ -577,7 +577,7 @@ private void updatePartitionsInBatch(Map, Long> partValuesToId, List conditionKeys = Arrays.asList("\"PART_ID\""); String stmt = TxnUtils.createUpdatePreparedStmt("\"PARTITIONS\"", columns, conditionKeys); int maxRows = dbType.getMaxRows(maxBatchSize, 4); - updateWithStatement(statement -> Batchable.runBatched(maxRows, newParts, new Batchable() { + updateWithStatement(statement -> new Batchable() { @Override public List run(List input) throws SQLException { for (Partition p : input) { @@ -590,12 +590,12 @@ public List run(List input) throws SQLException { statement.executeBatch(); return null; } - }), stmt); + }.runBatched(maxRows, newParts), stmt); } /* Get stringListId from both SKEWED_VALUES and SKEWED_COL_VALUE_LOC_MAP tables. 
*/ private List getStringListId(List sdIds) throws MetaException { - return Batchable.runBatched(maxBatchSize, sdIds, new Batchable() { + return new Batchable() { @Override public List run(List input) throws Exception { List result = new ArrayList<>(); @@ -616,7 +616,7 @@ public List run(List input) throws Exception { } return result; } - }); + }.runBatched(maxBatchSize, sdIds); } private void updateParamTableInBatch(String paramTable, String idColumn, List ids, @@ -654,7 +654,7 @@ private void updateParamTableInBatch(String paramTable, String idColumn, List> getParams(String paramTable, String idName, List ids) throws MetaException { Map> idToParams = new HashMap<>(); - Batchable.runBatched(maxBatchSize, ids, new Batchable() { + new Batchable() { @Override public List run(List input) throws MetaException { String idLists = MetaStoreDirectSql.getIdListForIn(input); @@ -671,7 +671,7 @@ public List run(List input) throws MetaException { } return null; } - }); + }.runBatched(maxBatchSize, ids); return idToParams; } @@ -679,20 +679,18 @@ private void deleteParams(String paramTable, String idColumn, List> deleteIdKeys) throws MetaException { String deleteStmt = "delete from " + paramTable + " where " + idColumn + "=? and \"PARAM_KEY\"=?"; int maxRows = dbType.getMaxRows(maxBatchSize, 2); - updateWithStatement(statement -> Batchable.runBatched(maxRows, deleteIdKeys, - new Batchable, Void>() { - @Override - public List run(List> input) throws SQLException { - for (Pair pair : input) { - statement.setLong(1, pair.getLeft()); - statement.setString(2, pair.getRight()); - statement.addBatch(); - } - statement.executeBatch(); - return null; - } + updateWithStatement(statement -> new Batchable, Void>() { + @Override + public List run(List> input) throws SQLException { + for (Pair pair : input) { + statement.setLong(1, pair.getLeft()); + statement.setString(2, pair.getRight()); + statement.addBatch(); } - ), deleteStmt); + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, deleteIdKeys), deleteStmt); } private void updateParams(String paramTable, String idColumn, @@ -701,21 +699,19 @@ private void updateParams(String paramTable, String idColumn, List conditionKeys = Arrays.asList(idColumn, "\"PARAM_KEY\""); String stmt = TxnUtils.createUpdatePreparedStmt(paramTable, columns, conditionKeys); int maxRows = dbType.getMaxRows(maxBatchSize, 3); - updateWithStatement(statement -> Batchable.runBatched(maxRows, updateIdAndParams, - new Batchable>, Object>() { - @Override - public List run(List>> input) throws SQLException { - for (Pair> pair : input) { - statement.setString(1, pair.getRight().getRight()); - statement.setLong(2, pair.getLeft()); - statement.setString(3, pair.getRight().getLeft()); - statement.addBatch(); - } - statement.executeBatch(); - return null; - } + updateWithStatement(statement -> new Batchable>, Object>() { + @Override + public List run(List>> input) throws SQLException { + for (Pair> pair : input) { + statement.setString(1, pair.getRight().getRight()); + statement.setLong(2, pair.getLeft()); + statement.setString(3, pair.getRight().getLeft()); + statement.addBatch(); } - ), stmt); + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, updateIdAndParams), stmt); } private void insertParams(String paramTable, String idColumn, @@ -723,21 +719,19 @@ private void insertParams(String paramTable, String idColumn, List columns = Arrays.asList(idColumn, "\"PARAM_KEY\"", "\"PARAM_VALUE\""); String query = TxnUtils.createInsertPreparedStmt(paramTable, columns); 
int maxRows = dbType.getMaxRows(maxBatchSize, 3); - updateWithStatement(statement -> Batchable.runBatched(maxRows, addIdAndParams, - new Batchable>, Void>() { - @Override - public List run(List>> input) throws SQLException { - for (Pair> pair : input) { - statement.setLong(1, pair.getLeft()); - statement.setString(2, pair.getRight().getLeft()); - statement.setString(3, pair.getRight().getRight()); - statement.addBatch(); - } - statement.executeBatch(); - return null; - } + updateWithStatement(statement -> new Batchable>, Void>() { + @Override + public List run(List>> input) throws SQLException { + for (Pair> pair : input) { + statement.setLong(1, pair.getLeft()); + statement.setString(2, pair.getRight().getLeft()); + statement.setString(3, pair.getRight().getRight()); + statement.addBatch(); } - ), query); + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, addIdAndParams), query); } private void updateStorageDescriptorInBatch(Map idToSd) @@ -746,7 +740,7 @@ private void updateStorageDescriptorInBatch(Map idToSd) Map sdIdToSerdeId = new HashMap<>(); Set cdIds = new HashSet<>(); List validSdIds = filterIdsByNonNullValue(new ArrayList<>(idToSd.keySet()), idToSd); - Batchable.runBatched(maxBatchSize, validSdIds, new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { String idLists = MetaStoreDirectSql.getIdListForIn(input); @@ -765,7 +759,7 @@ public List run(List input) throws Exception { } return null; } - }); + }.runBatched(maxBatchSize, validSdIds); Map>> sdParamsOpt = new HashMap<>(); Map> idToBucketCols = new HashMap<>(); @@ -807,24 +801,22 @@ public List run(List input) throws Exception { sdIdToNewCdId.containsKey(sdId) ? sdIdToNewCdId.get(sdId) : cdId); updateSDInBatch(validSdIds, idToSd, sdIdToCdId); - Set usedIds = new HashSet<>(Batchable.runBatched(maxBatchSize, cdIdsMayDelete, - new Batchable() { - @Override - public List run(List input) throws Exception { - String idLists = MetaStoreDirectSql.getIdListForIn(input); - String queryText = "select DISTINCT \"CD_ID\" from \"SDS\" where \"CD_ID\" in ( " + idLists + ")"; - List cdIds = new ArrayList<>(); - try (QueryWrapper query = new QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) { - List sqlResult = executeWithArray(query.getInnerQuery(), null, queryText); - if (sqlResult != null) { - for (Object cdId : sqlResult) { - cdIds.add(MetastoreDirectSqlUtils.extractSqlLong(cdId)); - } - } + Set usedIds = new HashSet<>(new Batchable() { + @Override + public List run(List input) throws Exception { + String idLists = MetaStoreDirectSql.getIdListForIn(input); + String queryText = "select DISTINCT \"CD_ID\" from \"SDS\" where \"CD_ID\" in ( " + idLists + ")"; + List cdIds = new ArrayList<>(); + try (QueryWrapper query = new QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) { + List sqlResult = executeWithArray(query.getInnerQuery(), null, queryText); + if (sqlResult != null) { + for (Object cdId : sqlResult) { + cdIds.add(MetastoreDirectSqlUtils.extractSqlLong(cdId)); } - return cdIds; } - })); + } + return cdIds; + }}.runBatched(maxBatchSize, cdIdsMayDelete)); List unusedCdIds = cdIdsMayDelete.stream().filter(id -> !usedIds.contains(id)).collect(Collectors.toList()); deleteCDInBatch(unusedCdIds); @@ -837,32 +829,30 @@ private void updateSDInBatch(List ids, Map idToSd List conditionKeys = Arrays.asList("\"SD_ID\""); String stmt = TxnUtils.createUpdatePreparedStmt("\"SDS\"", columns, conditionKeys); int maxRows = dbType.getMaxRows(maxBatchSize, 8); - 
updateWithStatement(statement -> Batchable.runBatched(maxRows, ids, - new Batchable() { - @Override - public List run(List input) throws SQLException { - for (Long sdId : input) { - StorageDescriptor sd = idToSd.get(sdId); - statement.setLong(1, idToCdId.get(sdId)); - statement.setString(2, sd.getInputFormat()); - statement.setObject(3, dbType.getBoolean(sd.isCompressed())); - statement.setObject(4, dbType.getBoolean(sd.isStoredAsSubDirectories())); - statement.setString(5, sd.getLocation()); - statement.setInt(6, sd.getNumBuckets()); - statement.setString(7, sd.getOutputFormat()); - statement.setLong(8, sdId); - statement.addBatch(); - } - statement.executeBatch(); - return null; - } + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws SQLException { + for (Long sdId : input) { + StorageDescriptor sd = idToSd.get(sdId); + statement.setLong(1, idToCdId.get(sdId)); + statement.setString(2, sd.getInputFormat()); + statement.setObject(3, dbType.getBoolean(sd.isCompressed())); + statement.setObject(4, dbType.getBoolean(sd.isStoredAsSubDirectories())); + statement.setString(5, sd.getLocation()); + statement.setInt(6, sd.getNumBuckets()); + statement.setString(7, sd.getOutputFormat()); + statement.setLong(8, sdId); + statement.addBatch(); } - ), stmt); + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, ids), stmt); } private void updateBucketColsInBatch(Map> sdIdToBucketCols, List sdIds) throws MetaException { - Batchable.runBatched(maxBatchSize, sdIds, new Batchable() { + new Batchable() { @Override public List run(List input) throws MetaException { String idLists = MetaStoreDirectSql.getIdListForIn(input); @@ -870,12 +860,12 @@ public List run(List input) throws MetaException { updateWithStatement(PreparedStatement::executeUpdate, queryText); return null; } - }); + }.runBatched(maxBatchSize, sdIds); List columns = Arrays.asList("\"SD_ID\"", "\"INTEGER_IDX\"", "\"BUCKET_COL_NAME\""); String stmt = TxnUtils.createInsertPreparedStmt("\"BUCKETING_COLS\"", columns); List idWithBucketCols = filterIdsByNonNullValue(sdIds, sdIdToBucketCols); int maxRows = dbType.getMaxRows(maxBatchSize, 3); - updateWithStatement(statement -> Batchable.runBatched(maxRows, idWithBucketCols, new Batchable() { + updateWithStatement(statement -> new Batchable() { @Override public List run(List input) throws SQLException { for (Long id : input) { @@ -890,12 +880,12 @@ public List run(List input) throws SQLException { statement.executeBatch(); return null; } - }), stmt); + }.runBatched(maxRows, idWithBucketCols), stmt); } private void updateSortColsInBatch(Map> sdIdToSortCols, List sdIds) throws MetaException { - Batchable.runBatched(maxBatchSize, sdIds, new Batchable() { + new Batchable() { @Override public List run(List input) throws MetaException { String idLists = MetaStoreDirectSql.getIdListForIn(input); @@ -903,13 +893,13 @@ public List run(List input) throws MetaException { updateWithStatement(PreparedStatement::executeUpdate, queryText); return null; } - }); + }.runBatched(maxBatchSize, sdIds); List columns = Arrays.asList("\"SD_ID\"", "\"INTEGER_IDX\"", "\"COLUMN_NAME\"", "\"ORDER\""); String stmt = TxnUtils.createInsertPreparedStmt("\"SORT_COLS\"", columns); List idWithSortCols = filterIdsByNonNullValue(sdIds, sdIdToSortCols); int maxRows = dbType.getMaxRows(maxBatchSize, 4); - updateWithStatement(statement -> Batchable.runBatched(maxRows, idWithSortCols, new Batchable() { + updateWithStatement(statement -> new Batchable() { @Override public List 
run(List input) throws SQLException { for (Long id : input) { @@ -925,7 +915,7 @@ public List run(List input) throws SQLException { statement.executeBatch(); return null; } - }), stmt); + }.runBatched(maxRows, idWithSortCols), stmt); } private void updateSkewedInfoInBatch(Map sdIdToSkewedInfo, @@ -934,7 +924,7 @@ private void updateSkewedInfoInBatch(Map sdIdToSkewedInfo, // skewedValues first for the foreign key constraint. List stringListId = getStringListId(sdIds); if (!stringListId.isEmpty()) { - Batchable.runBatched(maxBatchSize, sdIds, new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { String idLists = MetaStoreDirectSql.getIdListForIn(input); @@ -949,8 +939,8 @@ public List run(List input) throws Exception { updateWithStatement(PreparedStatement::executeUpdate, deleteSkewColNamesQuery); return null; } - }); - Batchable.runBatched(maxBatchSize, stringListId, new Batchable() { + }.runBatched(maxBatchSize, sdIds); + new Batchable() { @Override public List run(List input) throws MetaException { String idLists = MetaStoreDirectSql.getIdListForIn(input); @@ -962,7 +952,7 @@ public List run(List input) throws MetaException { updateWithStatement(PreparedStatement::executeUpdate, deleteStringListQuery); return null; } - }); + }.runBatched(maxBatchSize, stringListId); } // Generate new stringListId for each SdId @@ -1011,7 +1001,7 @@ private void insertSkewedColNamesInBatch(Map> sdIdToSkewedCol String stmt = TxnUtils.createInsertPreparedStmt("\"SKEWED_COL_NAMES\"", columns); List idWithSkewedCols = filterIdsByNonNullValue(sdIds, sdIdToSkewedColNames); int maxRows = dbType.getMaxRows(maxBatchSize, 3); - updateWithStatement(statement -> Batchable.runBatched(maxRows, idWithSkewedCols, new Batchable() { + updateWithStatement(statement -> new Batchable() { @Override public List run(List input) throws SQLException { for (Long id : input) { @@ -1026,26 +1016,24 @@ public List run(List input) throws SQLException { statement.executeBatch(); return null; } - }), stmt); + }.runBatched(maxRows, idWithSkewedCols), stmt); } private void insertStringListInBatch(List stringListIds) throws MetaException { List columns = Arrays.asList("\"STRING_LIST_ID\""); String insertQuery = TxnUtils.createInsertPreparedStmt("\"SKEWED_STRING_LIST\"", columns); int maxRows = dbType.getMaxRows(maxBatchSize, 1); - updateWithStatement(statement -> Batchable.runBatched(maxRows, stringListIds, - new Batchable() { - @Override - public List run(List input) throws SQLException { - for (Long id : input) { - statement.setLong(1, id); - statement.addBatch(); - } - statement.executeBatch(); - return null; - } + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws SQLException { + for (Long id : input) { + statement.setLong(1, id); + statement.addBatch(); } - ), insertQuery); + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, stringListIds), insertQuery); } private void insertStringListValuesInBatch(Map> stringListIdToValues, @@ -1054,24 +1042,22 @@ private void insertStringListValuesInBatch(Map> stringListIdT String insertQuery = TxnUtils.createInsertPreparedStmt("\"SKEWED_STRING_LIST_VALUES\"", columns); List idWithStringList = filterIdsByNonNullValue(stringListIds, stringListIdToValues); int maxRows = dbType.getMaxRows(maxBatchSize, 3); - updateWithStatement(statement -> Batchable.runBatched(maxRows, idWithStringList, - new Batchable() { - @Override - public List run(List input) throws SQLException { - for (Long stringListId : 
input) { - List values = stringListIdToValues.get(stringListId); - for (int i = 0; i < values.size(); i++) { - statement.setLong(1, stringListId); - statement.setInt(2, i); - statement.setString(3, values.get(i)); - statement.addBatch(); - } - } - statement.executeBatch(); - return null; + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws SQLException { + for (Long stringListId : input) { + List values = stringListIdToValues.get(stringListId); + for (int i = 0; i < values.size(); i++) { + statement.setLong(1, stringListId); + statement.setInt(2, i); + statement.setString(3, values.get(i)); + statement.addBatch(); } } - ), insertQuery); + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, idWithStringList), insertQuery); } private void insertSkewedValuesInBatch(Map> sdIdToStringListId, @@ -1080,24 +1066,22 @@ private void insertSkewedValuesInBatch(Map> sdIdToStringListId, String insertQuery = TxnUtils.createInsertPreparedStmt("\"SKEWED_VALUES\"", columns); List idWithSkewedValues = filterIdsByNonNullValue(sdIds, sdIdToStringListId); int maxRows = dbType.getMaxRows(maxBatchSize, 3); - updateWithStatement(statement -> Batchable.runBatched(maxRows, idWithSkewedValues, - new Batchable() { - @Override - public List run(List input) throws Exception { - for (Long sdId : input) { - List stringListIds = sdIdToStringListId.get(sdId); - for (int i = 0; i < stringListIds.size(); i++) { - statement.setLong(1, sdId); - statement.setInt(2, i); - statement.setLong(3, stringListIds.get(i)); - statement.addBatch(); - } - } - statement.executeBatch(); - return null; + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws Exception { + for (Long sdId : input) { + List stringListIds = sdIdToStringListId.get(sdId); + for (int i = 0; i < stringListIds.size(); i++) { + statement.setLong(1, sdId); + statement.setInt(2, i); + statement.setLong(3, stringListIds.get(i)); + statement.addBatch(); } } - ), insertQuery); + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, idWithSkewedValues), insertQuery); } private void insertSkewColValueLocInBatch(Map>> sdIdToColValueLoc, @@ -1106,30 +1090,28 @@ private void insertSkewColValueLocInBatch(Map>> sd String insertQuery = TxnUtils.createInsertPreparedStmt("\"SKEWED_COL_VALUE_LOC_MAP\"", columns); List idWithColValueLoc = filterIdsByNonNullValue(sdIds, sdIdToColValueLoc); int maxRows = dbType.getMaxRows(maxBatchSize, 3); - updateWithStatement(statement -> Batchable.runBatched(maxRows, idWithColValueLoc, - new Batchable() { - @Override - public List run(List input) throws Exception { - for (Long sdId : input) { - List> stringListIdAndLoc = sdIdToColValueLoc.get(sdId); - for (Pair pair : stringListIdAndLoc) { - statement.setLong(1, sdId); - statement.setLong(2, pair.getLeft()); - statement.setString(3, pair.getRight()); - statement.addBatch(); - } - } - statement.executeBatch(); - return null; + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws Exception { + for (Long sdId : input) { + List> stringListIdAndLoc = sdIdToColValueLoc.get(sdId); + for (Pair pair : stringListIdAndLoc) { + statement.setLong(1, sdId); + statement.setLong(2, pair.getLeft()); + statement.setString(3, pair.getRight()); + statement.addBatch(); } } - ), insertQuery); + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, idWithColValueLoc), insertQuery); } private Map updateCDInBatch(List cdIds, List sdIds, Map 
sdIdToCdId, Map> sdIdToNewColumns) throws MetaException { Map>> cdIdToColIdxPair = new HashMap<>(); - Batchable.runBatched(maxBatchSize, cdIds, new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { String idLists = MetaStoreDirectSql.getIdListForIn(input); @@ -1149,7 +1131,7 @@ public List run(List input) throws Exception { } return null; } - }); + }.runBatched(maxBatchSize, cdIds); List newCdIds = new ArrayList<>(); Map> newCdIdToCols = new HashMap<>(); Map oldCdIdToNewCdId = new HashMap<>(); @@ -1198,43 +1180,41 @@ private void insertCDInBatch(List ids, Map> idToCo throws MetaException { String insertCds = TxnUtils.createInsertPreparedStmt("\"CDS\"", Arrays.asList("\"CD_ID\"")); int maxRows = dbType.getMaxRows(maxBatchSize, 1); - updateWithStatement(statement -> Batchable.runBatched(maxRows, ids, - new Batchable() { - @Override - public List run(List input) throws SQLException { - for (Long id : input) { - statement.setLong(1, id); - statement.addBatch(); - } - statement.executeBatch(); - return null; - } - }), insertCds); + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws SQLException { + for (Long id : input) { + statement.setLong(1, id); + statement.addBatch(); + } + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, ids), insertCds); List columns = Arrays.asList("\"CD_ID\"", "\"COMMENT\"", "\"COLUMN_NAME\"", "\"TYPE_NAME\"", "\"INTEGER_IDX\""); String insertColumns = TxnUtils.createInsertPreparedStmt("\"COLUMNS_V2\"", columns); int maxRowsForCDs = dbType.getMaxRows(maxBatchSize, 5); - updateWithStatement(statement -> Batchable.runBatched(maxRowsForCDs, ids, - new Batchable() { - @Override - public List run(List input) throws Exception { - for (Long id : input) { - List cols = idToCols.get(id); - for (int i = 0; i < cols.size(); i++) { - FieldSchema col = cols.get(i); - statement.setLong(1, id); - statement.setString(2, col.getComment()); - statement.setString(3, col.getName()); - statement.setString(4, col.getType()); - statement.setInt(5, i); - statement.addBatch(); - } - } - statement.executeBatch(); - return null; + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws Exception { + for (Long id : input) { + List cols = idToCols.get(id); + for (int i = 0; i < cols.size(); i++) { + FieldSchema col = cols.get(i); + statement.setLong(1, id); + statement.setString(2, col.getComment()); + statement.setString(3, col.getName()); + statement.setString(4, col.getType()); + statement.setInt(5, i); + statement.addBatch(); } - }), insertColumns); + } + statement.executeBatch(); + return null; + } + }.runBatched(maxRowsForCDs, ids), insertColumns); } private void updateKeyConstraintsInBatch(Map oldCdIdToNewCdId, @@ -1248,33 +1228,32 @@ private void updateKeyConstraintsInBatch(Map oldCdIdToNewCdId, String updateChild = TxnUtils.createUpdatePreparedStmt(tableName, childColumns, childColumns); for (String updateStmt : new String[]{updateParent, updateChild}) { int maxRows = dbType.getMaxRows(maxBatchSize, 4); - updateWithStatement(statement -> Batchable.runBatched(maxRows, oldCdIds, - new Batchable() { - @Override - public List run(List input) throws SQLException { - for (Long oldId : input) { - // Followed the jdo implement to update only mapping columns for KEY_CONSTRAINTS. 
- if (!oldCdIdToColIdxPairs.containsKey(oldId)) { - continue; - } - Long newId = oldCdIdToNewCdId.get(oldId); - for (Pair idx : oldCdIdToColIdxPairs.get(oldId)) { - statement.setLong(1, newId); - statement.setInt(2, idx.getRight()); - statement.setLong(3, oldId); - statement.setInt(4, idx.getLeft()); - statement.addBatch(); - } - } - statement.executeBatch(); - return null; + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws SQLException { + for (Long oldId : input) { + // Followed the jdo implement to update only mapping columns for KEY_CONSTRAINTS. + if (!oldCdIdToColIdxPairs.containsKey(oldId)) { + continue; + } + Long newId = oldCdIdToNewCdId.get(oldId); + for (Pair idx : oldCdIdToColIdxPairs.get(oldId)) { + statement.setLong(1, newId); + statement.setInt(2, idx.getRight()); + statement.setLong(3, oldId); + statement.setInt(4, idx.getLeft()); + statement.addBatch(); } - }), updateStmt); + } + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, oldCdIds), updateStmt); } } private void deleteCDInBatch(List cdIds) throws MetaException { - Batchable.runBatched(maxBatchSize, cdIds, new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { String idLists = MetaStoreDirectSql.getIdListForIn(input); @@ -1292,7 +1271,7 @@ public List run(List input) throws Exception { updateWithStatement(PreparedStatement::executeUpdate, deleteCDs); return null; } - }); + }.runBatched(maxBatchSize, cdIds); } private void updateSerdeInBatch(List ids, Map idToSerde) @@ -1303,21 +1282,20 @@ private void updateSerdeInBatch(List ids, Map idToSerde) String updateStmt = TxnUtils.createUpdatePreparedStmt("\"SERDES\"", columns, condKeys); List idWithSerde = filterIdsByNonNullValue(ids, idToSerde); int maxRows = dbType.getMaxRows(maxBatchSize, 3); - updateWithStatement(statement -> Batchable.runBatched(maxRows, idWithSerde, - new Batchable() { - @Override - public List run(List input) throws SQLException { - for (Long id : input) { - SerDeInfo serde = idToSerde.get(id); - statement.setString(1, serde.getName()); - statement.setString(2, serde.getSerializationLib()); - statement.setLong(3, id); - statement.addBatch(); - } - statement.executeBatch(); - return null; - } - }), updateStmt); + updateWithStatement(statement -> new Batchable() { + @Override + public List run(List input) throws SQLException { + for (Long id : input) { + SerDeInfo serde = idToSerde.get(id); + statement.setString(1, serde.getName()); + statement.setString(2, serde.getSerializationLib()); + statement.setLong(3, id); + statement.addBatch(); + } + statement.executeBatch(); + return null; + } + }.runBatched(maxRows, idWithSerde), updateStmt); } private static final class PartitionInfo { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index f3b012c3ff80..9c2081d0d5c9 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -375,7 +375,7 @@ public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbnam part.setTableName(newTblName); } - Batchable.runBatched(partitionBatchSize, parts, new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { 
msdb.alterPartitions(catalogName, newDbName, newTblName, @@ -383,7 +383,7 @@ public List run(List input) throws Exception { input, newt.getWriteId(), writeIdList); return Collections.emptyList(); } - }); + }.runBatched(partitionBatchSize, parts); } Deadline.checkTimeout(); } else { @@ -422,7 +422,7 @@ public List run(List input) throws Exception { int partitionBatchSize = MetastoreConf.getIntVar(handler.getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX); Map, List>> changedColsToPartNames = new HashMap<>(); - Batchable.runBatched(partitionBatchSize, parts, new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { List oldParts = new ArrayList<>(input.size()); @@ -452,7 +452,7 @@ public List run(List input) throws Exception { partVals, (cascade) ? input : oldParts, newt.getWriteId(), writeIdList); return Collections.emptyList(); } - }); + }.runBatched(partitionBatchSize, parts); for (Map.Entry, List>> entry : changedColsToPartNames.entrySet()) { List partNames = new ArrayList<>(); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 415d7c7f557a..c7cec1d9eba9 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -104,9 +104,11 @@ import org.apache.hadoop.hive.metastore.model.MTableColumnStatistics; import org.apache.hadoop.hive.metastore.model.MWMResourcePlan; import org.apache.hadoop.hive.metastore.parser.ExpressionTree; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Condition; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.FilterBuilder; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.MultiAndLeafNode; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeVisitor; @@ -528,7 +530,7 @@ public void addPartitions(List parts, List */ public List alterPartitions(MTable table, List partNames, List newParts, String queryWriteIdList) throws MetaException { - List rows = Batchable.runBatched(batchSize, partNames, new Batchable() { + List rows = new Batchable() { @Override public List run(List input) throws Exception { String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")"; @@ -536,7 +538,7 @@ public List run(List input) throws Exception { return getPartitionFieldsViaSqlFilter(table.getDatabase().getCatalogName(), table.getDatabase().getName(), table.getTableName(), columns, filter, input, Collections.emptyList(), null); } - }); + }.runBatched(batchSize, partNames); Map, Long> partValuesToId = new HashMap<>(); Map partIdToSdId = new HashMap<>(); Map partIdToWriteId = new HashMap<>(); @@ -718,12 +720,12 @@ public List getPartitionsViaPartNames(final String catName, final Str if (partNames.isEmpty()) { return Collections.emptyList(); } - return Batchable.runBatched(batchSize, partNames, new Batchable() { + return new Batchable() { @Override public List run(List input) throws MetaException { return 
getPartitionsByNames(catName, dbName, tblName, partNames, false, args); } - }); + }.runBatched(batchSize, partNames); } /** @@ -757,12 +759,12 @@ public List getPartitionsViaSqlPs(Table table, GetPartitionsArgs args return Collections.emptyList(); // no partitions, bail early. } boolean isAcidTable = TxnUtils.isAcidTable(table); - return Batchable.runBatched(batchSize, partitionIds, new Batchable() { + return new Batchable() { @Override public List run(List input) throws MetaException { return getPartitionsByPartitionIds(catName, dbName, tblName, input, isAcidTable, args); } - }); + }.runBatched(batchSize, partitionIds); } /** @@ -850,12 +852,12 @@ public List getPartitionsUsingProjectionAndFilterSpec(Table tbl, new PartitionProjectionEvaluator(pm, fieldnameToTableName, partitionFields, convertMapNullsToEmptyStrings, isView, includeParamKeyPattern, excludeParamKeyPattern); // Get full objects. For Oracle/etc. do it in batches. - return Batchable.runBatched(batchSize, partitionIds, new Batchable() { + return new Batchable() { @Override public List run(List input) throws MetaException { return projectionEvaluator.getPartitionsUsingProjectionList(input); } - }); + }.runBatched(batchSize, partitionIds); } public static class SqlFilterForPushdown { @@ -1040,12 +1042,12 @@ private List getPartitionsByPartitionIdsInBatch(String catName, Strin if (partIdList.isEmpty()) { return Collections.emptyList(); // no partitions, bail early. } - return Batchable.runBatched(batchSize, partIdList, new Batchable() { + return new Batchable() { @Override public List run(List input) throws MetaException { return getPartitionsByPartitionIds(catName, dbName, tblName, input, isAcidTable, args); } - }); + }.runBatched(batchSize, partIdList); } /** Should be called with the list short enough to not trip up Oracle/etc. 
*/ @@ -1368,6 +1370,9 @@ private static String generateSqlFilter(String catName, String dbName, String ta PartitionFilterGenerator visitor = new PartitionFilterGenerator( catName, dbName, tableName, partitionKeys, params, joins, dbHasJoinCastBug, defaultPartName, dbType, schema); + TreeNode flattened = PartFilterExprUtil.flattenAndExpressions(tree.getRoot()); + tree.setRoot(flattened); + tree.accept(visitor); if (visitor.filterBuffer.hasError()) { LOG.info("Unable to push down SQL filter: " + visitor.filterBuffer.getErrorMessage()); @@ -1422,14 +1427,6 @@ private static enum FilterType { this.clazz = clazz; } - public Set getType() { - return colTypes; - } - - public Class getClazz() { - return clazz; - } - public static FilterType fromType(String colTypeStr) { for (FilterType filterType : FilterType.values()) { if (filterType.colTypes.contains(colTypeStr)) { @@ -1451,24 +1448,69 @@ public static FilterType fromClass(Object value){ @Override public void visit(LeafNode node) throws MetaException { - int partColCount = partitionKeys.size(); - int partColIndex = LeafNode.getPartColIndexForFilter(node.keyName, partitionKeys, filterBuffer); + String filter = visitCondition(node.getCondition(), true); if (filterBuffer.hasError()) { return; } + filterBuffer.append("(" + filter + ")"); + } + + @Override + public void visit(MultiAndLeafNode node) throws MetaException { + StringBuilder filterBuilder = new StringBuilder(); + List partValues = new ArrayList<>(Collections.nCopies(partitionKeys.size(), null)); + boolean hasEqualCondition = false; + for (Condition condition : node.getConditions()) { + boolean isEqual = Operator.isEqualOperator(condition.getOperator()); + if (isEqual) { + hasEqualCondition = true; + int partColIndex = getPartColIndexForFilter(condition.getKeyName(), partitionKeys, filterBuffer); + if (filterBuffer.hasError()) { + return; + } + String partValue = partValues.get(partColIndex); + String nodeValueStr = condition.getValue().toString(); + if (partValue != null && !partValue.equals(nodeValueStr)) { + // Conflicting equal conditions for the same partition key - the filter is unsatisfiable. + filterBuffer.append("(1 = 0)"); + return; + } + partValues.set(partColIndex, nodeValueStr); + } + if (!filterBuilder.isEmpty()) { + filterBuilder.append(" and "); + } + filterBuilder.append(visitCondition(condition, !isEqual)); + } + // Concatenate equality conditions to match a longer index prefix. 
+ if (hasEqualCondition) { + String partName = Warehouse.makePartName(partitionKeys, partValues, "%"); + filterBuilder.append(" and " + PARTITIONS + ".\"PART_NAME\" like ?"); + params.add(partName); + } + + filterBuffer.append("(" + filterBuilder.toString() + ")"); + } + + private String visitCondition(Condition condition, boolean addPartNameFilter) throws MetaException { + int partColIndex = getPartColIndexForFilter(condition.getKeyName(), partitionKeys, filterBuffer); + if (filterBuffer.hasError()) { + return null; + } + FieldSchema partCol = partitionKeys.get(partColIndex); String colTypeStr = ColumnType.getTypeName(partCol.getType()); FilterType colType = FilterType.fromType(colTypeStr); if (colType == FilterType.Invalid) { filterBuffer.setError("Filter pushdown not supported for type " + colTypeStr); - return; + return null; } - FilterType valType = FilterType.fromClass(node.value); - Object nodeValue = node.value; + Object nodeValue = condition.getValue(); + FilterType valType = FilterType.fromClass(nodeValue); if (valType == FilterType.Invalid) { - filterBuffer.setError("Filter pushdown not supported for value " + node.value.getClass()); - return; + filterBuffer.setError("Filter pushdown not supported for value " + nodeValue.getClass()); + return null; } String nodeValue0 = "?"; @@ -1487,7 +1529,7 @@ public void visit(LeafNode node) throws MetaException { } else if (colType == FilterType.Timestamp) { if (dbType.isDERBY() || dbType.isMYSQL()) { filterBuffer.setError("Filter pushdown on timestamp not supported for " + dbType.dbType); - return; + return null; } try { MetaStoreUtils.convertStringToTimestamp((String) nodeValue); @@ -1506,7 +1548,7 @@ public void visit(LeafNode node) throws MetaException { // to be coerced?). Let the expression evaluation sort this one out, not metastore. filterBuffer.setError("Cannot push down filter for " + colTypeStr + " column and value " + nodeValue.getClass()); - return; + return null; } if (joins.isEmpty()) { @@ -1514,7 +1556,7 @@ public void visit(LeafNode node) throws MetaException { // joining multiple times for one column (if there are several filters on it), we will // keep numCols elements in the list, one for each column; we will fill it with nulls, // put each join at a corresponding index when necessary, and remove nulls in the end. - for (int i = 0; i < partColCount; ++i) { + for (int i = 0; i < partitionKeys.size(); ++i) { joins.add(null); } } @@ -1527,7 +1569,8 @@ public void visit(LeafNode node) throws MetaException { // Build the filter and add parameters linearly; we are traversing leaf nodes LTR. String tableValue = "\"FILTER" + partColIndex + "\".\"PART_KEY_VAL\""; - if (node.isReverseOrder && nodeValue != null) { + boolean isReverseOrder = condition.isReverseOrder(); + if (isReverseOrder && nodeValue != null) { params.add(nodeValue); } String tableColumn = tableValue; @@ -1559,22 +1602,23 @@ public void visit(LeafNode node) throws MetaException { tableValue += " then " + tableValue0 + " else null end)"; } - if (!node.isReverseOrder && nodeValue != null) { + if (!isReverseOrder && nodeValue != null) { params.add(nodeValue); } // The following syntax is required for using LIKE clause wildcards '_' and '%' as literals. - if (node.operator == Operator.LIKE) { + Operator operator = condition.getOperator(); + if (operator == Operator.LIKE) { nodeValue0 = nodeValue0 + " ESCAPE '\\' "; } - String filter = node.isReverseOrder - ? 
nodeValue0 + " " + node.operator.getSqlOp() + " " + tableValue - : tableValue + " " + node.operator.getSqlOp() + " " + nodeValue0; + String filter = isReverseOrder + ? nodeValue0 + " " + operator.getSqlOp() + " " + tableValue + : tableValue + " " + operator.getSqlOp() + " " + nodeValue0; // For equals and not-equals filter, we can add partition name filter to improve performance. - boolean isOpEquals = Operator.isEqualOperator(node.operator); - boolean isOpNotEqual = Operator.isNotEqualOperator(node.operator); - String nodeValueStr = node.value.toString(); - if (StringUtils.isNotEmpty(nodeValueStr) && (isOpEquals || isOpNotEqual)) { + boolean isOpEquals = Operator.isEqualOperator(operator); + boolean isOpNotEqual = Operator.isNotEqualOperator(operator); + String nodeValueStr = condition.getValue().toString(); + if (addPartNameFilter && StringUtils.isNotEmpty(nodeValueStr) && (isOpEquals || isOpNotEqual)) { Map partKeyToVal = new HashMap<>(); partKeyToVal.put(partCol.getName(), nodeValueStr); String escapedNameFragment = Warehouse.makePartName(partKeyToVal, false); @@ -1583,6 +1627,7 @@ public void visit(LeafNode node) throws MetaException { // match PART_NAME by like clause. escapedNameFragment += "%"; } + int partColCount = partitionKeys.size(); if (colType != FilterType.Date && partColCount == 1) { // Case where partition column type is not date and there is no other partition columns params.add(escapedNameFragment); @@ -1604,8 +1649,7 @@ public void visit(LeafNode node) throws MetaException { filter += " and " + PARTITIONS + ".\"PART_NAME\"" + (isOpEquals ? " like ? " : " not like ? "); } } - - filterBuffer.append("(" + filter + ")"); + return filter; } } @@ -1632,7 +1676,7 @@ public ColumnStatistics getTableStats(final String catName, final String dbName, + " inner join " + DBS + " on " + TBLS + ".\"DB_ID\" = " + DBS + ".\"DB_ID\" " + " where " + DBS + ".\"CTLG_NAME\" = ? and " + DBS + ".\"NAME\" = ? and " + TBLS + ".\"TBL_NAME\" = ?" + " and \"ENGINE\" = ? and \"COLUMN_NAME\" in ("; - Batchable b = new Batchable() { + BatchableQuery b = new BatchableQuery() { @Override public List run(List input) throws MetaException { String queryText = queryText0 + makeParams(input.size()) + ")"; @@ -1660,7 +1704,7 @@ public List run(List input) throws MetaException { }; List list; try { - list = Batchable.runBatched(batchSize, colNames, b); + list = b.runBatched(batchSize, colNames); if (list != null) { list = new ArrayList<>(list); } @@ -1838,10 +1882,10 @@ private long partsFoundForPartitions( + " and " + PART_COL_STATS + ".\"COLUMN_NAME\" in (%1$s) and " + PARTITIONS + ".\"PART_NAME\" in (%2$s)" + " and " + PART_COL_STATS + ".\"ENGINE\" = ?" 
+ " group by " + PART_COL_STATS + ".\"PART_ID\""; - List allCounts = Batchable.runBatched(batchSize, colNames, new Batchable() { + List allCounts = new Batchable() { @Override public List run(final List inputColName) throws MetaException { - return Batchable.runBatched(batchSize, partNames, new Batchable() { + return new Batchable() { @Override public List run(List inputPartNames) throws MetaException { long partsFound = 0; @@ -1863,9 +1907,9 @@ public List run(List inputPartNames) throws MetaException { return Lists.newArrayList(partsFound); } } - }); + }.runBatched(batchSize, partNames); } - }); + }.runBatched(batchSize, colNames); long partsFound = 0; for (Long val : allCounts) { partsFound += val; @@ -1878,19 +1922,19 @@ private List columnStatisticsObjForPartitions( List colNames, String engine, long partsFound, final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector, boolean enableKll) throws MetaException { final boolean areAllPartsFound = (partsFound == partNames.size()); - return Batchable.runBatched(batchSize, colNames, new Batchable() { + return new Batchable() { @Override public List run(final List inputColNames) throws MetaException { - return Batchable.runBatched(batchSize, partNames, new Batchable() { + return new Batchable() { @Override public List run(List inputPartNames) throws MetaException { return columnStatisticsObjForPartitionsBatch(catName, dbName, tableName, inputPartNames, inputColNames, engine, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector, enableKll); } - }); + }.runBatched(batchSize, partNames); } - }); + }.runBatched(batchSize, colNames); } public List getColStatsForAllTablePartitions(String catName, String dbName, @@ -2314,10 +2358,10 @@ public List getPartitionStats( + " and " + PARTITIONS + ".\"PART_NAME\" in (%2$s)" + " and " + PART_COL_STATS + ".\"ENGINE\" = ? " + " order by " + PARTITIONS + ".\"PART_NAME\""; - Batchable b = new Batchable() { + BatchableQuery b = new BatchableQuery() { @Override public List run(final List inputColNames) throws MetaException { - Batchable b2 = new Batchable() { + BatchableQuery b2 = new BatchableQuery() { @Override public List run(List inputPartNames) throws MetaException { String queryText = String.format(queryText0, @@ -2338,7 +2382,7 @@ public List run(List inputPartNames) throws MetaException { } }; try { - return Batchable.runBatched(batchSize, partNames, b2); + return b2.runBatched(batchSize, partNames); } finally { addQueryAfterUse(b2); } @@ -2349,7 +2393,7 @@ public List run(List inputPartNames) throws MetaException { String lastPartName = null; int from = 0; try { - List list = Batchable.runBatched(batchSize, colNames, b); + List list = b.runBatched(batchSize, colNames); for (int i = 0; i <= list.size(); ++i) { boolean isLast = i == list.size(); String partName = isLast ? 
null : (String) list.get(i)[0]; @@ -2848,7 +2892,7 @@ public void dropPartitionsViaSqlFilter(final String catName, final String dbName return; } - Batchable.runBatched(batchSize, partNames, new Batchable() { + new Batchable() { @Override public List run(List input) throws MetaException { String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")"; @@ -2861,7 +2905,7 @@ public List run(List input) throws MetaException { dropPartitionsByPartitionIds(partitionIds); return Collections.emptyList(); } - }); + }.runBatched(batchSize, partNames); } @@ -3274,7 +3318,7 @@ public boolean deleteTableColumnStatistics(long tableId, List colNames, public boolean deletePartitionColumnStats(String catName, String dbName, String tblName, List partNames, List colNames, String engine) throws MetaException { - Batchable.runBatched(batchSize, partNames, new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { String sqlFilter = PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")"; @@ -3300,7 +3344,7 @@ public List run(List input) throws Exception { } return null; } - }); + }.runBatched(batchSize, partNames); return true; } @@ -3319,12 +3363,12 @@ public List getFunctions(String catName) throws MetaException { return Collections.emptyList(); // no functions, bail early. } // Get full objects. For Oracle/etc. do it in batches. - return Batchable.runBatched(batchSize, funcIds, new Batchable() { + return new Batchable() { @Override public List run(List input) throws MetaException { return getFunctionsFromFunctionIds(input, catName); } - }); + }.runBatched(batchSize, funcIds); } private List getFunctionsFromFunctionIds(List funcIdList, String catName) throws MetaException { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java index 2bdae8902ce0..483da2ec271f 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java @@ -263,9 +263,7 @@ public int repair(MsckInfo msckInfo) throws TException, MetastoreException, IOEx partFetchBatch = Batchable.NO_BATCHING; } final Map byName = new HashMap<>(names.size() * 2); - List allParts = Batchable.runBatched( - partFetchBatch, - names, + List allParts = new Batchable() { @Override public List run(List batch) throws Exception { @@ -280,7 +278,7 @@ public List run(List batch) throws Exception { return Collections.emptyList(); } } - }); + }.runBatched(partFetchBatch, names); for (Partition p : allParts) { final String pName = Warehouse.makePartName(table.getPartitionKeys(), p.getValues()); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index c9856fcd9125..6ee484d77b45 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -2977,7 +2977,7 @@ private void dropPartitionsViaJdo(String catName, String dbName, String tblName, openTransaction(); try { - Batchable.runBatched(batchSize, partNames, new Batchable() { + new Batchable() { @Override public List run(List input) 
throws MetaException { // Delete all things. @@ -2992,7 +2992,7 @@ public List run(List input) throws MetaException { dropPartitionsNoTxn(catName, dbName, tblName, input); return Collections.emptyList(); } - }); + }.runBatched(batchSize, partNames); if (!(success = commitTransaction())) { throw new MetaException("Failed to drop partitions"); @@ -4033,7 +4033,7 @@ private List getPartitionsViaOrmFilter(String catName, String dbName, if (partNames.isEmpty()) { return Collections.emptyList(); } - return Batchable.runBatched(batchSize, partNames, new Batchable() { + return new Batchable() { @Override public List run(List input) throws MetaException { Pair> queryWithParams = @@ -4051,7 +4051,7 @@ public List run(List input) throws MetaException { return partitions; } } - }); + }.runBatched(batchSize, partNames); } private void dropPartitionsNoTxn(String catName, String dbName, String tblName, List partNames) { @@ -9394,7 +9394,7 @@ private List getMTableColumnStatistics(Table table, List List result = Collections.emptyList(); try (Query query = pm.newQuery(MTableColumnStatistics.class)) { result = - Batchable.runBatched(batchSize, colNames, new Batchable() { + new Batchable() { @Override public List run(List input) throws MetaException { @@ -9419,7 +9419,7 @@ public List run(List input) pm.retrieveAll(paritial); return paritial; } - }); + }.runBatched(batchSize, colNames); if (result.size() > colNames.size()) { throw new MetaException("Unexpected " + result.size() + " statistics for " @@ -9982,7 +9982,7 @@ private boolean deletePartitionColumnStatisticsViaJdo(String catName, String dbN String catalog = normalizeIdentifier(catName); try { openTransaction(); - Batchable b = new Batchable() { + BatchableQuery b = new BatchableQuery() { @Override public List run(List input) throws Exception { Query query = pm.newQuery(MPartitionColumnStatistics.class); @@ -10030,7 +10030,7 @@ public List run(List input) throws Exception { } }; try { - Batchable.runBatched(batchSize, partNames, b); + b.runBatched(batchSize, partNames); } finally { b.closeAllQueries(); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java index 6cd48d53f4bf..8d3cbde007b3 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java @@ -18,15 +18,24 @@ package org.apache.hadoop.hive.metastore; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.BaseLeafNode; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Condition; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.MultiAndLeafNode; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; import org.apache.hadoop.hive.metastore.parser.PartFilterParser; import 
org.apache.hadoop.hive.metastore.utils.JavaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.parser.ExpressionTree; /** * Utility functions for working with partition filter expressions @@ -116,4 +125,59 @@ private static ExpressionTree makeExpressionTree(String filter) throws MetaExcep public static ExpressionTree parseFilterTree(String filter) throws MetaException { return PartFilterParser.parseFilter(filter); } + + public static TreeNode buildTreeFromNodes(List nodes, LogicalOperator operator) { + // The 'nodes' list is expected to have at least one element. + // If the list is empty, the parser would have failed. + assert !nodes.isEmpty(); + if (nodes.size() == 1) { + return nodes.get(0); + } + TreeNode root = new TreeNode(nodes.get(0), operator, nodes.get(1)); + for (int i = 2; i < nodes.size(); ++i) { + TreeNode tmp = new TreeNode(root, operator, nodes.get(i)); + root = tmp; + } + return root; + } + + /** + * Flatten all AND-connected leaf nodes in the given expression tree + * into MultiAndLeafNodes for more efficient evaluation. + */ + public static TreeNode flattenAndExpressions(TreeNode node) { + if (node == null || node instanceof BaseLeafNode) { + return node; + } + TreeNode left = flattenAndExpressions(node.getLhs()); + TreeNode right = flattenAndExpressions(node.getRhs()); + if (node.getAndOr() == LogicalOperator.AND) { + List flatConditions = new ArrayList<>(); + List orNodes = new ArrayList<>(); + flattenConditions(left, flatConditions, orNodes); + flattenConditions(right, flatConditions, orNodes); + if (!flatConditions.isEmpty()) { + TreeNode andNode = flatConditions.size() == 1 ? 
+ new LeafNode(flatConditions.get(0)) : + new MultiAndLeafNode(flatConditions); + orNodes.add(andNode); + } + return buildTreeFromNodes(orNodes, LogicalOperator.AND); + } + return new TreeNode(left, node.getAndOr(), right); + } + + private static void flattenConditions(TreeNode node, List flatConditions, List orNodes) { + if (node == null) { + return; + } + if (node instanceof BaseLeafNode leaf) { + flatConditions.addAll(leaf.getConditions()); + } else if (node.getAndOr() == LogicalOperator.AND) { + flattenConditions(node.getLhs(), flatConditions, orNodes); + flattenConditions(node.getRhs(), flatConditions, orNodes); + } else { + orNodes.add(node); + } + } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java index b6baa3333c1a..7629a11a1085 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java @@ -19,11 +19,11 @@ import java.sql.Timestamp; import java.sql.Date; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Stack; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.ColumnType; @@ -137,9 +137,28 @@ protected void beginTreeNode(TreeNode node) throws MetaException {} protected void midTreeNode(TreeNode node) throws MetaException {} protected void endTreeNode(TreeNode node) throws MetaException {} protected void visit(LeafNode node) throws MetaException {} + protected void visit(MultiAndLeafNode node) throws MetaException {} protected boolean shouldStop() { return false; } + + /** + * Get partition column index in the table partition column list that + * corresponds to the key that is being filtered on by this tree node. + * @param partitionKeys list of partition keys. + * @param filterBuilder filter builder used to report error, if any. + * @return The index. + */ + protected int getPartColIndexForFilter(String partitionKeyName, + List partitionKeys, FilterBuilder filterBuilder) throws MetaException { + int partitionColumnIndex = Iterables.indexOf(partitionKeys, key -> partitionKeyName.equalsIgnoreCase(key.getName())); + if( partitionColumnIndex < 0) { + filterBuilder.setError("Specified key <" + partitionKeyName + + "> is not a partitioning key for the table"); + return -1; + } + return partitionColumnIndex; + } } /** @@ -239,49 +258,113 @@ public String toString() { } } - /** - * The Class representing the leaf level nodes in the ExpressionTree. - */ - public static class LeafNode extends TreeNode { - public String keyName; - public Operator operator; + public static class Condition { + private final String keyName; + private final Operator operator; /** * Constant expression side of the operator. Can currently be a String or a Long. 
*/ - public Object value; - public boolean isReverseOrder = false; + private final Object value; + private final boolean isReverseOrder; - @Override - protected void accept(TreeVisitor visitor) throws MetaException { - visitor.visit(this); + public Condition(String keyName, Operator operator, Object value) { + this(keyName, operator, value, false); + } + + public Condition(String keyName, Operator operator, Object value, boolean isReverseOrder) { + this.keyName = keyName; + this.operator = operator; + this.value = value; + this.isReverseOrder = isReverseOrder; + } + + public String getKeyName() { + return keyName; + } + + public Operator getOperator() { + return operator; + } + + public Object getValue() { + return value; + } + + public boolean isReverseOrder() { + return isReverseOrder; } @Override public String toString() { - return "LeafNode{" + + return "{" + "keyName='" + keyName + '\'' + ", operator='" + operator + '\'' + ", value=" + value + (isReverseOrder ? ", isReverseOrder=true" : "") + '}'; } + } - /** - * Get partition column index in the table partition column list that - * corresponds to the key that is being filtered on by this tree node. - * @param partitionKeys list of partition keys. - * @param filterBuilder filter builder used to report error, if any. - * @return The index. - */ - public static int getPartColIndexForFilter(String partitionKeyName, - List partitionKeys, FilterBuilder filterBuilder) throws MetaException { - int partitionColumnIndex = Iterables.indexOf(partitionKeys, key -> partitionKeyName.equalsIgnoreCase(key.getName())); - if( partitionColumnIndex < 0) { - filterBuilder.setError("Specified key <" + partitionKeyName + - "> is not a partitioning key for the table"); - return -1; - } - return partitionColumnIndex; + /** + * The Class representing the leaf level nodes in the ExpressionTree. + */ + public static abstract class BaseLeafNode extends TreeNode { + public abstract List getConditions(); + } + + /** + * Leaf node with a single condition. + */ + public static class LeafNode extends BaseLeafNode { + private final Condition condition; + + public LeafNode(Condition condition) { + this.condition = condition; + } + + @Override + public List getConditions() { + return Collections.singletonList(condition); + } + + public Condition getCondition() { + return condition; + } + + @Override + protected void accept(TreeVisitor visitor) throws MetaException { + visitor.visit(this); + } + + @Override + public String toString() { + return "LeafNode" + condition.toString(); + } + } + + /** + * Leaf node with multiple AND-connected conditions. 
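+ * For example, after PartFilterExprUtil.flattenAndExpressions runs, the filter "dt between 10 and 20" is represented by a single MultiAndLeafNode holding both range conditions, instead of a TreeNode with two single-condition LeafNode children.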
+ */ + public static class MultiAndLeafNode extends BaseLeafNode { + private final List conditions; + + public MultiAndLeafNode(List conditions) { + this.conditions = conditions; + } + + @Override + public List getConditions() { + return conditions; + } + + @Override + protected void accept(TreeVisitor visitor) throws MetaException { + visitor.visit(this); + } + + @Override + public String toString() { + return "MultiAndLeafNode" + conditions.toString(); } } @@ -339,12 +422,12 @@ protected void endTreeNode(TreeNode node) throws MetaException { @Override protected void visit(LeafNode node) throws MetaException { beforeParsing(); - keyName = node.keyName; - operator = node.operator; - value = node.value; - isReverseOrder = node.isReverseOrder; - if (node.keyName.startsWith(hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS) - && DatabaseProduct.isDerbyOracle() && node.operator == Operator.EQUALS) { + keyName = node.getCondition().getKeyName(); + operator = node.getCondition().getOperator(); + value = node.getCondition().getValue(); + isReverseOrder = node.getCondition().isReverseOrder(); + if (keyName.startsWith(hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS) + && DatabaseProduct.isDerbyOracle() && operator == Operator.EQUALS) { // Rewrite the EQUALS operator to LIKE operator = Operator.LIKE; } @@ -437,7 +520,7 @@ private void generateJDOFilterGeneral(Map params, private void generateJDOFilterOverPartitions(Configuration conf, Map params, FilterBuilder filterBuilder, List partitionKeys) throws MetaException { int partitionColumnCount = partitionKeys.size(); - int partitionColumnIndex = LeafNode.getPartColIndexForFilter(keyName, partitionKeys, filterBuilder); + int partitionColumnIndex = getPartColIndexForFilter(keyName, partitionKeys, filterBuilder); if (filterBuilder.hasError()) return; boolean canPushDownIntegral = @@ -619,11 +702,6 @@ private static void makeFilterForEquals(String keyName, String value, String par */ private TreeNode root = null; - /** - * The node stack used to keep track of the tree nodes during parsing. - */ - private final Stack nodeStack = new Stack<>(); - public TreeNode getRoot() { return this.root; } @@ -631,32 +709,4 @@ public TreeNode getRoot() { public void setRoot(TreeNode tn) { this.root = tn; } - - - /** - * Adds a intermediate node of either type(AND/OR). Pops last two nodes from - * the stack and sets them as children of the new node and pushes itself - * onto the stack. - * @param andOr the operator type - */ - public void addIntermediateNode(LogicalOperator andOr) { - - TreeNode rhs = nodeStack.pop(); - TreeNode lhs = nodeStack.pop(); - TreeNode newNode = new TreeNode(lhs, andOr, rhs); - nodeStack.push(newNode); - root = newNode; - } - - /** - * Adds a leaf node, pushes the new node onto the stack. 
- * @param newNode the new node - */ - public void addLeafNode(LeafNode newNode) { - if( root == null ) { - root = newNode; - } - nodeStack.push(newNode); - } - } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/PartFilterVisitor.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/PartFilterVisitor.java index 28c61ba1c2a9..673db04fffcf 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/PartFilterVisitor.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/PartFilterVisitor.java @@ -26,12 +26,14 @@ import org.antlr.v4.runtime.tree.RuleNode; import org.antlr.v4.runtime.tree.TerminalNode; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Condition; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; -import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; -import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; -import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; +import static org.apache.hadoop.hive.metastore.PartFilterExprUtil.buildTreeFromNodes; public class PartFilterVisitor extends PartitionFilterBaseVisitor { @@ -72,20 +74,6 @@ public TreeNode visitAndExpression(PartitionFilterParser.AndExpressionContext ct return buildTreeFromNodes(nodes, LogicalOperator.AND); } - private TreeNode buildTreeFromNodes(List nodes, LogicalOperator operator) { - // The 'nodes' list is expected to have at least one element. - // If the list if empty, the lexer parse would have failed. 
- if (nodes.size() == 1) { - return nodes.get(0); - } - TreeNode root = new TreeNode(nodes.get(0), operator, nodes.get(1)); - for (int i = 2; i < nodes.size(); ++i) { - TreeNode tmp = new TreeNode(root, operator, nodes.get(i)); - root = tmp; - } - return root; - } - @Override public TreeNode visitExpression(PartitionFilterParser.ExpressionContext ctx) { if (ctx.orExpression() != null) { @@ -96,38 +84,34 @@ public TreeNode visitExpression(PartitionFilterParser.ExpressionContext ctx) { @Override public TreeNode visitComparison(PartitionFilterParser.ComparisonContext ctx) { - LeafNode leafNode = new LeafNode(); - leafNode.keyName = (String) visit(ctx.key); - leafNode.value = visit(ctx.value); - leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); - return leafNode; + String keyName = (String) visit(ctx.key); + Object value = visit(ctx.value); + Operator operator = visitComparisonOperator(ctx.comparisonOperator()); + return new LeafNode(new Condition(keyName, operator, value)); } @Override public Object visitReverseComparison(PartitionFilterParser.ReverseComparisonContext ctx) { - LeafNode leafNode = new LeafNode(); - leafNode.keyName = (String) visit(ctx.key); - leafNode.value = visit(ctx.value); - leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); - leafNode.isReverseOrder = true; - return leafNode; + String keyName = (String) visit(ctx.key); + Object value = visit(ctx.value); + Operator operator = visitComparisonOperator(ctx.comparisonOperator()); + return new LeafNode(new Condition(keyName, operator, value, true)); } @Override public TreeNode visitBetweenCondition(PartitionFilterParser.BetweenConditionContext ctx) { - LeafNode left = new LeafNode(); - LeafNode right = new LeafNode(); - left.keyName = right.keyName = (String) visit(ctx.key); - left.value = visit(ctx.lower); - right.value = visit(ctx.upper); + String keyName = (String) visit(ctx.key); + Object leftValue = visit(ctx.lower); + Object rightValue = visit(ctx.upper); boolean isPositive = ctx.NOT() == null; - left.operator = isPositive ? Operator.GREATERTHANOREQUALTO : Operator.LESSTHAN; - right.operator = isPositive ? Operator.LESSTHANOREQUALTO : Operator.GREATERTHAN; + Operator leftOperator = isPositive ? Operator.GREATERTHANOREQUALTO : Operator.LESSTHAN; + Operator rightOperator = isPositive ? Operator.LESSTHANOREQUALTO : Operator.GREATERTHAN; LogicalOperator rootOperator = isPositive ? LogicalOperator.AND : LogicalOperator.OR; - TreeNode treeNode = new TreeNode(left, rootOperator, right); - return treeNode; + LeafNode left = new LeafNode(new Condition(keyName, leftOperator, leftValue)); + LeafNode right = new LeafNode(new Condition(keyName, rightOperator, rightValue)); + return new TreeNode(left, rootOperator, right); } @Override @@ -141,11 +125,9 @@ public TreeNode visitInCondition(PartitionFilterParser.InConditionContext ctx) { private TreeNode buildInCondition(String keyName, List values, boolean isPositive) { List nodes = values.stream() .map(value -> { - LeafNode leafNode = new LeafNode(); - leafNode.keyName = keyName; - leafNode.value = value; - leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; - return leafNode; }) + Operator operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + Condition condition = new Condition(keyName, operator, value); + return new LeafNode(condition); }) .collect(Collectors.toList()); return buildTreeFromNodes(nodes, isPositive ? 
LogicalOperator.OR : LogicalOperator.AND); } @@ -164,10 +146,10 @@ public TreeNode visitMultiColInExpression(PartitionFilterParser.MultiColInExpres } List nodes = new ArrayList<>(struct.size()); for (int j = 0; j < struct.size(); ++j) { - LeafNode leafNode = new LeafNode(); - leafNode.keyName = keyNames.get(j); - leafNode.value = struct.get(j); - leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + String keyName = keyNames.get(j); + Object value = struct.get(j); + Operator operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + LeafNode leafNode = new LeafNode(new Condition(keyName, operator, value)); nodes.add(leafNode); } treeNodes.add(buildTreeFromNodes(nodes, isPositive ? LogicalOperator.AND : LogicalOperator.OR)); @@ -327,13 +309,10 @@ private TreeNode negateTree(TreeNode node) { } private LeafNode negateLeafNode(LeafNode leaf) { - LeafNode negatedLeaf = new LeafNode(); - negatedLeaf.keyName = leaf.keyName; - - // Invert the operator for the leaf node - negatedLeaf.operator = invertOperator(leaf.operator); - negatedLeaf.value = leaf.value; - return negatedLeaf; + Condition condition = leaf.getCondition(); + Operator invertedOperator = invertOperator(condition.getOperator()); + Condition negatedCondition = new Condition(condition.getKeyName(), invertedOperator, condition.getValue()); + return new LeafNode(negatedCondition); } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/MetaToolObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/MetaToolObjectStore.java index f99d86895d08..3408d6dceeb8 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/MetaToolObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/MetaToolObjectStore.java @@ -52,7 +52,6 @@ import org.slf4j.LoggerFactory; import static org.apache.commons.lang3.StringUtils.repeat; -import static org.apache.hadoop.hive.metastore.Batchable.runBatched; /** * This class should be used in metatool only @@ -674,7 +673,7 @@ public List getMetadataSummary(String catalogFilter, private void collectPartitionSummary(Map summaries, Set partedTabs, Set nonPartedTabs) throws MetaException { String queryText0 = "select \"TBL_ID\", count(1) from \"PARTITION_KEYS\" where \"TBL_ID\" in ("; - runBatched(batchSize, new ArrayList<>(summaries.keySet()), new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { Pair, List> qResult = getResultFromInput(input, queryText0, " group by \"TBL_ID\""); @@ -696,9 +695,9 @@ public List run(List input) throws Exception { } return Collections.emptyList(); } - }); + }.runBatched(batchSize, new ArrayList<>(summaries.keySet())); String queryText1 = "select \"TBL_ID\", count(1) from \"PARTITIONS\" where \"TBL_ID\" in ("; - runBatched(batchSize, new ArrayList<>(partedTabs), new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { Pair, List> qResult = getResultFromInput(input, queryText1, " group by \"TBL_ID\""); @@ -717,7 +716,7 @@ public List run(List input) throws Exception { } return Collections.emptyList(); } - }); + }.runBatched(batchSize, new ArrayList<>(partedTabs)); } private void collectColumnSummary(Map summaries) throws MetaException { @@ -725,7 +724,7 @@ private void collectColumnSummary(Map summaries) thr " sum(CASE WHEN \"TYPE_NAME\" like 'struct%' THEN 1 ELSE 0 END), 
sum(CASE WHEN \"TYPE_NAME\" like 'map%' THEN 1 ELSE 0 END)" + " from \"TBLS\" t join \"SDS\" s on t.\"SD_ID\" = s.\"SD_ID\" join \"CDS\" c on s.\"CD_ID\" = c.\"CD_ID\" join \"COLUMNS_V2\" v on c.\"CD_ID\" = v.\"CD_ID\"" + " where \"TBL_ID\" in ("; - runBatched(batchSize, new ArrayList<>(summaries.keySet()), new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { Pair, List> qResult = getResultFromInput(input, queryText0, " group by \"TBL_ID\""); @@ -745,7 +744,7 @@ public List run(List input) throws Exception { } return Collections.emptyList(); } - }); + }.runBatched(batchSize, new ArrayList<>(summaries.keySet())); } private void collectTabFormatSummary(Map summaries) throws MetaException { @@ -754,7 +753,7 @@ private void collectTabFormatSummary(Map summaries) String queryText1 = "select p.\"TBL_ID\", " + dbType.toVarChar("p.\"PARAM_VALUE\"") + " from \"TABLE_PARAMS\" p " + " where p.\"PARAM_KEY\" = 'transactional_properties' and p.\"TBL_ID\" in ("; List transactionTables = new ArrayList<>(); - runBatched(batchSize, new ArrayList<>(summaries.keySet()), new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { Pair, List> qResult = getResultFromInput(input, queryText0, ""); @@ -775,9 +774,9 @@ public List run(List input) throws Exception { } return Collections.emptyList(); } - }); + }.runBatched(batchSize, new ArrayList<>(summaries.keySet())); - runBatched(batchSize, transactionTables, new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { Pair, List> qResult = getResultFromInput(input, queryText1, ""); @@ -799,7 +798,7 @@ public List run(List input) throws Exception { } return Collections.emptyList(); } - }); + }.runBatched(batchSize, transactionTables); } private Pair, List> getResultFromInput(List input, @@ -844,24 +843,24 @@ private void collectBasicStats(Map summaries, Set partedTabs) throws MetaException { String queryText0 = "select \"TBL_ID\", \"PARAM_KEY\", CAST(" + dbType.toVarChar("\"PARAM_VALUE\"") + " AS decimal(21,0)) from \"TABLE_PARAMS\" where \"PARAM_KEY\" " + "in ('" + StatsSetupConst.TOTAL_SIZE + "', '" + StatsSetupConst.NUM_FILES + "', '" + StatsSetupConst.ROW_COUNT + "') and \"TBL_ID\" in ("; - runBatched(batchSize, new ArrayList<>(nonPartedTabs), new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { collectBasicStats(queryText0, input, summaries, ""); return Collections.emptyList(); } - }); + }.runBatched(batchSize, new ArrayList<>(nonPartedTabs)); String queryText1 = "select \"TBL_ID\", \"PARAM_KEY\", sum(CAST(" + dbType.toVarChar("\"PARAM_VALUE\"") + " AS decimal(21,0))) from \"PARTITIONS\" t " + "join \"PARTITION_PARAMS\" p on p.\"PART_ID\" = t.\"PART_ID\" where \"PARAM_KEY\" " + "in ('" + StatsSetupConst.TOTAL_SIZE + "', '" + StatsSetupConst.NUM_FILES + "', '" + StatsSetupConst.ROW_COUNT + "') and t.\"TBL_ID\" in ("; - runBatched(batchSize, new ArrayList<>(partedTabs), new Batchable() { + new Batchable() { @Override public List run(List input) throws Exception { collectBasicStats(queryText1, input, summaries, " group by \"TBL_ID\", \"PARAM_KEY\""); return Collections.emptyList(); } - }); + }.runBatched(batchSize, new ArrayList<>(partedTabs)); } private void collectBasicStats(String queryText0, List input, Map summaries, String subQ) @@ -920,7 +919,7 @@ public Set filterTablesForSummary(List tableSummarie } Deadline.checkTimeout(); - List tables = Batchable.runBatched(batchSize, new ArrayList<>(tableIds), new 
Batchable() { + List tables = new Batchable() { @Override public List run(List input) throws Exception { int size = input.size(); @@ -956,7 +955,7 @@ public List run(List input) throws Exception { } return ids; } - }); + }.runBatched(batchSize, new ArrayList<>(tableIds)); return new HashSet<>(tables); } } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartFilterExprUtil.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartFilterExprUtil.java index f4a7cfd58dac..56ab4168422b 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartFilterExprUtil.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartFilterExprUtil.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.parser.ExpressionTree; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -34,21 +35,29 @@ public class TestPartFilterExprUtil { @Test public void testAndOrPrecedence() throws MetaException { checkFilter("dt=10 or dt=20 and dt=30 and dt=40 or dt=50 or dt=60 and dt=70", - "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=40}}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=60}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}"); + "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=40}}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=60}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}", + "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=20}, {keyName='dt', operator='=', value=30}, {keyName='dt', operator='=', value=40}]}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=60}, {keyName='dt', operator='=', value=70}]}"); checkFilter("dt=10 or dt=20 and (dt=30 and dt=40 or dt=50) or dt=60 and dt=70", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=30}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=40}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=60}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', 
rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=30}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=40}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=60}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}", + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=MultiAndLeafNode[{keyName='dt', operator='=', value=30}, {keyName='dt', operator='=', value=40}], andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=20}}}, andOr='OR', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=60}, {keyName='dt', operator='=', value=70}]}"); checkFilter("dt=10 or dt=20 and dt=30 and (dt=40 or dt=50 or dt=60) and dt=70", - "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}"); + "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}", + "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}, andOr='AND', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=20}, {keyName='dt', operator='=', value=30}, {keyName='dt', operator='=', value=70}]}}"); checkFilter("(dt=10 or dt=20) and dt=30 and (dt=40 or dt=50) or dt=60 and dt=70", - "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=60}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}"); + "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=60}, 
andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}", + "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='OR', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=60}, {keyName='dt', operator='=', value=70}]}"); checkFilter("(dt=10 or dt=20) and dt=30 and ((dt=40 or dt=50) or dt=60) and dt=70", - "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}"); + "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}", + "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=30}, {keyName='dt', operator='=', value=70}]}"); checkFilter("(dt=10 or dt=20) and (dt=30 and ((dt=40 or dt=50) or dt=60) and dt=70)", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=30}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=30}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}", + "TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=20}}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', 
operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=30}, {keyName='dt', operator='=', value=70}]}"); checkFilter("dt=10 or dt=20 and (dt=30 and ((dt=40 or dt=50) or dt=60) and dt=70)", - "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=30}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}}"); + "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=30}, andOr='AND', rhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}}", + "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}, andOr='AND', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=20}, {keyName='dt', operator='=', value=30}, {keyName='dt', operator='=', value=70}]}}"); checkFilter("dt=10 or (dt=20 and dt=30 and (dt=40 or dt=50) or dt=60) and dt=70", - "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}"); + "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=20}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=30}}, andOr='AND', rhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}", + "TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=10}, andOr='OR', rhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='dt', operator='=', value=40}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=50}}, andOr='AND', rhs=MultiAndLeafNode[{keyName='dt', operator='=', value=20}, {keyName='dt', operator='=', value=30}]}, andOr='OR', rhs=LeafNode{keyName='dt', operator='=', value=60}}, andOr='AND', rhs=LeafNode{keyName='dt', operator='=', value=70}}}"); } @Test @@ -56,7 +65,8 @@ public void testExpressionCombination() 
throws MetaException { checkFilter("(a) in (10, 20) and a != 30", "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='a', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='a', operator='=', value=20}}, andOr='AND', rhs=LeafNode{keyName='a', operator='!=', value=30}}"); checkFilter("(a) in (10, 20) and a between 10 and 15", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='a', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='a', operator='=', value=20}}, andOr='AND', rhs=TreeNode{lhs=LeafNode{keyName='a', operator='>=', value=10}, andOr='AND', rhs=LeafNode{keyName='a', operator='<=', value=15}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='a', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='a', operator='=', value=20}}, andOr='AND', rhs=TreeNode{lhs=LeafNode{keyName='a', operator='>=', value=10}, andOr='AND', rhs=LeafNode{keyName='a', operator='<=', value=15}}}", + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='a', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='a', operator='=', value=20}}, andOr='AND', rhs=MultiAndLeafNode[{keyName='a', operator='>=', value=10}, {keyName='a', operator='<=', value=15}]}"); checkFilter("(a) in (10, 20) and a not between 10 and 15", "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='a', operator='=', value=10}, andOr='OR', rhs=LeafNode{keyName='a', operator='=', value=20}}, andOr='AND', rhs=TreeNode{lhs=LeafNode{keyName='a', operator='<', value=10}, andOr='OR', rhs=LeafNode{keyName='a', operator='>', value=15}}}"); checkFilter("(a) in (10, 20) and a not between 10 and 15 and b < 10", @@ -92,13 +102,15 @@ public void testSingleColInExpressionWhenDateLiteralTypeIsSpecified() throws Met @Test public void testMultiColInExpressionWhenDateLiteralTypeIsNotSpecifiedNorQuoted() throws MetaException { checkFilter("(struct(ds1,ds2)) IN (struct(2000-05-08, 2001-04-08), struct(2000-05-09, 2001-04-09))", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09}}}", + "TreeNode{lhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-08}, {keyName='ds2', operator='=', value=2001-04-08}], andOr='OR', rhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-09}, {keyName='ds2', operator='=', value=2001-04-09}]}"); } @Test public void testMultiColInExpressionWhenDateLiteralTypeIsSpecified() throws MetaException { checkFilter("(struct(ds1,ds2)) IN (struct(DATE'2000-05-08',DATE'2001-04-08'), struct(DATE'2000-05-09',DATE'2001-04-09'))", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08}}, andOr='OR', 
rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09}}}", + "TreeNode{lhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-08}, {keyName='ds2', operator='=', value=2001-04-08}], andOr='OR', rhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-09}, {keyName='ds2', operator='=', value=2001-04-09}]}"); } @Test @@ -116,61 +128,71 @@ public void testSingleColInExpressionWhenTimestampLiteralTypeIsSpecified() throw @Test public void testMultiColInExpressionWhenTimestampLiteralTypeIsNotSpecifiedNorQuoted() throws MetaException { checkFilter("(struct(ds1,ds2)) IN (struct(2000-05-08 01:00:00, 2001-04-08 01:00:00), struct(2000-05-09 01:00:00, 2001-04-09 01:00:00))", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08 01:00:00}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09 01:00:00}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08 01:00:00}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09 01:00:00}}}", + "TreeNode{lhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, {keyName='ds2', operator='=', value=2001-04-08 01:00:00}], andOr='OR', rhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, {keyName='ds2', operator='=', value=2001-04-09 01:00:00}]}"); } @Test public void testMultiColInExpressionWhenTimestampLiteralTypeIsSpecified() throws MetaException { checkFilter("(struct(ds1,ds2)) IN (struct(TIMESTAMP'2000-05-08 01:00:00',TIMESTAMP'2001-04-08 01:00:00'), struct(TIMESTAMP'2000-05-09 01:00:00',TIMESTAMP'2001-04-09 01:00:00'))", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08 01:00:00}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09 01:00:00}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08 01:00:00}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09 01:00:00}}}", + "TreeNode{lhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, {keyName='ds2', operator='=', value=2001-04-08 01:00:00}], andOr='OR', rhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, {keyName='ds2', operator='=', value=2001-04-09 01:00:00}]}"); } @Test public void testBetweenExpressionWhenDateLiteralTypeIsNotSpecifiedNorQuoted() throws MetaException { checkFilter("(j BETWEEN 1990-11-10 AND 1990-11-11)", - "TreeNode{lhs=LeafNode{keyName='j', operator='>=', value=1990-11-10}, andOr='AND', rhs=LeafNode{keyName='j', operator='<=', value=1990-11-11}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='>=', value=1990-11-10}, andOr='AND', rhs=LeafNode{keyName='j', operator='<=', 
value=1990-11-11}}", + "MultiAndLeafNode[{keyName='j', operator='>=', value=1990-11-10}, {keyName='j', operator='<=', value=1990-11-11}]"); } @Test public void testBetweenExpressionWhenDateLiteralTypeIsSpecified() throws MetaException { checkFilter("(j BETWEEN DATE'1990-11-10' AND DATE'1990-11-11')", - "TreeNode{lhs=LeafNode{keyName='j', operator='>=', value=1990-11-10}, andOr='AND', rhs=LeafNode{keyName='j', operator='<=', value=1990-11-11}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='>=', value=1990-11-10}, andOr='AND', rhs=LeafNode{keyName='j', operator='<=', value=1990-11-11}}", + "MultiAndLeafNode[{keyName='j', operator='>=', value=1990-11-10}, {keyName='j', operator='<=', value=1990-11-11}]"); } @Test public void testBetweenExpressionWhenTimestampLiteralTypeIsNotSpecifiedNorQuoted() throws MetaException { checkFilter("dt BETWEEN 2000-01-01 01:00:00 AND 2000-01-01 01:42:00)", - "TreeNode{lhs=LeafNode{keyName='dt', operator='>=', value=2000-01-01 01:00:00}, andOr='AND', rhs=LeafNode{keyName='dt', operator='<=', value=2000-01-01 01:42:00}}"); + "TreeNode{lhs=LeafNode{keyName='dt', operator='>=', value=2000-01-01 01:00:00}, andOr='AND', rhs=LeafNode{keyName='dt', operator='<=', value=2000-01-01 01:42:00}}", + "MultiAndLeafNode[{keyName='dt', operator='>=', value=2000-01-01 01:00:00}, {keyName='dt', operator='<=', value=2000-01-01 01:42:00}]"); } @Test public void testBetweenExpressionWhenTimestampLiteralTypeIsSpecified() throws MetaException { checkFilter("dt BETWEEN TIMESTAMP'2000-01-01 01:00:00' AND TIMESTAMP'2000-01-01 01:42:00')", - "TreeNode{lhs=LeafNode{keyName='dt', operator='>=', value=2000-01-01 01:00:00}, andOr='AND', rhs=LeafNode{keyName='dt', operator='<=', value=2000-01-01 01:42:00}}"); + "TreeNode{lhs=LeafNode{keyName='dt', operator='>=', value=2000-01-01 01:00:00}, andOr='AND', rhs=LeafNode{keyName='dt', operator='<=', value=2000-01-01 01:42:00}}", + "MultiAndLeafNode[{keyName='dt', operator='>=', value=2000-01-01 01:00:00}, {keyName='dt', operator='<=', value=2000-01-01 01:42:00}]"); } @Test public void testBinaryExpressionWhenDateLiteralTypeIsNotSpecifiedNorQuoted() throws MetaException { checkFilter("(j = 1990-11-10 or j = 1990-11-11 and j = 1990-11-12)", - "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12}}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12}}}", + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=MultiAndLeafNode[{keyName='j', operator='=', value=1990-11-11}, {keyName='j', operator='=', value=1990-11-12}]}"); } @Test public void testBinaryExpressionWhenDateLiteralTypeIsSpecified() throws MetaException { checkFilter("(j = DATE'1990-11-10' or j = DATE'1990-11-11' and j = DATE'1990-11-12')", - "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12}}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12}}}", + 
"TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=MultiAndLeafNode[{keyName='j', operator='=', value=1990-11-11}, {keyName='j', operator='=', value=1990-11-12}]}"); } @Test public void testBinaryExpressionWhenTimeStampLiteralTypeIsNotSpecifiedNorQuoted() throws MetaException { checkFilter("(j = 1990-11-10 01:00:00 or j = 1990-11-11 01:00:24 and j = 1990-11-12 01:42:00)", - "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11 01:00:24}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12 01:42:00}}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11 01:00:24}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12 01:42:00}}}", + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=MultiAndLeafNode[{keyName='j', operator='=', value=1990-11-11 01:00:24}, {keyName='j', operator='=', value=1990-11-12 01:42:00}]}"); } @Test public void testBinaryExpressionWhenTimeStampLiteralTypeIsSpecified() throws MetaException { checkFilter("(j = TIMESTAMP'1990-11-10 01:00:00' or j = TIMESTAMP'1990-11-11 01:00:24' and j = TIMESTAMP'1990-11-12 01:42:00')", - "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11 01:00:24}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12 01:42:00}}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11 01:00:24}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12 01:42:00}}}", + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=MultiAndLeafNode[{keyName='j', operator='=', value=1990-11-11 01:00:24}, {keyName='j', operator='=', value=1990-11-12 01:42:00}]}"); } @@ -183,7 +205,8 @@ public void testSingleColInExpressionWithIntLiteral() throws MetaException { @Test public void testBetweenExpressionWithIntLiteral() throws MetaException { checkFilter("dt between 10 and 20", - "TreeNode{lhs=LeafNode{keyName='dt', operator='>=', value=10}, andOr='AND', rhs=LeafNode{keyName='dt', operator='<=', value=20}}"); + "TreeNode{lhs=LeafNode{keyName='dt', operator='>=', value=10}, andOr='AND', rhs=LeafNode{keyName='dt', operator='<=', value=20}}", + "MultiAndLeafNode[{keyName='dt', operator='>=', value=10}, {keyName='dt', operator='<=', value=20}]"); } @Test @@ -213,19 +236,22 @@ public void testSingleColInExpressionWithStringLikeDate() throws MetaException { @Test public void testMultiColInExpressionWithDateLikeString() throws MetaException { checkFilter("(struct(ds1,ds2)) IN (struct('2000-05-08','2001-04-08'), struct('2000-05-09','2001-04-09'))", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08}}, andOr='OR', 
rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09}}}", + "TreeNode{lhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-08}, {keyName='ds2', operator='=', value=2001-04-08}], andOr='OR', rhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-09}, {keyName='ds2', operator='=', value=2001-04-09}]}"); } @Test public void testBetweenExpressionWithStringLikeDate() throws MetaException { checkFilter("(j BETWEEN '1990-11-10' AND '1990-11-11')", - "TreeNode{lhs=LeafNode{keyName='j', operator='>=', value=1990-11-10}, andOr='AND', rhs=LeafNode{keyName='j', operator='<=', value=1990-11-11}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='>=', value=1990-11-10}, andOr='AND', rhs=LeafNode{keyName='j', operator='<=', value=1990-11-11}}", + "MultiAndLeafNode[{keyName='j', operator='>=', value=1990-11-10}, {keyName='j', operator='<=', value=1990-11-11}]"); } @Test public void testBinaryExpressionWithStringLikeDate() throws MetaException { checkFilter("(j = '1990-11-10' or j = '1990-11-11' and j = '1990-11-12')", - "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12}}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12}}}", + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10}, andOr='OR', rhs=MultiAndLeafNode[{keyName='j', operator='=', value=1990-11-11}, {keyName='j', operator='=', value=1990-11-12}]}"); } @Test @@ -237,24 +263,34 @@ public void testSingleColInExpressionWithStringLikeTimestamp() throws MetaExcept @Test public void testMultiColInExpressionWithTimestampLikeString() throws MetaException { checkFilter("(struct(ds1,ds2)) IN (struct('2000-05-08 01:00:00','2001-04-08 01:00:00'), struct('2000-05-09 01:00:00','2001-04-09 01:00:00'))", - "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08 01:00:00}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09 01:00:00}}}"); + "TreeNode{lhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-08 01:00:00}}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, andOr='AND', rhs=LeafNode{keyName='ds2', operator='=', value=2001-04-09 01:00:00}}}", + "TreeNode{lhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-08 01:00:00}, {keyName='ds2', operator='=', value=2001-04-08 01:00:00}], andOr='OR', rhs=MultiAndLeafNode[{keyName='ds1', operator='=', value=2000-05-09 01:00:00}, {keyName='ds2', operator='=', value=2001-04-09 01:00:00}]}"); } @Test public void testBetweenExpressionWithStringLikeTimestamp() throws MetaException { checkFilter("dt BETWEEN '2000-01-01 01:00:00' AND '2000-01-01 01:42:00')", - "TreeNode{lhs=LeafNode{keyName='dt', operator='>=', value=2000-01-01 01:00:00}, andOr='AND', rhs=LeafNode{keyName='dt', operator='<=', value=2000-01-01 01:42:00}}"); + "TreeNode{lhs=LeafNode{keyName='dt', operator='>=', 
value=2000-01-01 01:00:00}, andOr='AND', rhs=LeafNode{keyName='dt', operator='<=', value=2000-01-01 01:42:00}}", + "MultiAndLeafNode[{keyName='dt', operator='>=', value=2000-01-01 01:00:00}, {keyName='dt', operator='<=', value=2000-01-01 01:42:00}]"); } @Test public void testBinaryExpressionWithStringLikeTimeStamp() throws MetaException { checkFilter("(j = '1990-11-10 01:00:00' or j = '1990-11-11 01:00:24' and j = '1990-11-12 01:42:00')", - "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11 01:00:24}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12 01:42:00}}}"); + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-11 01:00:24}, andOr='AND', rhs=LeafNode{keyName='j', operator='=', value=1990-11-12 01:42:00}}}", + "TreeNode{lhs=LeafNode{keyName='j', operator='=', value=1990-11-10 01:00:00}, andOr='OR', rhs=MultiAndLeafNode[{keyName='j', operator='=', value=1990-11-11 01:00:24}, {keyName='j', operator='=', value=1990-11-12 01:42:00}]}"); } private void checkFilter(String filter, String expectTreeString) throws MetaException { + checkFilter(filter, expectTreeString, expectTreeString); + } + + private void checkFilter(String filter, String expectTreeString, String expectedFlattenedTreeString) + throws MetaException { ExpressionTree expressionTree = PartFilterExprUtil.parseFilterTree(filter); assertThat(expressionTree.getRoot().toString(), is(expectTreeString)); + TreeNode flattened = PartFilterExprUtil.flattenAndExpressions(expressionTree.getRoot()); + assertThat(flattened.toString(), is(expectedFlattenedTreeString)); } @Test diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java index 551ffabe6b9a..217b7bbd096f 100644 --- a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java @@ -62,10 +62,8 @@ import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkGetPartitionsByPs; import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkGetPartitionsStat; import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkGetTable; -import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkListAllTables; import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkListDatabases; -import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkListManyPartitions; -import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkListPartition; +import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkListPartitions; import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkListTables; import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkOpenTxns; import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.benchmarkPartitionManagement; @@ -271,82 +269,42 @@ private void runNonAcidBenchmarks() { .doSanitize(doSanitize) .add("getNid", () -> benchmarkGetNotificationId(bench, bData)) 
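+ // Each benchmark below now receives the full 'instances' array and returns its results keyed by instance count, which replaces the per-howMany registration loop removed further down.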
.add("listDatabases", () -> benchmarkListDatabases(bench, bData)) - .add("listTables", () -> benchmarkListAllTables(bench, bData)) + .add("listTables", () -> benchmarkListTables(bench, bData, instances)) .add("getTable", () -> benchmarkGetTable(bench, bData)) .add("createTable", () -> benchmarkTableCreate(bench, bData)) .add("dropTable", () -> benchmarkDeleteCreate(bench, bData)) .add("dropTableWithPartitions", - () -> benchmarkDeleteWithPartitions(bench, bData, 1, nParameters[0])) - .add("dropTableMetadataWithPartitions", - () -> benchmarkDeleteMetaOnlyWithPartitions(bench, bData, 1, nParameters[0])) - .add("addPartition", () -> benchmarkCreatePartition(bench, bData, 1)) - .add("dropPartition", () -> benchmarkDropPartition(bench, bData, 1)) - .add("listPartition", () -> benchmarkListPartition(bench, bData)) - .add("getPartition", - () -> benchmarkGetPartitions(bench, bData, 1)) + () -> benchmarkDeleteWithPartitions(bench, bData, instances, nParameters[0])) + .add("dropTableMetaOnlyWithPartitions", + () -> benchmarkDeleteMetaOnlyWithPartitions(bench, bData, instances, nParameters[0])) + .add("addPartition", () -> benchmarkCreatePartition(bench, bData, instances)) + .add("addPartitions", () -> benchmarkCreatePartitions(bench, bData, instances)) + .add("dropPartition", () -> benchmarkDropPartition(bench, bData, instances)) + .add("dropPartitions", () -> benchmarkDropPartitions(bench, bData, instances)) + .add("listPartitions", () -> benchmarkListPartitions(bench, bData, instances)) + .add("alterPartitions", () -> benchmarkAlterPartitions(bench, bData, instances)) + .add("getPartitions", + () -> benchmarkGetPartitions(bench, bData, instances)) .add("getPartitionNames", - () -> benchmarkGetPartitionNames(bench, bData, 1)) + () -> benchmarkGetPartitionNames(bench, bData, instances)) .add("getPartitionsByNames", - () -> benchmarkGetPartitionsByName(bench, bData, 1)) + () -> benchmarkGetPartitionsByName(bench, bData, instances)) .add("getPartitionsByFilter", - () -> benchmarkGetPartitionsByFilter(bench, bData, 1)) + () -> benchmarkGetPartitionsByFilter(bench, bData, instances)) .add("getPartitionsByPs", - () -> benchmarkGetPartitionsByPs(bench, bData, 1)) + () -> benchmarkGetPartitionsByPs(bench, bData, instances)) .add("getPartitionsStat", - () -> benchmarkGetPartitionsStat(bench, bData, 1)) + () -> benchmarkGetPartitionsStat(bench, bData, instances)) .add("updatePartitionsStat", - () -> benchmarkUpdatePartitionsStat(bench, bData, 1)) + () -> benchmarkUpdatePartitionsStat(bench, bData, instances)) .add("renameTable", - () -> benchmarkRenameTable(bench, bData, 1)) + () -> benchmarkRenameTable(bench, bData, instances)) .add("dropDatabase", - () -> benchmarkDropDatabase(bench, bData, 1)) - .add("openTxn", - () -> benchmarkOpenTxns(bench, bData, 1)) + () -> benchmarkDropDatabase(bench, bData, instances)) + .add("openTxns", + () -> benchmarkOpenTxns(bench, bData, instances)) .add("PartitionManagementTask", - () -> benchmarkPartitionManagement(bench, bData, 1)); - - for (int howMany: instances) { - suite.add("listTables" + '.' + howMany, - () -> benchmarkListTables(bench, bData, howMany)) - .add("dropTableWithPartitions" + '.' + howMany, - () -> benchmarkDeleteWithPartitions(bench, bData, howMany, nParameters[0])) - .add("dropTableMetaOnlyWithPartitions" + '.' + howMany, - () -> benchmarkDeleteMetaOnlyWithPartitions(bench, bData, howMany, nParameters[0])) - .add("listPartitions" + '.' + howMany, - () -> benchmarkListManyPartitions(bench, bData, howMany)) - .add("getPartitions" + '.' 
+ howMany, - () -> benchmarkGetPartitions(bench, bData, howMany)) - .add("getPartitionNames" + '.' + howMany, - () -> benchmarkGetPartitionNames(bench, bData, howMany)) - .add("getPartitionsByNames" + '.' + howMany, - () -> benchmarkGetPartitionsByName(bench, bData, howMany)) - .add("getPartitionsByFilter" + '.' + howMany, - () -> benchmarkGetPartitionsByFilter(bench, bData, howMany)) - .add("getPartitionsByPs" + '.' + howMany, - () -> benchmarkGetPartitionsByPs(bench, bData, howMany)) - .add("getPartitionsStat" + '.' + howMany, - () -> benchmarkGetPartitionsStat(bench, bData, howMany)) - .add("updatePartitionsStat" + '.' + howMany, - () -> benchmarkUpdatePartitionsStat(bench, bData, howMany)) - .add("addPartitions" + '.' + howMany, - () -> benchmarkCreatePartitions(bench, bData, howMany)) - .add("dropPartitions" + '.' + howMany, - () -> benchmarkDropPartitions(bench, bData, howMany)) - .add("alterPartitions" + '.' + howMany, - () -> benchmarkAlterPartitions(bench, bData, howMany)) - .add("renameTable" + '.' + howMany, - () -> benchmarkRenameTable(bench, bData, howMany)) - .add("dropDatabase" + '.' + howMany, - () -> benchmarkDropDatabase(bench, bData, howMany)) - .add("addPartition" + '.' + howMany, - () -> benchmarkCreatePartition(bench, bData, howMany)) - .add("dropPartition" + '.' + howMany, - () -> benchmarkDropPartition(bench, bData, howMany)) - .add("openTxns" + '.' + howMany, - () -> benchmarkOpenTxns(bench, bData, howMany)) - .add("PartitionManagementTask" + "." + howMany, - () -> benchmarkPartitionManagement(bench, bData, howMany)); - } + () -> benchmarkPartitionManagement(bench, bData, instances)); List toRun = suite.listMatching(matches, exclude); if (toRun.isEmpty()) { diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java index c01200c33be5..77b7ee2eeabd 100644 --- a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java @@ -27,7 +27,6 @@ import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.thrift.TException; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,55 +61,52 @@ final class HMSBenchmarks { private static final String PARAM_KEY = "parameter_"; private static final String PARAM_VALUE = "value_"; - static DescriptiveStatistics benchmarkListDatabases(@NotNull MicroBenchmark benchmark, + static Map benchmarkListDatabases(@NotNull MicroBenchmark benchmark, @NotNull BenchData data) { final HMSClient client = data.getClient(); - return benchmark.measure(() -> - throwingSupplierWrapper(() -> client.getAllDatabases(null))); + Map res = new HashMap<>(); + res.put("", benchmark.measure(() -> + throwingSupplierWrapper(() -> client.getAllDatabases(null)))); + return res; } - static DescriptiveStatistics benchmarkListAllTables(@NotNull MicroBenchmark benchmark, - @NotNull BenchData data) { - - final HMSClient client = data.getClient(); - String dbName = data.dbName; - - return benchmark.measure(() -> - throwingSupplierWrapper(() -> client.getAllTables(dbName, 
null))); - } - - static DescriptiveStatistics benchmarkTableCreate(@NotNull MicroBenchmark bench, + static Map benchmarkTableCreate(@NotNull MicroBenchmark bench, @NotNull BenchData data) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; Table table = Util.TableBuilder.buildDefaultTable(dbName, tableName); + Map res = new HashMap<>(); - return bench.measure(null, + res.put("", bench.measure(null, () -> throwingSupplierWrapper(() -> client.createTable(table)), - () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName))); + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)))); + return res; } - static DescriptiveStatistics benchmarkDeleteCreate(@NotNull MicroBenchmark bench, + static Map benchmarkDeleteCreate(@NotNull MicroBenchmark bench, @NotNull BenchData data) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; Table table = Util.TableBuilder.buildDefaultTable(dbName, tableName); + Map res = new HashMap<>(); - return bench.measure( + res.put("", bench.measure( () -> throwingSupplierWrapper(() -> client.createTable(table)), () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), - null); + null)); + return res; } - static DescriptiveStatistics benchmarkDeleteWithPartitions(@NotNull MicroBenchmark bench, + static Map benchmarkDeleteWithPartitions(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int howMany, + int[] partitionCounts, int nparams) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; + Map res = new HashMap<>(); // Create many parameters Map parameters = new HashMap<>(nparams); @@ -118,24 +114,28 @@ static DescriptiveStatistics benchmarkDeleteWithPartitions(@NotNull MicroBenchma parameters.put(PARAM_KEY + i, PARAM_VALUE + i); } - return bench.measure( - () -> throwingSupplierWrapper(() -> { - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - addManyPartitions(client, dbName, tableName, parameters, - Collections.singletonList("d"), howMany); - return true; - }), - () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), - null); + for (int count : partitionCounts) { + res.put("." + count, bench.measure( + () -> throwingSupplierWrapper(() -> { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + addManyPartitions(client, dbName, tableName, parameters, + Collections.singletonList("d"), count); + return true; + }), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), + null)); + } + return res; } - static DescriptiveStatistics benchmarkDeleteMetaOnlyWithPartitions(@NotNull MicroBenchmark bench, + static Map benchmarkDeleteMetaOnlyWithPartitions(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int howMany, + int[] partitionCounts, int nparams) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; + Map res = new HashMap<>(); // Create many parameters Map parameters = new HashMap<>(nparams); @@ -143,556 +143,582 @@ static DescriptiveStatistics benchmarkDeleteMetaOnlyWithPartitions(@NotNull Micr parameters.put(PARAM_KEY + i, PARAM_VALUE + i); } - return bench.measure( + for (int count : partitionCounts) { + res.put("." 
+ count, bench.measure( () -> throwingSupplierWrapper(() -> { BenchmarkUtils.createPartitionedTable(client, dbName, tableName); addManyPartitions(client, dbName, tableName, parameters, - Collections.singletonList("d"), howMany); + Collections.singletonList("d"), count); return true; }), () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName, false)), - null); + null)); + } + return res; } - static DescriptiveStatistics benchmarkGetTable(@NotNull MicroBenchmark bench, + static Map benchmarkGetTable(@NotNull MicroBenchmark bench, @NotNull BenchData data) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; + Map res = new HashMap<>(); BenchmarkUtils.createPartitionedTable(client, dbName, tableName); try { - return bench.measure(() -> - throwingSupplierWrapper(() -> client.getTable(dbName, tableName))); + res.put("", bench.measure(() -> + throwingSupplierWrapper(() -> client.getTable(dbName, tableName)))); } finally { throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); } + return res; } - static DescriptiveStatistics benchmarkListTables(@NotNull MicroBenchmark bench, + static Map benchmarkListTables(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] tableCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; + Map res = new HashMap<>(); // Create a bunch of tables String format = "tmp_table_%d"; - try { - BenchmarkUtils.createManyTables(client, count, dbName, format); - return bench.measure(() -> - throwingSupplierWrapper(() -> client.getAllTables(dbName, null))); - } finally { - BenchmarkUtils.dropManyTables(client, count, dbName, format); + for (int count : tableCounts) { + try { + BenchmarkUtils.createManyTables(client, count, dbName, format); + res.put("." 
+ count, bench.measure(() -> + throwingSupplierWrapper(() -> client.getAllTables(dbName, null)))); + } finally { + BenchmarkUtils.dropManyTables(client, count, dbName, format); + } } + return res; } - static DescriptiveStatistics benchmarkCreatePartition(@NotNull MicroBenchmark bench, + static Map benchmarkCreatePartition(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int howMany) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; + Map res = new HashMap<>(); - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - Table t = client.getTable(dbName, tableName); - List parts = createManyPartitions(t, null, Collections.singletonList("d"), howMany); - - return bench.measure(null, - () -> throwingSupplierWrapper(() -> { - parts.forEach(part -> throwingSupplierWrapper(() -> client.addPartition(part))); - return null; - }), - () -> throwingSupplierWrapper(() -> client.dropPartitions(dbName, tableName, null))); - } catch (TException e) { - e.printStackTrace(); - return new DescriptiveStatistics(); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); - } - } - - static DescriptiveStatistics benchmarkListPartition(@NotNull MicroBenchmark bench, - @NotNull BenchData data) { - final HMSClient client = data.getClient(); - String dbName = data.dbName; - String tableName = data.tableName; + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + Table t = throwingSupplierWrapper(() -> client.getTable(dbName, tableName)); + List parts = createManyPartitions(t, null, Collections.singletonList("d"), count); - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - addManyPartitions(client, dbName, tableName, null, - Collections.singletonList("d"), 1); - - return bench.measure(() -> - throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName))); - } catch (TException e) { - e.printStackTrace(); - return new DescriptiveStatistics(); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + res.put("." + count, bench.measure(null, + () -> throwingSupplierWrapper(() -> { + parts.forEach(part -> throwingSupplierWrapper(() -> client.addPartition(part))); + return null; + }), + () -> throwingSupplierWrapper(() -> client.dropPartitions(dbName, tableName, null)))); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkListManyPartitions(@NotNull MicroBenchmark bench, + static Map benchmarkListPartitions(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int howMany) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - addManyPartitions(client, dbName, tableName, null, Collections.singletonList("d"), howMany); - LOG.debug("Created {} partitions", howMany); - LOG.debug("started benchmark... 
"); - return bench.measure(() -> - throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName))); - } catch (TException e) { - e.printStackTrace(); - return new DescriptiveStatistics(); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, Collections.singletonList("d"), count); + LOG.debug("Created {} partitions", count); + LOG.debug("started benchmark... "); + res.put("." + count, bench.measure(() -> + throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName)))); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkGetPartitions(@NotNull MicroBenchmark bench, + static Map benchmarkGetPartitions(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int howMany) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - addManyPartitions(client, dbName, tableName, null, Collections.singletonList("d"), howMany); - LOG.debug("Created {} partitions", howMany); - LOG.debug("started benchmark... "); - return bench.measure(() -> - throwingSupplierWrapper(() -> client.getPartitions(dbName, tableName))); - } catch (TException e) { - e.printStackTrace(); - return new DescriptiveStatistics(); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, Collections.singletonList("d"), count); + LOG.debug("Created {} partitions", count); + LOG.debug("started benchmark... "); + res.put("." + count, bench.measure(() -> + throwingSupplierWrapper(() -> client.getPartitions(dbName, tableName)))); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkDropPartition(@NotNull MicroBenchmark bench, + static Map benchmarkDropPartition(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - return bench.measure( - () -> addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count), - () -> throwingSupplierWrapper(() -> { - List partNames = client.getPartitionNames(dbName, tableName); - partNames.forEach(partName -> - throwingSupplierWrapper(() -> client.dropPartition(dbName, tableName, partName))); - return null; - }), - null); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + res.put("." 
+ count, bench.measure( + () -> addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> { + List partNames = client.getPartitionNames(dbName, tableName); + partNames.forEach(partName -> + throwingSupplierWrapper(() -> client.dropPartition(dbName, tableName, partName))); + return null; + }), + null)); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkCreatePartitions(@NotNull MicroBenchmark bench, + static Map benchmarkCreatePartitions(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - return bench.measure( - null, - () -> addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count), - () -> throwingSupplierWrapper(() -> - client.dropPartitions(dbName, tableName, null)) - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + res.put("." + count, bench.measure( + null, + () -> addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)) + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkDropPartitions(@NotNull MicroBenchmark bench, + static Map benchmarkDropPartitions(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - return bench.measure( - () -> addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count), - () -> throwingSupplierWrapper(() -> - client.dropPartitions(dbName, tableName, null)), - null - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + res.put("." 
+ count, bench.measure( + () -> addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)), + null + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkAlterPartitions(@NotNull MicroBenchmark bench, + static Map benchmarkAlterPartitions(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - return bench.measure( - () -> addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count), - () -> throwingSupplierWrapper(() -> { - List newPartitions = client.getPartitions(dbName, tableName); - newPartitions.forEach(p -> { - p.getParameters().put("new_param", "param_val"); - p.getSd().setCols(Arrays.asList(new FieldSchema("new_col", "string", null))); - }); - client.alterPartitions(dbName, tableName, newPartitions); - return null; - }), - () -> throwingSupplierWrapper(() -> - client.dropPartitions(dbName, tableName, null)) - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + res.put("." + count, bench.measure( + () -> addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> { + List newPartitions = client.getPartitions(dbName, tableName); + newPartitions.forEach(p -> { + p.getParameters().put("new_param", "param_val"); + p.getSd().setCols(Arrays.asList(new FieldSchema("new_col", "string", null))); + }); + client.alterPartitions(dbName, tableName, newPartitions); + return null; + }), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)) + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkGetPartitionNames(@NotNull MicroBenchmark bench, + static Map benchmarkGetPartitionNames(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count); - return bench.measure( - () -> throwingSupplierWrapper(() -> client.getPartitionNames(dbName, tableName)) - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + res.put("." 
+ count, bench.measure( + () -> throwingSupplierWrapper(() -> client.getPartitionNames(dbName, tableName)) + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkGetPartitionsByName(@NotNull MicroBenchmark bench, + static Map benchmarkGetPartitionsByName(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count); - List partitionNames = throwingSupplierWrapper(() -> - client.getPartitionNames(dbName, tableName)); - return bench.measure( - () -> - throwingSupplierWrapper(() -> - client.getPartitionsByNames(dbName, tableName, partitionNames)) - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + List partitionNames = throwingSupplierWrapper(() -> + client.getPartitionNames(dbName, tableName)); + res.put("." + count, bench.measure( + () -> + throwingSupplierWrapper(() -> + client.getPartitionsByNames(dbName, tableName, partitionNames)) + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkGetPartitionsByFilter(@NotNull MicroBenchmark bench, + static Map benchmarkGetPartitionsByFilter(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName, createSchema(Arrays.asList("p_a", "p_b", "p_c"))); - try { - addManyPartitionsNoException(client, dbName, tableName, null, - Arrays.asList("a", "b", "c"), count); - return bench.measure( - () -> - throwingSupplierWrapper(() -> { - // test multiple cases for get_partitions_by_filter - client.getPartitionsByFilter(dbName, tableName, "`p_a`='a0'"); - client.getPartitionsByFilter(dbName, tableName, - "`p_a`='a0' or `p_b`='b0' or `p_c`='c0' or `p_a`='a1' or `p_b`='b1' or `p_c`='c1'"); - return null; - }) - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName, createSchema(Arrays.asList("p_a", "p_b", "p_c"))); + try { + // Create multiple partitions with values: [a0, b0, c0], [a0, b1, c1], [a0, b2, c2]... + List> values = IntStream.range(0, count) + .mapToObj(i -> Arrays.asList("a0", "b" + i, "c" + i)) + .collect(Collectors.toList()); + addManyPartitionsNoException(client, dbName, tableName, null, values); + res.put("#simple." + count, bench.measure( + () -> + throwingSupplierWrapper(() -> { + client.getPartitionsByFilter(dbName, tableName, "`p_b`='b0'"); + return null; + }) + )); + res.put("#multiOr." 
+ count, bench.measure( + () -> + throwingSupplierWrapper(() -> { + client.getPartitionsByFilter(dbName, tableName, + " `p_b`='b0' or `p_c`='c0' or `p_b`='b1' or `p_c`='c1'"); + return null; + }) + )); + res.put("#multiAnd." + count, bench.measure( + () -> + throwingSupplierWrapper(() -> { + client.getPartitionsByFilter(dbName, tableName, "`p_a`='a0' and `p_b`='b0' and `p_c`='c0'"); + return null; + }) + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkGetPartitionsByPs(@NotNull MicroBenchmark bench, + static Map benchmarkGetPartitionsByPs(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName, createSchema(Arrays.asList("p_a", "p_b", "p_c"))); - try { - // Create multiple partitions with values: [a0, b0, c0], [a0, b1, c1], [a0, b2, c2]... - List> values = IntStream.range(0, count) - .mapToObj(i -> Arrays.asList("a0", "b" + i, "c" + i)) - .collect(Collectors.toList()); - addManyPartitionsNoException(client, dbName, tableName, null, values); - return bench.measure( - () -> - throwingSupplierWrapper(() -> - client.getPartitionsByPs(dbName, tableName, Arrays.asList("a0"))) - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName, createSchema(Arrays.asList("p_a", "p_b", "p_c"))); + try { + // Create multiple partitions with values: [a0, b0, c0], [a0, b1, c1], [a0, b2, c2]... + List> values = IntStream.range(0, count) + .mapToObj(i -> Arrays.asList("a0", "b" + i, "c" + i)) + .collect(Collectors.toList()); + addManyPartitionsNoException(client, dbName, tableName, null, values); + res.put("." 
+ count, bench.measure( + () -> + throwingSupplierWrapper(() -> + client.getPartitionsByPs(dbName, tableName, Arrays.asList("a0"))) + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkGetPartitionsStat(@NotNull MicroBenchmark bench, + static Map benchmarkGetPartitionsStat(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count); - List partNames = throwingSupplierWrapper(() -> - client.getPartitionNames(dbName, tableName)); - updateManyPartitionsStatsNoException(client, dbName, tableName, partNames); - PartitionsStatsRequest request = new PartitionsStatsRequest( - dbName, tableName, Arrays.asList("name"), partNames); - return bench.measure( - () -> - throwingSupplierWrapper(() -> client.getPartitionsStats(request)) - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + List partNames = throwingSupplierWrapper(() -> + client.getPartitionNames(dbName, tableName)); + updateManyPartitionsStatsNoException(client, dbName, tableName, partNames); + PartitionsStatsRequest request = new PartitionsStatsRequest( + dbName, tableName, Arrays.asList("name"), partNames); + res.put("." + count, bench.measure( + () -> + throwingSupplierWrapper(() -> client.getPartitionsStats(request)) + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkUpdatePartitionsStat(@NotNull MicroBenchmark bench, + static Map benchmarkUpdatePartitionsStat(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count); - List partNames = throwingSupplierWrapper(() -> - client.getPartitionNames(dbName, tableName)); - return bench.measure( - () -> updateManyPartitionsStatsNoException(client, dbName, tableName, partNames) - ); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + List partNames = throwingSupplierWrapper(() -> + client.getPartitionNames(dbName, tableName)); + res.put("." 
+ count, bench.measure( + () -> updateManyPartitionsStatsNoException(client, dbName, tableName, partNames) + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkRenameTable(@NotNull MicroBenchmark bench, + static Map benchmarkRenameTable(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] partitionCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; - - BenchmarkUtils.createPartitionedTable(client, dbName, tableName); - try { - addManyPartitionsNoException(client, dbName, tableName, null, - Collections.singletonList("d"), count); - Table oldTable = client.getTable(dbName, tableName); - oldTable.getSd().setLocation(""); - Table newTable = oldTable.deepCopy(); - newTable.setTableName(tableName + "_renamed"); - - return bench.measure( - () -> { - // Measuring 2 renames, so the tests are idempotent - throwingSupplierWrapper(() -> - client.alterTable(oldTable.getDbName(), oldTable.getTableName(), newTable)); - throwingSupplierWrapper(() -> - client.alterTable(newTable.getDbName(), newTable.getTableName(), oldTable)); - } - ); - } catch (TException e) { - e.printStackTrace(); - return new DescriptiveStatistics(); - } finally { - throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + Map res = new HashMap<>(); + + for (int count : partitionCounts) { + BenchmarkUtils.createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + Table oldTable = throwingSupplierWrapper(() -> client.getTable(dbName, tableName)); + oldTable.getSd().setLocation(""); + Table newTable = oldTable.deepCopy(); + newTable.setTableName(tableName + "_renamed"); + + res.put("." + count, bench.measure( + () -> { + // Measuring 2 renames, so the tests are idempotent + throwingSupplierWrapper(() -> + client.alterTable(oldTable.getDbName(), oldTable.getTableName(), newTable)); + throwingSupplierWrapper(() -> + client.alterTable(newTable.getDbName(), newTable.getTableName(), oldTable)); + } + )); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } } + return res; } - static DescriptiveStatistics benchmarkDropDatabase(@NotNull MicroBenchmark bench, + static Map benchmarkDropDatabase(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int count) { + int[] tableCounts) { final HMSClient client = data.getClient(); String dbName = data.dbName; - - throwingSupplierWrapper(() -> client.dropDatabase(dbName)); - try { - return bench.measure( - () -> { - throwingSupplierWrapper(() -> client.createDatabase(dbName)); - BenchmarkUtils.createManyTables(client, count, dbName, "tmp_table_%d"); - }, - () -> throwingSupplierWrapper(() -> client.dropDatabase(dbName)), - null - ); - } finally { - throwingSupplierWrapper(() -> client.createDatabase(dbName)); + Map res = new HashMap<>(); + + for (int count : tableCounts) { + throwingSupplierWrapper(() -> client.dropDatabase(dbName)); + try { + res.put("." 
+ count, bench.measure( + () -> { + throwingSupplierWrapper(() -> client.createDatabase(dbName)); + BenchmarkUtils.createManyTables(client, count, dbName, "tmp_table_%d"); + }, + () -> throwingSupplierWrapper(() -> client.dropDatabase(dbName)), + null + )); + } finally { + throwingSupplierWrapper(() -> client.createDatabase(dbName)); + } } + return res; } - static DescriptiveStatistics benchmarkOpenTxns(@NotNull MicroBenchmark bench, + static Map benchmarkOpenTxns(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int howMany) { - final HMSClient client = data.getClient(); - return bench.measure(null, - () -> throwingSupplierWrapper(() -> client.openTxn(howMany)), - () -> throwingSupplierWrapper(() -> client.abortTxns(client.getOpenTxns()))); - } - - static DescriptiveStatistics benchmarkAllocateTableWriteIds(@NotNull MicroBenchmark bench, - @NotNull BenchData data, - int howMany) { - final HMSClient client = data.getClient(); - - return bench.measure( - () -> throwingSupplierWrapper(() -> client.openTxn(howMany)), - () -> throwingSupplierWrapper(() -> client.allocateTableWriteIds("test_db", "test_tbl", client.getOpenTxns())), - () -> throwingSupplierWrapper(() -> client.abortTxns(client.getOpenTxns())) - ); - } - - static DescriptiveStatistics benchmarkGetValidWriteIds(@NotNull MicroBenchmark bench, - @NotNull BenchData data, - int howMany) { + int[] txnCounts) { final HMSClient client = data.getClient(); - String dbName = data.dbName; - List tableNames = new ArrayList<>(); - - return bench.measure( - () -> { - BenchmarkUtils.createManyTables(client, howMany, dbName, "tmp_table_%d"); - for (int i = 0; i < howMany; i++) { - tableNames.add(dbName + ".tmp_table_" + i); - } - }, - () -> throwingSupplierWrapper(() -> client.getValidWriteIds(tableNames)), - () -> { - BenchmarkUtils.dropManyTables(client, howMany, dbName, "tmp_table_%d"); - } - ); + Map res = new HashMap<>(); + for (int count : txnCounts) { + res.put("." 
+ count, bench.measure(null, + () -> throwingSupplierWrapper(() -> client.openTxn(count)), + () -> throwingSupplierWrapper(() -> client.abortTxns(client.getOpenTxns())))); + } + return res; } - static DescriptiveStatistics benchmarkGetNotificationId(@NotNull MicroBenchmark benchmark, + static Map benchmarkGetNotificationId(@NotNull MicroBenchmark benchmark, @NotNull BenchData data) { HMSClient client = data.getClient(); - return benchmark.measure(() -> - throwingSupplierWrapper(client::getCurrentNotificationId)); + Map res = new HashMap<>(); + res.put("", benchmark.measure(() -> + throwingSupplierWrapper(client::getCurrentNotificationId))); + return res; } - static DescriptiveStatistics benchmarkPartitionManagement(@NotNull MicroBenchmark bench, + static Map benchmarkPartitionManagement(@NotNull MicroBenchmark bench, @NotNull BenchData data, - int tableCount) { - - String dbName = data.dbName + "_" + tableCount, tableNamePrefix = data.tableName; - final HMSClient client = data.getClient(); - final PartitionManagementTask partitionManagementTask = new PartitionManagementTask(); - final List paths = new ArrayList<>(); - final FileSystem fs; - try { - fs = FileSystem.get(client.getHadoopConf()); - client.getHadoopConf().set("hive.metastore.uris", client.getServerURI().toString()); - client.getHadoopConf().set("metastore.partition.management.database.pattern", dbName); - partitionManagementTask.setConf(client.getHadoopConf()); - - client.createDatabase(dbName); - for (int i = 0; i < tableCount; i++) { - String tableName = tableNamePrefix + "_" + i; - Util.TableBuilder tableBuilder = new Util.TableBuilder(dbName, tableName).withType(TableType.MANAGED_TABLE) - .withColumns(createSchema(Arrays.asList(new String[] {"astring:string", "aint:int", "adouble:double", "abigint:bigint"}))) - .withPartitionKeys(createSchema(Collections.singletonList("d"))); - boolean enableDynamicPart = i % 5 == 0; - if (enableDynamicPart) { - tableBuilder.withParameter("discover.partitions", "true"); - } - client.createTable(tableBuilder.build()); - addManyPartitionsNoException(client, dbName, tableName, null, Collections.singletonList("d"), 500); - if (enableDynamicPart) { - Table t = client.getTable(dbName, tableName); - Path tabLoc = new Path(t.getSd().getLocation()); - for (int j = 501; j <= 1000; j++) { - Path path = new Path(tabLoc, "d=d" + j + "_1"); - paths.add(path); + int[] tableCounts) { + Map res = new HashMap<>(); + for (int tableCount : tableCounts) { + String dbName = data.dbName + "_" + tableCount, tableNamePrefix = data.tableName; + final HMSClient client = data.getClient(); + final PartitionManagementTask partitionManagementTask = new PartitionManagementTask(); + final List paths = new ArrayList<>(); + final FileSystem fs; + try { + fs = FileSystem.get(client.getHadoopConf()); + client.getHadoopConf().set("hive.metastore.uris", client.getServerURI().toString()); + client.getHadoopConf().set("metastore.partition.management.database.pattern", dbName); + partitionManagementTask.setConf(client.getHadoopConf()); + + client.createDatabase(dbName); + for (int i = 0; i < tableCount; i++) { + String tableName = tableNamePrefix + "_" + i; + Util.TableBuilder tableBuilder = new Util.TableBuilder(dbName, tableName).withType(TableType.MANAGED_TABLE) + .withColumns(createSchema(Arrays.asList(new String[] {"astring:string", "aint:int", "adouble:double", "abigint:bigint"}))) + .withPartitionKeys(createSchema(Collections.singletonList("d"))); + boolean enableDynamicPart = i % 5 == 0; + if (enableDynamicPart) { + 
tableBuilder.withParameter("discover.partitions", "true"); + } + client.createTable(tableBuilder.build()); + addManyPartitionsNoException(client, dbName, tableName, null, Collections.singletonList("d"), 500); + if (enableDynamicPart) { + Table t = client.getTable(dbName, tableName); + Path tabLoc = new Path(t.getSd().getLocation()); + for (int j = 501; j <= 1000; j++) { + Path path = new Path(tabLoc, "d=d" + j + "_1"); + paths.add(path); + } } } + } catch (Exception e) { + throw new RuntimeException(e); } - } catch (Exception e) { - throw new RuntimeException(e); - } - final AtomicLong id = new AtomicLong(0); - ExecutorService service = Executors.newFixedThreadPool(20); - Runnable preRun = () -> { - int len = paths.size() / 20; - id.getAndIncrement(); - List futures = new ArrayList<>(); - for (int i = 0; i <= 20; i++) { - int k = i; - futures.add(service.submit((Callable) () -> { - for (int j = k * len; j < (k + 1) * len && j < paths.size(); j++) { - Path path = paths.get(j); - if (id.get() == 1) { - fs.mkdirs(path); - } else { - String fileName = path.getName().split("_")[0]; - long seq = id.get(); - Path destPath = new Path(path.getParent(), fileName + "_" + seq); - Path sourcePath = new Path(path.getParent(), fileName + "_" + (seq-1)); - fs.rename(sourcePath, destPath); + final AtomicLong id = new AtomicLong(0); + ExecutorService service = Executors.newFixedThreadPool(20); + Runnable preRun = () -> { + int len = paths.size() / 20; + id.getAndIncrement(); + List futures = new ArrayList<>(); + for (int i = 0; i <= 20; i++) { + int k = i; + futures.add(service.submit((Callable) () -> { + for (int j = k * len; j < (k + 1) * len && j < paths.size(); j++) { + Path path = paths.get(j); + if (id.get() == 1) { + fs.mkdirs(path); + } else { + String fileName = path.getName().split("_")[0]; + long seq = id.get(); + Path destPath = new Path(path.getParent(), fileName + "_" + seq); + Path sourcePath = new Path(path.getParent(), fileName + "_" + (seq-1)); + fs.rename(sourcePath, destPath); + } } + return null; + })); + } + for (Future future : futures) { + try { + future.get(); + } catch (Exception e) { + service.shutdown(); + throw new RuntimeException(e); } - return null; - })); - } - for (Future future : futures) { - try { - future.get(); - } catch (Exception e) { - service.shutdown(); - throw new RuntimeException(e); } - } - }; + }; - try { - return bench.measure(preRun, partitionManagementTask, null); - } finally { - service.shutdown(); + try { + res.put("." + tableCount, bench.measure(preRun, partitionManagementTask, null)); + } finally { + service.shutdown(); + } } + return res; } - } diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java index f11551414278..80ceeea6014d 100644 --- a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java @@ -71,7 +71,7 @@ public final class BenchmarkSuite { // mean +/- MARGIN * stddev private static final double MARGIN = 2; // Collection of benchmarks - private final Map> suite = new HashMap<>(); + private final Map>> suite = new HashMap<>(); // List of benchmarks. 
All benchmarks are executed in the order // they are inserted private final List benchmarks = new ArrayList<>(); @@ -120,17 +120,13 @@ public Map getResult() { * @return this to allow chaining */ private BenchmarkSuite runAll(List names) { - if (doSanitize) { - names.forEach(name -> { - LOG.info("Running benchmark {}", name); - result.put(name, sanitize(suite.get(name).get())); - }); - } else { - names.forEach(name -> { - LOG.info("Running benchmark {}", name); - result.put(name, suite.get(name).get()); - }); - } + names.forEach(name -> { + LOG.info("Running benchmark {}", name); + for (Map.Entry entry : suite.get(name).get().entrySet()) { + DescriptiveStatistics stats = doSanitize ? sanitize(entry.getValue()) : entry.getValue(); + result.put(name + entry.getKey(), stats); + } + }); return this; } @@ -164,7 +160,7 @@ public BenchmarkSuite runMatching(@Nullable Pattern[] positive, * @param b benchmark corresponding to name * @return this */ - public BenchmarkSuite add(@NotNull String name, @NotNull Supplier b) { + public BenchmarkSuite add(@NotNull String name, @NotNull Supplier> b) { suite.put(name, b); benchmarks.add(name); return this; diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java index b94bd80e7e47..fb13d3f97a35 100644 --- a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java @@ -20,6 +20,8 @@ import com.google.common.base.Joiner; import com.google.common.net.HostAndPort; + +import org.apache.hadoop.hive.metastore.Batchable; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -632,7 +634,14 @@ static Object addManyPartitions(@NotNull HMSClient client, @NotNull List arguments, int npartitions) throws TException { Table table = client.getTable(dbName, tableName); - client.addPartitions(createManyPartitions(table, parameters, arguments, npartitions)); + List partitions = createManyPartitions(table, parameters, arguments, npartitions); + new Batchable() { + @Override + public List run(List input) throws Exception { + client.addPartitions(input); + return null; + } + }.runBatched(1000, partitions); return null; } @@ -675,7 +684,14 @@ static void addManyPartitionsNoException(@NotNull HMSClient client, List> values) { throwingSupplierWrapper(() -> { Table table = client.getTable(dbName, tableName); - client.addPartitions(createManyPartitions(table, parameters, values)); + List partitions = createManyPartitions(table, parameters, values); + new Batchable() { + @Override + public List run(List input) throws Exception { + client.addPartitions(input); + return null; + } + }.runBatched(1000, partitions); return null; }); }