diff --git a/.blazar.yaml b/.blazar.yaml new file mode 100644 index 000000000000..e034ada7508d --- /dev/null +++ b/.blazar.yaml @@ -0,0 +1,25 @@ +buildpack: + name: Blazar-Buildpack-Java-single-module + +env: + MAVEN_PHASE: "package assembly:single deploy" + HADOOP_DEP_VERSION: "3.3.6-hubspot-SNAPSHOT" + MAVEN_BUILD_ARGS: "-Phadoop-3.0 -Dhadoop.profile=3.0 -Dhadoop-three.version=$HADOOP_DEP_VERSION -Dgpg.skip=true -DskipTests -DdeployAtEnd -pl hbase-assembly -am -T1C" + + # Below variables are generated in prepare_environment.sh. + # The build environment requires environment variables to be explicitly defined before they may + # be modified by the `write-build-env-var` utility script to persist changes to an environment variable + # throughout a build + REPO_NAME: "" + SET_VERSION: "" + HBASE_VERSION: "" + PKG_RELEASE: "" + FULL_BUILD_VERSION: "" + +before: + - description: "Prepare build environment" + commands: + - $WORKSPACE/build-scripts/prepare_environment.sh + +provides: + - hbase diff --git a/.build-jdk17 b/.build-jdk17 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/build-scripts/prepare_environment.sh b/build-scripts/prepare_environment.sh new file mode 100755 index 000000000000..65842dcd4d17 --- /dev/null +++ b/build-scripts/prepare_environment.sh @@ -0,0 +1,97 @@ +# +# Generates the appropriate environment vars so that we: +# - build against the right version of hadoop, and properly set up maven +# - generate the correct maven version based on the branches +# - upload RPMs with the correct release based on the branch, and to the right yum repo +# +# Since we need to distribute .blazar.yaml to all sub-modules of the project, we define our constants once +# in this script, which can be re-used by every .blazar.yaml. +# +set -ex +printenv + +# We base the expected main branch and resulting maven version for clients on the hbase minor version. +# The reason for this is hbase re-branches for each minor release (2.4, 2.5, 2.6, etc). At each re-branch +# the histories diverge. So we'll need to create our own fork of each new minor release branch. +# The convention is a fork named "hubspot-$minorVersion", and the maven coordinates "$minorVersion-hubspot-SNAPSHOT" +MINOR_VERSION="2.6" +MAIN_BRANCH="hubspot-${MINOR_VERSION}" + +# +# Validate inputs from blazar +# + +if [ -z "$WORKSPACE" ]; then + echo "Missing env var \$WORKSPACE" + exit 1 +fi +if [ -z "$GIT_BRANCH" ]; then + echo "Missing env var \$GIT_BRANCH" + exit 1 +fi +if [ -z "$BUILD_COMMAND_RC_FILE" ]; then + echo "Missing env var \$BUILD_COMMAND_RC_FILE" + exit 1 +fi + +# +# Extract current hbase version from root pom.xml +# + +# the pom.xml has an invalid xml namespace, so just remove that so xmllint can parse it. +cat $WORKSPACE/pom.xml | sed '2 s/xmlns=".*"//g' > pom.xml.tmp +HBASE_VERSION=$(echo "cat /project/properties/revision/text()" | xmllint --nocdata --shell pom.xml.tmp | sed '1d;$d') +rm pom.xml.tmp + +# sanity check that we've got something that looks right. It wouldn't be the end of the world if we got it wrong, but +# it will help avoid confusion. +if [[ ! "$HBASE_VERSION" =~ 2\.[0-9]+\.[0-9]+ ]]; then + echo "Unexpected HBASE_VERSION extracted from pom.xml. Got $HBASE_VERSION but expected a string like '2.4.3', with 3 numbers separated by decimals, the first number being 2."
+ exit 1 +fi + +# +# Generate branch-specific env vars +# We are going to generate the maven version and the RPM release here: +# - For the maven version, we need to special case our main branch +# - For RPM, we want our final version to be: +# main branch: {hbase_version}-hs.{build_number}.el6 +# other branches: {hbase_version}-hs~{branch_name}.{build_number}.el6, where branch_name substitutes underscore for non-alpha-numeric characters +# + +echo "Git branch $GIT_BRANCH. Detecting appropriate version override and RPM release." + +RELEASE="hs" + +if [[ "$GIT_BRANCH" = "$MAIN_BRANCH" ]]; then + SET_VERSION="${MINOR_VERSION}-hubspot-SNAPSHOT" + REPO_NAME="AnyLinuxVersion_hs-hbase" +elif [[ "$GIT_BRANCH" != "hubspot" ]]; then + SET_VERSION="${MINOR_VERSION}-${GIT_BRANCH}-SNAPSHOT" + RELEASE="${RELEASE}~${GIT_BRANCH//[^[:alnum:]]/_}" + REPO_NAME="AnyLinuxVersion_hs-hbase-develop" +else + echo "Invalid git branch $GIT_BRANCH" + exit 1 +fi + +RELEASE="${RELEASE}.${BUILD_NUMBER}" +FULL_BUILD_VERSION="${HBASE_VERSION}-${RELEASE}" + +# SET_VERSION is not the most intuitive name, but it's required for set-maven-versions script +write-build-env-var SET_VERSION "$SET_VERSION" +write-build-env-var HBASE_VERSION "$HBASE_VERSION" +write-build-env-var PKG_RELEASE "$RELEASE" +write-build-env-var FULL_BUILD_VERSION "$FULL_BUILD_VERSION" +write-build-env-var REPO_NAME "$REPO_NAME" +# Adding this value as versioninfo.version ensures we have the same value as would normally +# show up in a non-hubspot hbase build. Otherwise due to set-maven-versions we'd end up +# with 2.6-hubspot-SNAPSHOT which is not very useful as a point of reference. +# Another option would be to pass in our FULL_BUILD_VERSION but that might cause some funniness +# with the expectations in VersionInfo.compareVersion(). 
+write-build-env-var MAVEN_BUILD_ARGS "$MAVEN_BUILD_ARGS -Dversioninfo.version=$HBASE_VERSION" + +echo "Building HBase version $HBASE_VERSION" +echo "Will deploy to nexus with version $SET_VERSION" +echo "Will create rpm with version $FULL_BUILD_VERSION" +echo "Will run maven with extra args $MAVEN_BUILD_ARGS" diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java index 50eceb84996b..5e24b24de91a 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java @@ -123,8 +123,7 @@ protected static int getIndex(TableName tbl, List sTableList) { * @param tablesToBackup list of tables to be backed up */ protected List handleBulkLoad(List tablesToBackup) throws IOException { - List activeFiles = new ArrayList<>(); - List archiveFiles = new ArrayList<>(); + Map toBulkload = new HashMap<>(); List bulkLoads = backupManager.readBulkloadRows(tablesToBackup); FileSystem tgtFs; try { @@ -137,6 +136,8 @@ protected List handleBulkLoad(List tablesToBackup) throws I for (BulkLoad bulkLoad : bulkLoads) { TableName srcTable = bulkLoad.getTableName(); + MergeSplitBulkloadInfo bulkloadInfo = + toBulkload.computeIfAbsent(srcTable, MergeSplitBulkloadInfo::new); String regionName = bulkLoad.getRegion(); String fam = bulkLoad.getColumnFamily(); String filename = FilenameUtils.getName(bulkLoad.getHfilePath()); @@ -166,27 +167,30 @@ protected List handleBulkLoad(List tablesToBackup) throws I srcTableQualifier); LOG.trace("copying {} to {}", p, tgt); } - activeFiles.add(p.toString()); + bulkloadInfo.addActiveFile(p.toString()); } else if (fs.exists(archive)) { LOG.debug("copying archive {} to {}", archive, tgt); - archiveFiles.add(archive.toString()); + bulkloadInfo.addArchivedFiles(archive.toString()); } - mergeSplitBulkloads(activeFiles, archiveFiles, srcTable); - incrementalCopyBulkloadHFiles(tgtFs, srcTable); } + + for (MergeSplitBulkloadInfo bulkloadInfo : toBulkload.values()) { + mergeSplitAndCopyBulkloadedHFiles(bulkloadInfo.getActiveFiles(), + bulkloadInfo.getArchiveFiles(), bulkloadInfo.getSrcTable(), tgtFs); + } + return bulkLoads; } - private void mergeSplitBulkloads(List activeFiles, List archiveFiles, - TableName tn) throws IOException { + private void mergeSplitAndCopyBulkloadedHFiles(List activeFiles, + List archiveFiles, TableName tn, FileSystem tgtFs) throws IOException { int attempt = 1; - while (!activeFiles.isEmpty()) { LOG.info("MergeSplit {} active bulk loaded files. 
Attempt={}", activeFiles.size(), attempt++); // Active file can be archived during copy operation, // we need to handle this properly try { - mergeSplitBulkloads(activeFiles, tn); + mergeSplitAndCopyBulkloadedHFiles(activeFiles, tn, tgtFs); break; } catch (IOException e) { int numActiveFiles = activeFiles.size(); @@ -200,11 +204,12 @@ private void mergeSplitBulkloads(List activeFiles, List archiveF } if (!archiveFiles.isEmpty()) { - mergeSplitBulkloads(archiveFiles, tn); + mergeSplitAndCopyBulkloadedHFiles(archiveFiles, tn, tgtFs); } } - private void mergeSplitBulkloads(List files, TableName tn) throws IOException { + private void mergeSplitAndCopyBulkloadedHFiles(List files, TableName tn, FileSystem tgtFs) + throws IOException { MapReduceHFileSplitterJob player = new MapReduceHFileSplitterJob(); conf.set(MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY, getBulkOutputDirForTable(tn).toString()); @@ -219,6 +224,9 @@ private void mergeSplitBulkloads(List files, TableName tn) throws IOExce result = player.run(args); } catch (Exception e) { LOG.error("Failed to run MapReduceHFileSplitterJob", e); + // Delete the bulkload directory if we fail to run the HFile splitter job for any reason + // as it might be re-tried + deleteBulkLoadDirectory(); throw new IOException(e); } @@ -226,6 +234,8 @@ private void mergeSplitBulkloads(List files, TableName tn) throws IOExce throw new IOException( "Failed to run MapReduceHFileSplitterJob with invalid result: " + result); } + + incrementalCopyBulkloadHFiles(tgtFs, tn); } private void updateFileLists(List activeFiles, List archiveFiles) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java new file mode 100644 index 000000000000..2e81b17bc113 --- /dev/null +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.backup.impl; + +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.TableName; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +class MergeSplitBulkloadInfo { + private final List activeFiles = new ArrayList<>(); + private final List archiveFiles = new ArrayList<>(); + + private final TableName srcTable; + + public MergeSplitBulkloadInfo(TableName srcTable) { + this.srcTable = srcTable; + } + + public TableName getSrcTable() { + return srcTable; + } + + public List getArchiveFiles() { + return archiveFiles; + } + + public List getActiveFiles() { + return activeFiles; + } + + public void addActiveFile(String file) { + activeFiles.add(file); + } + + public void addArchivedFiles(String file) { + archiveFiles.add(file); + } +} diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java index a91e6f01a6f1..433a9a9e8dd2 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java @@ -22,7 +22,6 @@ import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; - import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; @@ -58,6 +57,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.HFileTestUtil; import org.junit.After; import org.junit.Assert; @@ -68,7 +68,6 @@ import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.base.Throwables; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; import org.apache.hbase.thirdparty.com.google.common.collect.Lists; @@ -101,11 +100,14 @@ public TestIncrementalBackup(Boolean b) { public void ensurePreviousBackupTestsAreCleanedUp() throws Exception { TEST_UTIL.flush(table1); TEST_UTIL.flush(table2); - TEST_UTIL.flush(table1_restore); TEST_UTIL.truncateTable(table1).close(); TEST_UTIL.truncateTable(table2).close(); - TEST_UTIL.truncateTable(table1_restore).close(); + + if (TEST_UTIL.getAdmin().tableExists(table1_restore)) { + TEST_UTIL.flush(table1_restore); + TEST_UTIL.truncateTable(table1_restore).close(); + } TEST_UTIL.getMiniHBaseCluster().getRegionServerThreads().forEach(rst -> { try { @@ -427,6 +429,73 @@ public void TestIncBackupRestoreWithOriginalSplitsSeperateFs() throws Exception } + @Test + public void TestIncBackupRestoreHandlesArchivedFiles() throws Exception { + byte[] fam2 = Bytes.toBytes("f2"); + TableDescriptor newTable1Desc = TableDescriptorBuilder.newBuilder(table1Desc) + .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(fam2).build()).build(); + TEST_UTIL.getAdmin().modifyTable(newTable1Desc); + try (Connection conn = ConnectionFactory.createConnection(conf1); + BackupAdminImpl admin = new BackupAdminImpl(conn)) { + String backupTargetDir = TEST_UTIL.getDataTestDir("backupTarget").toString(); + BACKUP_ROOT_DIR = new File(backupTargetDir).toURI().toString(); + + List tables = Lists.newArrayList(table1); + + insertIntoTable(conn, table1, famName, 3, 100); + String fullBackupId = 
takeFullBackup(tables, admin, true); + assertTrue(checkSucceeded(fullBackupId)); + + insertIntoTable(conn, table1, famName, 4, 100); + + HRegion regionToBulkload = TEST_UTIL.getHBaseCluster().getRegions(table1).get(0); + String regionName = regionToBulkload.getRegionInfo().getEncodedName(); + // Requires a mult-fam bulkload to ensure we're appropriately handling + // multi-file bulkloads + Path regionDir = doBulkload(table1, regionName, famName, fam2); + + // archive the files in the region directory + Path archiveDir = + HFileArchiveUtil.getStoreArchivePath(conf1, table1, regionName, Bytes.toString(famName)); + TEST_UTIL.getTestFileSystem().mkdirs(archiveDir); + RemoteIterator iter = + TEST_UTIL.getTestFileSystem().listFiles(regionDir, true); + List paths = new ArrayList<>(); + while (iter.hasNext()) { + Path path = iter.next().getPath(); + if (path.toString().contains("_SeqId_")) { + paths.add(path); + } + } + assertTrue(paths.size() > 1); + Path path = paths.get(0); + String name = path.toString(); + int startIdx = name.lastIndexOf(Path.SEPARATOR); + String filename = name.substring(startIdx + 1); + Path archiveFile = new Path(archiveDir, filename); + // archive 1 of the files + boolean success = TEST_UTIL.getTestFileSystem().rename(path, archiveFile); + assertTrue(success); + assertTrue(TEST_UTIL.getTestFileSystem().exists(archiveFile)); + assertFalse(TEST_UTIL.getTestFileSystem().exists(path)); + + BackupRequest request = + createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR, true); + String incrementalBackupId = admin.backupTables(request); + assertTrue(checkSucceeded(incrementalBackupId)); + + TableName[] fromTable = new TableName[] { table1 }; + TableName[] toTable = new TableName[] { table1_restore }; + + admin.restore(BackupUtils.createRestoreRequest(BACKUP_ROOT_DIR, incrementalBackupId, false, + fromTable, toTable, true)); + + int actualRowCount = TEST_UTIL.countRows(table1_restore); + int expectedRowCount = TEST_UTIL.countRows(table1); + assertEquals(expectedRowCount, actualRowCount); + } + } + private void checkThrowsCFMismatch(IOException ex, List tables) { Throwable cause = Throwables.getRootCause(ex); assertEquals(cause.getClass(), ColumnFamilyMismatchException.class); @@ -448,12 +517,13 @@ private String takeFullBackup(List tables, BackupAdminImpl backupAdmi return backupId; } - private static void doBulkload(TableName tn, String regionName, byte[]... fams) + private static Path doBulkload(TableName tn, String regionName, byte[]... fams) throws IOException { Path regionDir = createHFiles(tn, regionName, fams); Map results = BulkLoadHFiles.create(conf1).bulkLoad(tn, regionDir); assertFalse(results.isEmpty()); + return regionDir; } private static Path createHFiles(TableName tn, String regionName, byte[]... 
fams) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java index 33d7d98c61e0..41aca85f5376 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java @@ -161,6 +161,7 @@ public static String prettyPrint(final String encodedRegionName) { public static final String NO_HASH = null; private String encodedName = null; private byte[] encodedNameAsBytes = null; + private String nameAsString = null; private int replicaId = DEFAULT_REPLICA_ID; // Current TableName @@ -455,15 +456,21 @@ public byte[] getRegionName() { /** Returns Region name as a String for use in logging, etc. */ @Override public String getRegionNameAsString() { - if (RegionInfo.hasEncodedName(this.regionName)) { - // new format region names already have their encoded name. - return Bytes.toStringBinary(this.regionName); + if (nameAsString == null) { + String name; + if (RegionInfo.hasEncodedName(this.regionName)) { + // new format region names already have their encoded name. + name = Bytes.toStringBinary(this.regionName); + } else { + // old format. regionNameStr doesn't have the region name. + name = Bytes.toStringBinary(this.regionName) + "." + this.getEncodedName(); + } + // may race with other threads setting this, but that's ok + nameAsString = name; + return name; + } else { + return nameAsString; } - - // old format. regionNameStr doesn't have the region name. - // - // - return Bytes.toStringBinary(this.regionName) + "." + this.getEncodedName(); } /** Returns the encoded region name */ diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutator.java index 6cc2b5adf9d4..479446f8ea13 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutator.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutator.java @@ -88,6 +88,11 @@ default CompletableFuture mutate(Mutation mutation) { */ long getWriteBufferSize(); + /** + * The maximum number of mutations that this buffered mutator will buffer before flushing them + */ + int getMaxMutations(); + /** * Returns the periodical flush interval, 0 means disabled. */ diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorBuilder.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorBuilder.java index 4659fe63eefc..833550decd05 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorBuilder.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorBuilder.java @@ -126,6 +126,13 @@ default AsyncBufferedMutatorBuilder setRequestAttributes(Map req throw new UnsupportedOperationException("Not implemented"); } + /** + * Set the maximum number of mutations that this buffered mutator will buffer before flushing + * them. If you are talking to a cluster that uses hbase.rpc.rows.size.threshold.reject to reject + * large Multi requests, you may need this setting to avoid rejections. Default is no limit. + */ + AsyncBufferedMutatorBuilder setMaxMutations(int maxMutations); + /** * Create the {@link AsyncBufferedMutator} instance. 
*/ diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorBuilderImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorBuilderImpl.java index 6905ff3065cb..7fa860dc3d4e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorBuilderImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorBuilderImpl.java @@ -40,12 +40,15 @@ class AsyncBufferedMutatorBuilderImpl implements AsyncBufferedMutatorBuilder { private int maxKeyValueSize; + private int maxMutations; + public AsyncBufferedMutatorBuilderImpl(AsyncConnectionConfiguration connConf, AsyncTableBuilder tableBuilder, HashedWheelTimer periodicalFlushTimer) { this.tableBuilder = tableBuilder; this.writeBufferSize = connConf.getWriteBufferSize(); this.periodicFlushTimeoutNs = connConf.getWriteBufferPeriodicFlushTimeoutNs(); this.maxKeyValueSize = connConf.getMaxKeyValueSize(); + this.maxMutations = connConf.getBufferedMutatorMaxMutations(); this.periodicalFlushTimer = periodicalFlushTimer; } @@ -115,9 +118,16 @@ public AsyncBufferedMutatorBuilder setMaxKeyValueSize(int maxKeyValueSize) { return this; } + @Override + public AsyncBufferedMutatorBuilder setMaxMutations(int maxMutations) { + Preconditions.checkArgument(maxMutations > 0, "maxMutations %d must be > 0", maxMutations); + this.maxMutations = maxMutations; + return this; + } + @Override public AsyncBufferedMutator build() { return new AsyncBufferedMutatorImpl(periodicalFlushTimer, tableBuilder.build(), writeBufferSize, - periodicFlushTimeoutNs, maxKeyValueSize); + periodicFlushTimeoutNs, maxKeyValueSize, maxMutations); } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorImpl.java index 59eff8cf33f1..e5500b0977b1 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBufferedMutatorImpl.java @@ -32,6 +32,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.TableName; import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.io.netty.util.HashedWheelTimer; import org.apache.hbase.thirdparty.io.netty.util.Timeout; @@ -42,6 +44,8 @@ @InterfaceAudience.Private class AsyncBufferedMutatorImpl implements AsyncBufferedMutator { + private static final Logger LOG = LoggerFactory.getLogger(AsyncBufferedMutatorImpl.class); + private final HashedWheelTimer periodicalFlushTimer; private final AsyncTable table; @@ -52,6 +56,8 @@ class AsyncBufferedMutatorImpl implements AsyncBufferedMutator { private final int maxKeyValueSize; + private final int maxMutations; + private List mutations = new ArrayList<>(); private List> futures = new ArrayList<>(); @@ -63,12 +69,13 @@ class AsyncBufferedMutatorImpl implements AsyncBufferedMutator { Timeout periodicFlushTask; AsyncBufferedMutatorImpl(HashedWheelTimer periodicalFlushTimer, AsyncTable table, - long writeBufferSize, long periodicFlushTimeoutNs, int maxKeyValueSize) { + long writeBufferSize, long periodicFlushTimeoutNs, int maxKeyValueSize, int maxMutations) { this.periodicalFlushTimer = periodicalFlushTimer; this.table = table; this.writeBufferSize = writeBufferSize; this.periodicFlushTimeoutNs = periodicFlushTimeoutNs; this.maxKeyValueSize = 
maxKeyValueSize; + this.maxMutations = maxMutations; } @Override @@ -145,6 +152,10 @@ Stream.> generate(CompletableFuture::new).limit(mutation this.futures.addAll(futures); bufferedSize += heapSize; if (bufferedSize >= writeBufferSize) { + LOG.trace("Flushing because write buffer size {} reached", writeBufferSize); + internalFlush(); + } else if (maxMutations > 0 && this.mutations.size() >= maxMutations) { + LOG.trace("Flushing because max mutations {} reached", maxMutations); internalFlush(); } } @@ -172,6 +183,11 @@ public long getPeriodicalFlushTimeout(TimeUnit unit) { return unit.convert(periodicFlushTimeoutNs, TimeUnit.NANOSECONDS); } + @Override + public int getMaxMutations() { + return maxMutations; + } + @Override public Map getRequestAttributes() { return table.getRequestAttributes(); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionConfiguration.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionConfiguration.java index 5fb95ebbd877..14bc0598d844 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionConfiguration.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionConfiguration.java @@ -101,6 +101,8 @@ class AsyncConnectionConfiguration { private final int maxKeyValueSize; + private final int bufferedMutatorMaxMutations; + AsyncConnectionConfiguration(Configuration conf) { ConnectionConfiguration connectionConf = new ConnectionConfiguration(conf); @@ -111,6 +113,7 @@ class AsyncConnectionConfiguration { this.writeBufferPeriodicFlushTimeoutNs = connectionConf.getWriteBufferPeriodicFlushTimeoutMs(); this.maxKeyValueSize = connectionConf.getMaxKeyValueSize(); this.maxRetries = connectionConf.getRetriesNumber(); + this.bufferedMutatorMaxMutations = connectionConf.getBufferedMutatorMaxMutations(); // fields from connection configuration that need to be converted to nanos this.metaOperationTimeoutNs = @@ -229,4 +232,8 @@ long getPrimaryMetaScanTimeoutNs() { int getMaxKeyValueSize() { return maxKeyValueSize; } + + int getBufferedMutatorMaxMutations() { + return bufferedMutatorMaxMutations; + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java index b34ef863d565..f3e9c0ed0178 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -34,6 +35,7 @@ import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseServerException; import org.apache.hadoop.hbase.HConstants; @@ -52,6 +54,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.base.Strings; + /** * The context, and return value, for a single submit/submitAll call. Note on how this class (one AP * submit) works. 
Initially, all requests are split into groups by server; request is sent to each @@ -195,7 +199,7 @@ public void run() { try { // setup the callable based on the actions, if we don't have one already from the request if (callable == null) { - callable = createCallable(server, tableName, multiAction); + callable = createCallable(server, tableName, multiAction, numAttempt); } RpcRetryingCaller caller = asyncProcess.createCaller(callable, rpcTimeout); @@ -387,10 +391,8 @@ public AsyncRequestFutureImpl(AsyncProcessTask task, List actions, long } else { this.replicaGetIndices = null; } - this.callsInProgress = !hasAnyReplicaGets - ? null - : Collections - .newSetFromMap(new ConcurrentHashMap()); + this.callsInProgress = + Collections.newSetFromMap(new ConcurrentHashMap()); this.asyncProcess = asyncProcess; this.errorsByServer = createServerErrorTracker(); this.errors = new BatchErrors(); @@ -540,7 +542,12 @@ private HRegionLocation getReplicaLocationOrFail(Action action) { private void manageLocationError(Action action, Exception ex) { String msg = - "Cannot get replica " + action.getReplicaId() + " location for " + action.getAction(); + "Cannot get replica " + action.getReplicaId() + " location for " + action.getAction() + ": "; + if (ex instanceof OperationTimeoutExceededException) { + msg += "Operation timeout exceeded."; + } else { + msg += ex == null ? "null cause" : ex.toString(); + } LOG.error(msg); if (ex == null) { ex = new IOException(msg); @@ -1247,20 +1254,31 @@ private String buildDetailedErrorMsg(String string, int index) { @Override public void waitUntilDone() throws InterruptedIOException { + long startTime = EnvironmentEdgeManager.currentTime(); try { if (this.operationTimeout > 0) { // the worker thread maybe over by some exception without decrement the actionsInProgress, // then the guarantee of operationTimeout will be broken, so we should set cutoff to avoid // stuck here forever - long cutoff = (EnvironmentEdgeManager.currentTime() + this.operationTimeout) * 1000L; + long cutoff = (startTime + this.operationTimeout) * 1000L; if (!waitUntilDone(cutoff)) { - throw new SocketTimeoutException("time out before the actionsInProgress changed to zero"); + String msg = "time out before the actionsInProgress changed to zero, with " + + actionsInProgress.get() + " remaining" + getServersInProgress(); + + throw new SocketTimeoutException(msg); } } else { waitUntilDone(Long.MAX_VALUE); } } catch (InterruptedException iex) { - throw new InterruptedIOException(iex.getMessage()); + long duration = EnvironmentEdgeManager.currentTime() - startTime; + String message = "Interrupted after waiting " + duration + "ms of " + operationTimeout + + "ms operation timeout, with " + actionsInProgress.get() + " remaining" + + getServersInProgress(); + if (!Strings.isNullOrEmpty(iex.getMessage())) { + message += ": " + iex.getMessage(); + } + throw new InterruptedIOException(message); } finally { if (callsInProgress != null) { for (CancellableRegionServerCallable clb : callsInProgress) { @@ -1270,6 +1288,29 @@ public void waitUntilDone() throws InterruptedIOException { } } + private String getServersInProgress() { + if (callsInProgress != null) { + Map serversInProgress = new HashMap<>(callsInProgress.size()); + for (CancellableRegionServerCallable callable : callsInProgress) { + if (callable instanceof MultiServerCallable) { + MultiServerCallable multiServerCallable = (MultiServerCallable) callable; + int numAttempt = multiServerCallable.getNumAttempt(); + 
serversInProgress.compute(multiServerCallable.getServerName(), + (k, v) -> v == null ? numAttempt : Math.max(v, numAttempt)); + } + } + + if (serversInProgress.size() > 0) { + return " on servers: " + serversInProgress.entrySet().stream() + .sorted(Comparator.> comparingInt(Map.Entry::getValue) + .reversed()) + .map(entry -> entry.getKey() + "(" + entry.getValue() + " attempts)") + .collect(Collectors.joining(", ")); + } + } + return ""; + } + private boolean waitUntilDone(long cutoff) throws InterruptedException { boolean hasWait = cutoff != Long.MAX_VALUE; long lastLog = EnvironmentEdgeManager.currentTime(); @@ -1336,10 +1377,10 @@ private ConnectionImplementation.ServerErrorTracker createServerErrorTracker() { * Create a callable. Isolated to be easily overridden in the tests. */ private MultiServerCallable createCallable(final ServerName server, TableName tableName, - final MultiAction multi) { + final MultiAction multi, int numAttempt) { return new MultiServerCallable(asyncProcess.connection, tableName, server, multi, asyncProcess.rpcFactory.newController(), rpcTimeout, tracker, multi.getPriority(), - requestAttributes); + requestAttributes, numAttempt); } private void updateResult(int index, Object result) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutator.java index afb37e6e3ab9..9053cf448750 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutator.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutator.java @@ -203,6 +203,14 @@ default Map getRequestAttributes() { return Collections.emptyMap(); } + /** + * The maximum number of mutations that this buffered mutator will buffer before flushing them + */ + default int getMaxMutations() { + throw new UnsupportedOperationException( + "The BufferedMutator::getMaxMutations has not been implemented"); + } + /** * Listens for asynchronous exceptions on a {@link BufferedMutator}. */ diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutatorImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutatorImpl.java index 3e2b011337e5..29fb2c43bc0b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutatorImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutatorImpl.java @@ -87,6 +87,7 @@ public class BufferedMutatorImpl implements BufferedMutator { private Timer writeBufferPeriodicFlushTimer = null; private final int maxKeyValueSize; + private final int maxMutations; private final ExecutorService pool; private final AtomicInteger rpcTimeout; private final AtomicInteger operationTimeout; @@ -130,6 +131,10 @@ public class BufferedMutatorImpl implements BufferedMutator { ? params.getMaxKeyValueSize() : tableConf.getMaxKeyValueSize(); + this.maxMutations = params.getMaxMutations() != UNSET + ? params.getMaxMutations() + : conn.getConnectionConfiguration().getBufferedMutatorMaxMutations(); + this.rpcTimeout = new AtomicInteger(params.getRpcTimeout() != UNSET ? params.getRpcTimeout() : conn.getConnectionConfiguration().getWriteRpcTimeout()); @@ -286,8 +291,11 @@ private void doFlush(boolean flushAll) throws InterruptedIOException, RetriesExhaustedWithDetailsException { List errors = new ArrayList<>(); while (true) { - if (!flushAll && currentWriteBufferSize.get() <= writeBufferSize) { - // There is the room to accept more mutations. 
+ if ( + !flushAll && (currentWriteBufferSize.get() <= writeBufferSize) + && (maxMutations == UNSET || size() < maxMutations) + ) { + // There is room to accept more mutations. break; } AsyncRequestFuture asf; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutatorParams.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutatorParams.java index 0b36a59f9e35..3f13ee9834bb 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutatorParams.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/BufferedMutatorParams.java @@ -24,6 +24,8 @@ import org.apache.hadoop.hbase.TableName; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hbase.thirdparty.com.google.common.collect.Maps; + /** * Parameters for instantiating a {@link BufferedMutator}. */ @@ -41,6 +43,7 @@ public class BufferedMutatorParams implements Cloneable { private String implementationClassName = null; private int rpcTimeout = UNSET; private int operationTimeout = UNSET; + private int maxMutations = UNSET; protected Map requestAttributes = Collections.emptyMap(); private BufferedMutator.ExceptionListener listener = new BufferedMutator.ExceptionListener() { @Override @@ -89,6 +92,23 @@ public int getOperationTimeout() { return operationTimeout; } + /** + * Set the maximum number of mutations that this buffered mutator will buffer before flushing + * them. If you are talking to a cluster that uses hbase.rpc.rows.size.threshold.reject to reject + * large Multi requests, you may need this setting to avoid rejections. Default is no limit. + */ + public BufferedMutatorParams setMaxMutations(int maxMutations) { + this.maxMutations = maxMutations; + return this; + } + + /** + * The maximum number of mutations that this buffered mutator will buffer before flushing them + */ + public int getMaxMutations() { + return maxMutations; + } + public BufferedMutatorParams setRequestAttribute(String key, byte[] value) { if (requestAttributes.isEmpty()) { requestAttributes = new HashMap<>(); @@ -204,6 +224,8 @@ public BufferedMutatorParams clone() { clone.writeBufferPeriodicFlushTimeoutMs = this.writeBufferPeriodicFlushTimeoutMs; clone.writeBufferPeriodicFlushTimerTickMs = this.writeBufferPeriodicFlushTimerTickMs; clone.maxKeyValueSize = this.maxKeyValueSize; + clone.maxMutations = this.maxMutations; + clone.requestAttributes = Maps.newHashMap(this.requestAttributes); clone.pool = this.pool; clone.listener = this.listener; clone.implementationClassName = this.implementationClassName; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java index 2a6651b5dde0..15e09d6a3b43 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java @@ -47,6 +47,9 @@ public class ConnectionConfiguration { public static final long WRITE_BUFFER_PERIODIC_FLUSH_TIMERTICK_MS_DEFAULT = 1000L; // 1 second public static final String MAX_KEYVALUE_SIZE_KEY = "hbase.client.keyvalue.maxsize"; public static final int MAX_KEYVALUE_SIZE_DEFAULT = 10485760; + public static final String BUFFERED_MUTATOR_MAX_MUTATIONS_KEY = + "hbase.client.write.buffer.maxmutations"; + public static final int BUFFERED_MUTATOR_MAX_MUTATIONS_DEFAULT = -1; public static final String PRIMARY_CALL_TIMEOUT_MICROSECOND = 
"hbase.client.primaryCallTimeout.get"; public static final int PRIMARY_CALL_TIMEOUT_MICROSECOND_DEFAULT = 10000; // 10ms @@ -94,6 +97,7 @@ public class ConnectionConfiguration { private final int metaReplicaCallTimeoutMicroSecondScan; private final int retries; private final int maxKeyValueSize; + private final int bufferedMutatorMaxMutations; private final int rpcTimeout; private final int readRpcTimeout; private final int metaReadRpcTimeout; @@ -150,6 +154,9 @@ public class ConnectionConfiguration { this.maxKeyValueSize = conf.getInt(MAX_KEYVALUE_SIZE_KEY, MAX_KEYVALUE_SIZE_DEFAULT); + this.bufferedMutatorMaxMutations = + conf.getInt(BUFFERED_MUTATOR_MAX_MUTATIONS_KEY, BUFFERED_MUTATOR_MAX_MUTATIONS_DEFAULT); + this.rpcTimeout = conf.getInt(HConstants.HBASE_RPC_TIMEOUT_KEY, HConstants.DEFAULT_HBASE_RPC_TIMEOUT); @@ -203,6 +210,7 @@ protected ConnectionConfiguration() { this.retries = HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER; this.clientScannerAsyncPrefetch = Scan.DEFAULT_HBASE_CLIENT_SCANNER_ASYNC_PREFETCH; this.maxKeyValueSize = MAX_KEYVALUE_SIZE_DEFAULT; + this.bufferedMutatorMaxMutations = BUFFERED_MUTATOR_MAX_MUTATIONS_DEFAULT; this.readRpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; this.metaReadRpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; this.writeRpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; @@ -271,6 +279,10 @@ public int getMaxKeyValueSize() { return maxKeyValueSize; } + public int getBufferedMutatorMaxMutations() { + return bufferedMutatorMaxMutations; + } + public long getScannerMaxResultSize() { return scannerMaxResultSize; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java index 9d99e98d529d..5e0cd51730b2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java @@ -519,6 +519,9 @@ public BufferedMutator getBufferedMutator(BufferedMutatorParams params) { if (params.getMaxKeyValueSize() == BufferedMutatorParams.UNSET) { params.maxKeyValueSize(connectionConfig.getMaxKeyValueSize()); } + if (params.getMaxMutations() == BufferedMutatorParams.UNSET) { + params.setMaxMutations(connectionConfig.getBufferedMutatorMaxMutations()); + } // Look to see if an alternate BufferedMutation implementation is wanted. // Look in params and in config. If null, use default. 
String implementationClassName = params.getImplementationClassName(); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MultiServerCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MultiServerCallable.java index 6ba0832b26e5..33933dd5684f 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MultiServerCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MultiServerCallable.java @@ -48,14 +48,17 @@ @InterfaceAudience.Private class MultiServerCallable extends CancellableRegionServerCallable { private MultiAction multiAction; + private final int numAttempt; private boolean cellBlock; MultiServerCallable(final ClusterConnection connection, final TableName tableName, final ServerName location, final MultiAction multi, RpcController rpcController, int rpcTimeout, - RetryingTimeTracker tracker, int priority, Map requestAttributes) { + RetryingTimeTracker tracker, int priority, Map requestAttributes, + int numAttempt) { super(connection, tableName, null, rpcController, rpcTimeout, tracker, priority, requestAttributes); this.multiAction = multi; + this.numAttempt = numAttempt; // RegionServerCallable has HRegionLocation field, but this is a multi-region request. // Using region info from parent HRegionLocation would be a mistake for this class; so // we will store the server here, and throw if someone tries to obtain location/regioninfo. @@ -63,6 +66,10 @@ class MultiServerCallable extends CancellableRegionServerCallable this.cellBlock = isCellBlock(); } + public int getNumAttempt() { + return numAttempt; + } + public void reset(ServerName location, MultiAction multiAction) { this.location = new HRegionLocation(null, location); this.multiAction = multiAction; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java index 4217201b85e3..d6d8e00f7822 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java @@ -64,6 +64,7 @@ class MutableRegionInfo implements RegionInfo { private final int hashCode; private final String encodedName; private final byte[] encodedNameAsBytes; + private String nameAsString = null; private final TableName tableName; private static int generateHashCode(final TableName tableName, final byte[] startKey, @@ -149,10 +150,21 @@ public byte[] getRegionName() { return regionName; } - /** Returns Region name as a String for use in logging, etc. */ + /** + * Returns region name as a String for use in logging, tracing, etc. Expensive enough to compute + * that we do it on first request and save it. Used often because it's included in trace of every + * RPC. 
+ */ @Override public String getRegionNameAsString() { - return RegionInfo.getRegionNameAsString(this, this.regionName); + if (nameAsString == null) { + String name = RegionInfo.getRegionNameAsString(this, this.regionName); + // may race with other threads setting this, but that's ok + nameAsString = name; + return name; + } else { + return nameAsString; + } } /** Returns the encoded region name */ diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/CellBlockBuilder.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/CellBlockBuilder.java index e7364ca3b429..4156a91c9e51 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/CellBlockBuilder.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/CellBlockBuilder.java @@ -34,10 +34,10 @@ import org.apache.hadoop.hbase.io.ByteBufferInputStream; import org.apache.hadoop.hbase.io.ByteBufferListOutputStream; import org.apache.hadoop.hbase.io.ByteBufferOutputStream; +import org.apache.hadoop.hbase.io.compress.CodecPool; import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.nio.SingleByteBuff; import org.apache.hadoop.hbase.util.ClassSize; -import org.apache.hadoop.io.compress.CodecPool; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionInputStream; import org.apache.hadoop.io.compress.Compressor; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/RpcThrottlingException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/RpcThrottlingException.java index 2c1f13e94e66..dfa8eacb13b9 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/RpcThrottlingException.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/RpcThrottlingException.java @@ -40,14 +40,17 @@ public enum Type { ReadSizeExceeded, RequestCapacityUnitExceeded, ReadCapacityUnitExceeded, - WriteCapacityUnitExceeded + WriteCapacityUnitExceeded, + AtomicRequestNumberExceeded, + AtomicReadSizeExceeded, + AtomicWriteSizeExceeded, } - private static final String[] MSG_TYPE = - new String[] { "number of requests exceeded", "request size limit exceeded", - "number of read requests exceeded", "number of write requests exceeded", - "write size limit exceeded", "read size limit exceeded", "request capacity unit exceeded", - "read capacity unit exceeded", "write capacity unit exceeded" }; + private static final String[] MSG_TYPE = new String[] { "number of requests exceeded", + "request size limit exceeded", "number of read requests exceeded", + "number of write requests exceeded", "write size limit exceeded", "read size limit exceeded", + "request capacity unit exceeded", "read capacity unit exceeded", "write capacity unit exceeded", + "atomic request number exceeded", "atomic read size exceeded", "atomic write size exceeded" }; private static final String MSG_WAIT = " - wait "; @@ -127,6 +130,21 @@ public static void throwWriteCapacityUnitExceeded(final long waitInterval) throwThrottlingException(Type.WriteCapacityUnitExceeded, waitInterval); } + public static void throwAtomicRequestNumberExceeded(final long waitInterval) + throws RpcThrottlingException { + throwThrottlingException(Type.AtomicRequestNumberExceeded, waitInterval); + } + + public static void throwAtomicReadSizeExceeded(final long waitInterval) + throws RpcThrottlingException { + throwThrottlingException(Type.AtomicReadSizeExceeded, waitInterval); + } + + public static void throwAtomicWriteSizeExceeded(final long waitInterval) + throws 
RpcThrottlingException { + throwThrottlingException(Type.AtomicWriteSizeExceeded, waitInterval); + } + private static void throwThrottlingException(final Type type, final long waitInterval) throws RpcThrottlingException { String msg = MSG_TYPE[type.ordinal()] + MSG_WAIT + stringFromMillis(waitInterval); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/ThrottleSettings.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/ThrottleSettings.java index 01dfc3709ae6..efde451c1222 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/ThrottleSettings.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/ThrottleSettings.java @@ -93,11 +93,14 @@ public String toString() { case REQUEST_NUMBER: case WRITE_NUMBER: case READ_NUMBER: + case ATOMIC_REQUEST_NUMBER: builder.append(String.format("%dreq", timedQuota.getSoftLimit())); break; case REQUEST_SIZE: case WRITE_SIZE: case READ_SIZE: + case ATOMIC_READ_SIZE: + case ATOMIC_WRITE_SIZE: builder.append(sizeToString(timedQuota.getSoftLimit())); break; case REQUEST_CAPACITY_UNIT: diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/ThrottleType.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/ThrottleType.java index 80827dafe6d5..2c5a25acc2c4 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/ThrottleType.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/ThrottleType.java @@ -50,4 +50,13 @@ public enum ThrottleType { /** Throttling based on the read data capacity unit */ READ_CAPACITY_UNIT, + + /** Throttling based on the IO footprint of an atomic request */ + ATOMIC_READ_SIZE, + + /** Throttling based on the number of atomic requests per time-unit */ + ATOMIC_REQUEST_NUMBER, + + /** Throttling based on the size of atomic write requests */ + ATOMIC_WRITE_SIZE, } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java index c104fdcfa33a..46eb86aeb336 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java @@ -2433,6 +2433,12 @@ public static ThrottleType toThrottleType(final QuotaProtos.ThrottleType proto) return ThrottleType.READ_CAPACITY_UNIT; case WRITE_CAPACITY_UNIT: return ThrottleType.WRITE_CAPACITY_UNIT; + case ATOMIC_READ_SIZE: + return ThrottleType.ATOMIC_READ_SIZE; + case ATOMIC_REQUEST_NUMBER: + return ThrottleType.ATOMIC_REQUEST_NUMBER; + case ATOMIC_WRITE_SIZE: + return ThrottleType.ATOMIC_WRITE_SIZE; default: throw new RuntimeException("Invalid ThrottleType " + proto); } @@ -2462,6 +2468,12 @@ public static QuotaProtos.ThrottleType toProtoThrottleType(final ThrottleType ty return QuotaProtos.ThrottleType.READ_CAPACITY_UNIT; case WRITE_CAPACITY_UNIT: return QuotaProtos.ThrottleType.WRITE_CAPACITY_UNIT; + case ATOMIC_READ_SIZE: + return QuotaProtos.ThrottleType.ATOMIC_READ_SIZE; + case ATOMIC_REQUEST_NUMBER: + return QuotaProtos.ThrottleType.ATOMIC_REQUEST_NUMBER; + case ATOMIC_WRITE_SIZE: + return QuotaProtos.ThrottleType.ATOMIC_WRITE_SIZE; default: throw new RuntimeException("Invalid ThrottleType " + type); } diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestBufferedMutatorParams.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestBufferedMutatorParams.java index ba23d1053938..b6c52a0cd0d6 100644 --- 
a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestBufferedMutatorParams.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestBufferedMutatorParams.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; @@ -140,7 +141,8 @@ public void testClone() { BufferedMutator.ExceptionListener listener = new MockExceptionListener(); bmp.writeBufferSize(17).setWriteBufferPeriodicFlushTimeoutMs(123) - .setWriteBufferPeriodicFlushTimerTickMs(456).maxKeyValueSize(13).pool(pool) + .setWriteBufferPeriodicFlushTimerTickMs(456).maxKeyValueSize(13).setMaxMutations(3737) + .setRequestAttribute("foo", "bar".getBytes(StandardCharsets.UTF_8)).pool(pool) .listener(listener); bmp.implementationClassName("someClassName"); BufferedMutatorParams clone = bmp.clone(); @@ -151,6 +153,7 @@ public void testClone() { assertEquals(123, clone.getWriteBufferPeriodicFlushTimeoutMs()); assertEquals(456, clone.getWriteBufferPeriodicFlushTimerTickMs()); assertEquals(13, clone.getMaxKeyValueSize()); + assertEquals(3737, clone.getMaxMutations()); assertEquals("someClassName", clone.getImplementationClassName()); cloneTest(bmp, clone); @@ -178,6 +181,8 @@ private void cloneTest(BufferedMutatorParams some, BufferedMutatorParams clone) assertEquals(some.getWriteBufferPeriodicFlushTimerTickMs(), clone.getWriteBufferPeriodicFlushTimerTickMs()); assertEquals(some.getMaxKeyValueSize(), clone.getMaxKeyValueSize()); + assertTrue(some.getMaxMutations() == clone.getMaxMutations()); + assertEquals(some.requestAttributes, clone.requestAttributes); assertTrue(some.getListener() == clone.getListener()); assertTrue(some.getPool() == clone.getPool()); assertEquals(some.getImplementationClassName(), clone.getImplementationClassName()); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ByteBufferKeyOnlyKeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ByteBufferKeyOnlyKeyValue.java index a29a98a8c091..8e453fdb985d 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ByteBufferKeyOnlyKeyValue.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ByteBufferKeyOnlyKeyValue.java @@ -296,4 +296,15 @@ public long heapSize() { } return ClassSize.align(FIXED_OVERHEAD); } + + /** + * Completely clears the state of this cell. Useful if you want to reuse this object to avoid + * allocations. + */ + public void clear() { + this.buf = null; + this.offset = 0; + this.length = 0; + this.rowLen = 0; + } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/BlockDecompressorHelper.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/BlockDecompressorHelper.java new file mode 100644 index 000000000000..b03c0c35f7a6 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/BlockDecompressorHelper.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.compress; + +import java.io.IOException; +import java.nio.ByteBuffer; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Helper to decompress a ByteBuff that was created by a + * {@link org.apache.hadoop.io.compress.BlockCompressorStream}, or is at least in the same format. + * Parses the binary format and delegates actual decompression work to the provided + * {@link RawDecompressor}. Note that the use of the word "block" here does not refer to an HFile + * block. + */ +@InterfaceAudience.Private +public class BlockDecompressorHelper { + + public interface RawDecompressor { + int decompress(ByteBuff output, ByteBuff input, int inputLen) throws IOException; + } + + public static int decompress(ByteBuff output, ByteBuff input, int inputSize, + RawDecompressor rawDecompressor) throws IOException { + int totalDecompressedBytes = 0; + int compressedBytesConsumed = 0; + + while (compressedBytesConsumed < inputSize) { + int decompressedBlockSize = rawReadInt(input); + compressedBytesConsumed += 4; + int decompressedBytesInBlock = 0; + + while (decompressedBytesInBlock < decompressedBlockSize) { + int compressedChunkSize = rawReadInt(input); + compressedBytesConsumed += 4; + int n = rawDecompressor.decompress(output, input, compressedChunkSize); + if (n <= 0) { + throw new IOException("Decompression failed. Compressed size: " + compressedChunkSize + + ", decompressed size: " + decompressedBlockSize); + } + compressedBytesConsumed += compressedChunkSize; + decompressedBytesInBlock += n; + totalDecompressedBytes += n; + } + } + return totalDecompressedBytes; + } + + /** + * Read an integer from the buffer in big-endian byte order. Note that {@link ByteBuffer#getInt()} + * reads in system-dependent endian-ness, so we can't use that. + */ + private static int rawReadInt(ByteBuff input) { + int b1 = Byte.toUnsignedInt(input.get()); + int b2 = Byte.toUnsignedInt(input.get()); + int b3 = Byte.toUnsignedInt(input.get()); + int b4 = Byte.toUnsignedInt(input.get()); + return ((b1 << 24) + (b2 << 16) + (b3 << 8) + b4); + } + +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ByteBuffDecompressionCodec.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ByteBuffDecompressionCodec.java new file mode 100644 index 000000000000..233fc0160bd5 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ByteBuffDecompressionCodec.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.compress; + +import org.apache.hadoop.conf.Configuration; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public interface ByteBuffDecompressionCodec { + + Class getByteBuffDecompressorType(); + + ByteBuffDecompressor createByteBuffDecompressor(); + + Compression.HFileDecompressionContext + getDecompressionContextFromConfiguration(Configuration conf); + +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ByteBuffDecompressor.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ByteBuffDecompressor.java new file mode 100644 index 000000000000..432b903fe4d6 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ByteBuffDecompressor.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.compress; + +import edu.umd.cs.findbugs.annotations.Nullable; +import java.io.Closeable; +import java.io.IOException; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Specification of a ByteBuff-based decompressor, which can be more efficient than the stream-based + * Decompressor. + */ +@InterfaceAudience.Private +public interface ByteBuffDecompressor extends Closeable { + + /** + * Fills the output buffer with uncompressed data. Always call + * {@link #canDecompress(ByteBuff, ByteBuff)} first to check if this decompressor can handle your + * input and output buffers. + * @return The actual number of bytes of uncompressed data. + */ + int decompress(ByteBuff output, ByteBuff input, int inputLen) throws IOException; + + /** + * Signals whether these two particular {@link ByteBuff}s are compatible with this decompressor. + * ByteBuffs can have one or multiple backing buffers, and each of these may be stored in heap or + * direct memory. Different {@link ByteBuffDecompressor}s may be able to handle different + * combinations of these, so always check. + */ + boolean canDecompress(ByteBuff output, ByteBuff input); + + /** + * Call before every use of {@link #canDecompress(ByteBuff, ByteBuff)} and + * {@link #decompress(ByteBuff, ByteBuff, int)} to reinitialize the decompressor with settings + * from the HFileInfo. This can matter because ByteBuffDecompressors are reused many times.
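Putting the three methods together, the intended order of operations is reinit, then canDecompress, then decompress, with the instance handed back to the pool afterwards (the CodecPool added below is what lends these out). A rough caller sketch, not part of the patch, assuming the buffers and decompression context come from the surrounding HFile machinery:

import java.io.IOException;
import org.apache.hadoop.hbase.io.compress.ByteBuffDecompressionCodec;
import org.apache.hadoop.hbase.io.compress.ByteBuffDecompressor;
import org.apache.hadoop.hbase.io.compress.CodecPool;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.nio.ByteBuff;

public class ByteBuffDecompressorUsageSketch {
  /**
   * Borrow a decompressor, bind it to this file's settings, and decompress one block.
   * Returns -1 if the buffer pair is not supported, in which case the caller should fall back
   * to the stream-based Decompressor path.
   */
  static int decompressBlock(ByteBuffDecompressionCodec codec,
    Compression.HFileDecompressionContext ctx, ByteBuff output, ByteBuff input, int inputLen)
    throws IOException {
    ByteBuffDecompressor d = CodecPool.getByteBuffDecompressor(codec);
    try {
      d.reinit(ctx); // decompressors are pooled, so always re-bind per-file settings first
      if (d.canDecompress(output, input)) {
        return d.decompress(output, input, inputLen);
      }
      return -1;
    } finally {
      CodecPool.returnByteBuffDecompressor(d);
    }
  }
}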
+ */ + void reinit(@Nullable Compression.HFileDecompressionContext newHFileDecompressionContext); + +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/CodecPool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/CodecPool.java new file mode 100644 index 000000000000..437ca67f0a94 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/CodecPool.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.compress; + +import edu.umd.cs.findbugs.annotations.Nullable; +import java.util.Comparator; +import java.util.NavigableSet; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.compress.DoNotPool; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hbase.thirdparty.com.google.common.cache.LoadingCache; + +/** + * A global compressor/decompressor pool used to save and reuse (possibly native) + * compression/decompression codecs. Copied from the class of the same name in hadoop-common and + * augmented to improve borrow/return performance. + */ +@InterfaceAudience.Private +public class CodecPool { + private static final Logger LOG = LoggerFactory.getLogger(CodecPool.class); + + private static final ConcurrentMap, NavigableSet> COMPRESSOR_POOL = + new ConcurrentHashMap<>(); + + private static final ConcurrentMap, + NavigableSet> DECOMPRESSOR_POOL = new ConcurrentHashMap<>(); + + private static final ConcurrentMap, + NavigableSet> BYTE_BUFF_DECOMPRESSOR_POOL = new ConcurrentHashMap<>(); + + private static LoadingCache, AtomicInteger> createCache() { + return CacheBuilder.newBuilder().build(new CacheLoader, AtomicInteger>() { + @Override + public AtomicInteger load(Class key) throws Exception { + return new AtomicInteger(); + } + }); + } + + /** + * Map to track the number of leased compressors. Only used in unit tests, kept null otherwise. + */ + @Nullable + private static LoadingCache, AtomicInteger> compressorCounts = null; + + /** + * Map to tracks the number of leased decompressors. Only used in unit tests, kept null otherwise. 
+ */ + @Nullable + private static LoadingCache, AtomicInteger> decompressorCounts = null; + + /** + * Call if you want lease counting to be enabled. Only used in unit tests. + */ + static void initLeaseCounting() { + compressorCounts = createCache(); + decompressorCounts = createCache(); + } + + private static T borrow(ConcurrentMap, NavigableSet> pool, + Class codecClass) { + if (codecClass == null) { + return null; + } + + NavigableSet codecSet = pool.get(codecClass); + if (codecSet != null) { + // If a copy of the codec is available, pollFirst() will grab one. + // If not, it will return null. + return codecSet.pollFirst(); + } else { + return null; + } + } + + private static boolean payback(ConcurrentMap, NavigableSet> pool, T codec) { + if (codec != null) { + Class codecClass = ReflectionUtils.getClass(codec); + Set codecSet = pool.computeIfAbsent(codecClass, + k -> new ConcurrentSkipListSet<>(Comparator.comparingInt(System::identityHashCode))); + return codecSet.add(codec); + } + return false; + } + + /** + * Copied from hadoop-common without significant modification. + */ + private static int getLeaseCount(LoadingCache, AtomicInteger> usageCounts, + Class codecClass) { + return usageCounts.getUnchecked((Class) codecClass).get(); + } + + /** + * Copied from hadoop-common without significant modification. + */ + private static void updateLeaseCount(LoadingCache, AtomicInteger> usageCounts, + T codec, int delta) { + if (codec != null && usageCounts != null) { + Class codecClass = ReflectionUtils.getClass(codec); + usageCounts.getUnchecked(codecClass).addAndGet(delta); + } + } + + /** + * Get a {@link Compressor} for the given {@link CompressionCodec} from the pool, or get a new one + * if the pool is empty. Copied from hadoop-common without significant modification. + */ + public static Compressor getCompressor(CompressionCodec codec, Configuration conf) { + Compressor compressor = borrow(COMPRESSOR_POOL, codec.getCompressorType()); + if (compressor == null) { + compressor = codec.createCompressor(); + LOG.info("Got brand-new compressor [" + codec.getDefaultExtension() + "]"); + } else { + compressor.reinit(conf); + if (LOG.isDebugEnabled()) { + LOG.debug("Got recycled compressor"); + } + } + if (compressor != null && !compressor.getClass().isAnnotationPresent(DoNotPool.class)) { + updateLeaseCount(compressorCounts, compressor, 1); + } + return compressor; + } + + public static Compressor getCompressor(CompressionCodec codec) { + return getCompressor(codec, null); + } + + /** + * Get a {@link Decompressor} for the given {@link CompressionCodec} from the pool, or get a new + * one if the pool is empty. Copied from hadoop-common without significant modification. 
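One design note on the pools above: returned codecs are kept in a ConcurrentSkipListSet ordered by System.identityHashCode, so two distinct instances remain two entries even if they would compare equal, and borrow/return reduce to pollFirst()/add() with no locking. A standalone illustration of that property (plain strings stand in for compressors; not part of the patch):

import java.util.Comparator;
import java.util.concurrent.ConcurrentSkipListSet;

public class IdentityOrderedPoolSketch {
  public static void main(String[] args) {
    ConcurrentSkipListSet<String> pool =
      new ConcurrentSkipListSet<>(Comparator.comparingInt(System::identityHashCode));
    String a = new String("codec");
    String b = new String("codec"); // equals(a), but a distinct instance
    pool.add(a);
    pool.add(b);
    // Both instances are retained (barring an astronomically unlikely identityHashCode collision).
    System.out.println(pool.size());              // 2
    System.out.println(pool.pollFirst() != null); // true: a lock-free "borrow"
  }
}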
+ */ + public static Decompressor getDecompressor(CompressionCodec codec) { + Decompressor decompressor = borrow(DECOMPRESSOR_POOL, codec.getDecompressorType()); + if (decompressor == null) { + decompressor = codec.createDecompressor(); + LOG.info("Got brand-new Decompressor [" + codec.getDefaultExtension() + "]"); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Got recycled Decompressor"); + } + } + if (decompressor != null && !decompressor.getClass().isAnnotationPresent(DoNotPool.class)) { + updateLeaseCount(decompressorCounts, decompressor, 1); + } + return decompressor; + } + + public static ByteBuffDecompressor getByteBuffDecompressor(ByteBuffDecompressionCodec codec) { + ByteBuffDecompressor decompressor = + borrow(BYTE_BUFF_DECOMPRESSOR_POOL, codec.getByteBuffDecompressorType()); + if (decompressor == null) { + decompressor = codec.createByteBuffDecompressor(); + LOG.info("Got brand-new ByteBuffDecompressor " + decompressor.getClass().getName()); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Got recycled ByteBuffDecompressor"); + } + } + return decompressor; + } + + /** + * Return the {@link Compressor} to the pool. Copied from hadoop-common without significant + * modification. + */ + public static void returnCompressor(Compressor compressor) { + if (compressor == null) { + return; + } + // if the compressor can't be reused, don't pool it. + if (compressor.getClass().isAnnotationPresent(DoNotPool.class)) { + compressor.end(); + return; + } + compressor.reset(); + if (payback(COMPRESSOR_POOL, compressor)) { + updateLeaseCount(compressorCounts, compressor, -1); + } + } + + /** + * Return the {@link Decompressor} to the pool. Copied from hadoop-common without significant + * modification. + */ + public static void returnDecompressor(Decompressor decompressor) { + if (decompressor == null) { + return; + } + // if the decompressor can't be reused, don't pool it. + if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) { + decompressor.end(); + return; + } + decompressor.reset(); + if (payback(DECOMPRESSOR_POOL, decompressor)) { + updateLeaseCount(decompressorCounts, decompressor, -1); + } + } + + public static void returnByteBuffDecompressor(ByteBuffDecompressor decompressor) { + if (decompressor == null) { + return; + } + // if the decompressor can't be reused, don't pool it. + if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) { + return; + } + payback(BYTE_BUFF_DECOMPRESSOR_POOL, decompressor); + } + + /** + * Returns the number of leased {@link Compressor}s for this {@link CompressionCodec}. Copied from + * hadoop-common without significant modification. + */ + static int getLeasedCompressorsCount(@Nullable CompressionCodec codec) { + if (compressorCounts == null) { + throw new IllegalStateException("initLeaseCounting() not called to set up lease counting"); + } + return (codec == null) ? 0 : getLeaseCount(compressorCounts, codec.getCompressorType()); + } + + /** + * Returns the number of leased {@link Decompressor}s for this {@link CompressionCodec}. Copied + * from hadoop-common without significant modification. + */ + static int getLeasedDecompressorsCount(@Nullable CompressionCodec codec) { + if (decompressorCounts == null) { + throw new IllegalStateException("initLeaseCounting() not called to set up lease counting"); + } + return (codec == null) ? 
0 : getLeaseCount(decompressorCounts, codec.getDecompressorType()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java index 7f73cd2f004e..2697ed152844 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java @@ -17,8 +17,10 @@ */ package org.apache.hadoop.hbase.io.compress; +import edu.umd.cs.findbugs.annotations.Nullable; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; +import java.io.Closeable; import java.io.FilterOutputStream; import java.io.IOException; import java.io.InputStream; @@ -26,7 +28,8 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.io.compress.CodecPool; +import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionInputStream; import org.apache.hadoop.io.compress.CompressionOutputStream; @@ -508,11 +511,76 @@ public void returnDecompressor(Decompressor decompressor) { } } + /** + * Signals if this codec theoretically supports decompression on {@link ByteBuff}s. This can be + * faster than using a DecompressionStream. If this method returns true, you can call + * {@link #getByteBuffDecompressor()} to obtain a {@link ByteBuffDecompressor}. You must then + * also call {@link ByteBuffDecompressor#canDecompress(ByteBuff, ByteBuff)} before attempting + * decompression, to verify if that decompressor is capable of handling your particular input + * and output buffers. + */ + public boolean supportsByteBuffDecompression() { + CompressionCodec codec = getCodec(conf); + return codec instanceof ByteBuffDecompressionCodec; + } + + /** + * Be sure to call {@link #supportsByteBuffDecompression()} before calling this method. + * @throws IllegalStateException if the codec does not support block decompression + */ + public ByteBuffDecompressor getByteBuffDecompressor() { + CompressionCodec codec = getCodec(conf); + if (codec instanceof ByteBuffDecompressionCodec) { + ByteBuffDecompressor decompressor = + CodecPool.getByteBuffDecompressor((ByteBuffDecompressionCodec) codec); + if (LOG.isTraceEnabled()) { + LOG.trace("Retrieved decompressor {} from pool.", decompressor); + } + return decompressor; + } else { + throw new IllegalStateException("Codec " + codec + " does not support block decompression"); + } + } + + public void returnByteBuffDecompressor(ByteBuffDecompressor decompressor) { + if (decompressor != null) { + if (LOG.isTraceEnabled()) { + LOG.trace("Returning decompressor {} to pool.", decompressor); + } + CodecPool.returnByteBuffDecompressor(decompressor); + } + } + + /** + * Get an object that holds settings used by ByteBuffDecompressor. It's expensive to pull these + * from a Configuration object every time we decompress a block, so pull them here when, for + * example, opening an HFile, and reuse the returned HFileDecompressionContext as much as + * possible. The concrete class of this object will be one that is specific to the codec + * implementation in use. You don't need to inspect it yourself, just pass it along to + * {@link ByteBuffDecompressor#reinit(HFileDecompressionContext)}. 
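To show how this is meant to be consumed (an illustrative sketch, not code from the patch): the context is derived once from the store file's Configuration and carried on the HFileContext via the HFileContextBuilder change later in this patch, so per-block decompression never has to read the Configuration again.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;

public class DecompressionContextWiringSketch {
  /** Derive the context once at file-open time and carry it in the HFileContext. */
  static HFileContext contextFor(Compression.Algorithm algo, Configuration storeFileConf) {
    Compression.HFileDecompressionContext ctx =
      algo.getHFileDecompressionContextForConfiguration(storeFileConf); // null for most codecs
    return new HFileContextBuilder()
      .withCompression(algo)
      .withDecompressionContext(ctx) // builder method added later in this patch
      .build();
  }
}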
+ */ + @Nullable + public HFileDecompressionContext + getHFileDecompressionContextForConfiguration(Configuration conf) { + if (supportsByteBuffDecompression()) { + return ((ByteBuffDecompressionCodec) getCodec(conf)) + .getDecompressionContextFromConfiguration(conf); + } else { + return null; + } + } + public String getName() { return compressName; } } + /** + * See {@link Algorithm#getHFileDecompressionContextForConfiguration(Configuration)}. + */ + public static abstract class HFileDecompressionContext implements Closeable, HeapSize { + } + public static Algorithm getCompressionAlgorithmByName(String compressName) { Algorithm[] algos = Algorithm.class.getEnumConstants(); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/DictionaryCache.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/DictionaryCache.java index 1d6e25675f26..78fa448b63df 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/DictionaryCache.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/DictionaryCache.java @@ -59,11 +59,11 @@ private DictionaryCache() { * @param path the hadoop Path where the dictionary is located, as a String * @return the dictionary bytes if successful, null otherwise */ - public static byte[] getDictionary(final Configuration conf, final String path) - throws IOException { + public static byte[] getDictionary(final Configuration conf, final String path) { if (path == null || path.isEmpty()) { return null; } + // Create the dictionary loading cache if we haven't already if (CACHE == null) { synchronized (DictionaryCache.class) { @@ -91,7 +91,7 @@ public byte[] load(String s) throws Exception { try { return CACHE.get(path); } catch (ExecutionException e) { - throw new IOException(e); + throw new RuntimeException("Unable to load dictionary at " + path, e); } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java index 7c4e348b44ad..81f8e5fa6a24 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java @@ -24,6 +24,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.io.ByteBuffInputStream; import org.apache.hadoop.hbase.io.TagCompressionContext; +import org.apache.hadoop.hbase.io.compress.ByteBuffDecompressor; import org.apache.hadoop.hbase.io.compress.CanReinit; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.crypto.Cipher; @@ -43,6 +44,7 @@ */ @InterfaceAudience.Private public class HFileBlockDefaultDecodingContext implements HFileBlockDecodingContext { + private final Configuration conf; private final HFileContext fileContext; private TagCompressionContext tagCompressionContext; @@ -55,6 +57,13 @@ public HFileBlockDefaultDecodingContext(Configuration conf, HFileContext fileCon @Override public void prepareDecoding(int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader, ByteBuff blockBufferWithoutHeader, ByteBuff onDiskBlock) throws IOException { + + // If possible, use the ByteBuffer decompression mechanism to avoid extra copies. 
+ if (canDecompressViaByteBuff(blockBufferWithoutHeader, onDiskBlock)) { + decompressViaByteBuff(blockBufferWithoutHeader, onDiskBlock, onDiskSizeWithoutHeader); + return; + } + final ByteBuffInputStream byteBuffInputStream = new ByteBuffInputStream(onDiskBlock); InputStream dataInputStream = new DataInputStream(byteBuffInputStream); @@ -119,6 +128,46 @@ public void prepareDecoding(int onDiskSizeWithoutHeader, int uncompressedSizeWit } } + /** + * When only decompression is needed (not decryption), and the input and output buffers are + * SingleByteBuffs, and the decompression algorithm supports it, we can do decompression without + * any intermediate heap buffers. Do not call unless you've checked + * {@link #canDecompressViaByteBuff} first. + */ + private void decompressViaByteBuff(ByteBuff blockBufferWithoutHeader, ByteBuff onDiskBlock, + int onDiskSizeWithoutHeader) throws IOException { + Compression.Algorithm compression = fileContext.getCompression(); + ByteBuffDecompressor decompressor = compression.getByteBuffDecompressor(); + try { + decompressor.reinit(fileContext.getDecompressionContext()); + decompressor.decompress(blockBufferWithoutHeader, onDiskBlock, onDiskSizeWithoutHeader); + } finally { + compression.returnByteBuffDecompressor(decompressor); + } + } + + private boolean canDecompressViaByteBuff(ByteBuff blockBufferWithoutHeader, + ByteBuff onDiskBlock) { + // Theoretically we can do ByteBuff decompression after doing streaming decryption, but the + // refactoring necessary to support this has not been attempted. For now, we skip ByteBuff + // decompression if the input is encrypted. + if (fileContext.getEncryptionContext() != Encryption.Context.NONE) { + return false; + } else if (!fileContext.getCompression().supportsByteBuffDecompression()) { + return false; + } else { + ByteBuffDecompressor decompressor = fileContext.getCompression().getByteBuffDecompressor(); + try { + decompressor.reinit(fileContext.getDecompressionContext()); + // Even if we have a ByteBuffDecompressor, we still need to check if it can decompress + // our particular ByteBuffs + return decompressor.canDecompress(blockBufferWithoutHeader, onDiskBlock); + } finally { + fileContext.getCompression().returnByteBuffDecompressor(decompressor); + } + } + } + @Override public HFileContext getHFileContext() { return this.fileContext; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java index e283803a143b..c82906dc21a0 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java @@ -84,10 +84,10 @@ public void setCurrentBuffer(ByteBuff buffer) { public Cell getKey() { if (current.keyBuffer.hasArray()) { return new KeyValue.KeyOnlyKeyValue(current.keyBuffer.array(), - current.keyBuffer.arrayOffset() + current.keyBuffer.position(), current.keyLength); + current.keyBuffer.arrayOffset() + current.keyOffset, current.keyLength); } else { final byte[] key = new byte[current.keyLength]; - ByteBufferUtils.copyFromBufferToArray(key, current.keyBuffer, current.keyBuffer.position(), 0, + ByteBufferUtils.copyFromBufferToArray(key, current.keyBuffer, current.keyOffset, 0, current.keyLength); return new KeyValue.KeyOnlyKeyValue(key, 0, current.keyLength); } @@ -254,9 +254,8 @@ protected void decodeNext() { currentBuffer.skip(Bytes.SIZEOF_LONG); // key part 
currentBuffer.asSubByteBuffer(currentBuffer.position(), current.keyLength, tmpPair); - ByteBuffer key = tmpPair.getFirst().duplicate(); - key.position(tmpPair.getSecond()).limit(tmpPair.getSecond() + current.keyLength); - current.keyBuffer = key; + current.keyBuffer = tmpPair.getFirst(); + current.keyOffset = tmpPair.getSecond(); currentBuffer.skip(current.keyLength); // value part current.valueOffset = currentBuffer.position(); @@ -270,7 +269,7 @@ protected void decodeNext() { current.memstoreTS = 0; } current.nextKvOffset = currentBuffer.position(); - current.currentKey.setKey(current.keyBuffer, tmpPair.getSecond(), current.keyLength); + current.currentKey.setKey(current.keyBuffer, current.keyOffset, current.keyLength); } protected void decodeTags() { @@ -288,6 +287,7 @@ private class SeekerState { protected ByteBuff currentBuffer; protected int startOffset = -1; + protected int keyOffset = -1; protected int valueOffset = -1; protected int keyLength; protected int valueLength; @@ -297,7 +297,7 @@ private class SeekerState { protected ByteBuffer keyBuffer = null; protected long memstoreTS; protected int nextKvOffset; - // buffer backed keyonlyKV + // buffer backed keyonlyKV, reset and re-used as necessary to avoid allocations private ByteBufferKeyOnlyKeyValue currentKey = new ByteBufferKeyOnlyKeyValue(); protected boolean isValid() { @@ -306,7 +306,7 @@ protected boolean isValid() { protected void invalidate() { valueOffset = -1; - currentKey = new ByteBufferKeyOnlyKeyValue(); + currentKey.clear(); currentBuffer = null; } @@ -320,6 +320,7 @@ protected void copyFromNext(SeekerState nextState) { nextState.currentKey.getRowPosition() - Bytes.SIZEOF_SHORT, nextState.keyLength); startOffset = nextState.startOffset; + keyOffset = nextState.keyOffset; valueOffset = nextState.valueOffset; keyLength = nextState.keyLength; valueLength = nextState.valueLength; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java index 98520d949af4..5dbf34304266 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hbase.io.hfile; +import edu.umd.cs.findbugs.annotations.Nullable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.InnerStoreCellComparator; @@ -50,6 +52,11 @@ public class HFileContext implements HeapSize, Cloneable { private boolean includesTags; /** Compression algorithm used **/ private Compression.Algorithm compressAlgo = Compression.Algorithm.NONE; + /** + * Details used by compression algorithm that are more efficiently loaded once and then reused + **/ + @Nullable + private Compression.HFileDecompressionContext decompressionContext = null; /** Whether tags to be compressed or not **/ private boolean compressTags; /** the checksum type **/ @@ -80,6 +87,7 @@ public HFileContext(HFileContext context) { this.includesMvcc = context.includesMvcc; this.includesTags = context.includesTags; this.compressAlgo = context.compressAlgo; + this.decompressionContext = context.decompressionContext; this.compressTags = context.compressTags; this.checksumType = context.checksumType; this.bytesPerChecksum = context.bytesPerChecksum; @@ -95,14 +103,16 @@ public HFileContext(HFileContext context) { } HFileContext(boolean 
useHBaseChecksum, boolean includesMvcc, boolean includesTags, - Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType, - int bytesPerChecksum, int blockSize, DataBlockEncoding encoding, - Encryption.Context cryptoContext, long fileCreateTime, String hfileName, byte[] columnFamily, - byte[] tableName, CellComparator cellComparator, IndexBlockEncoding indexBlockEncoding) { + Compression.Algorithm compressAlgo, Compression.HFileDecompressionContext decompressionContext, + boolean compressTags, ChecksumType checksumType, int bytesPerChecksum, int blockSize, + DataBlockEncoding encoding, Encryption.Context cryptoContext, long fileCreateTime, + String hfileName, byte[] columnFamily, byte[] tableName, CellComparator cellComparator, + IndexBlockEncoding indexBlockEncoding) { this.usesHBaseChecksum = useHBaseChecksum; this.includesMvcc = includesMvcc; this.includesTags = includesTags; this.compressAlgo = compressAlgo; + this.decompressionContext = decompressionContext; this.compressTags = compressTags; this.checksumType = checksumType; this.bytesPerChecksum = bytesPerChecksum; @@ -141,6 +151,20 @@ public Compression.Algorithm getCompression() { return compressAlgo; } + /** + * Get an object that, if non-null, may be cast into a codec-specific type that exposes some + * information from the store-file-specific Configuration that is relevant to decompression. For + * example, ZSTD tables can have "hbase.io.compress.zstd.dictionary" on their table descriptor, + * and decompressions of blocks in that table must use that dictionary. It's cheaper for HBase to + * load these settings into an object of their own once and check this upon each block + * decompression, than it is to call into {@link Configuration#get(String)} on each block + * decompression. 
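The dictionary mentioned above reaches the store-file Configuration through table metadata. A hedged sketch of one way it might be attached (the HDFS path is invented, and the table would also need ZSTD compression configured on a column family for the setting to matter):

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;

public class DictionaryOnTableSketch {
  public static void main(String[] args) {
    // The dictionary path below is illustrative; it must point at a dictionary that
    // DictionaryCache can load on the RegionServers.
    TableDescriptor td = TableDescriptorBuilder.newBuilder(TableName.valueOf("t1"))
      .setValue("hbase.io.compress.zstd.dictionary", "hdfs://nameservice/hbase/dicts/t1.dict")
      .build();
    System.out.println(td.getValue("hbase.io.compress.zstd.dictionary"));
  }
}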
+ */ + @Nullable + public Compression.HFileDecompressionContext getDecompressionContext() { + return decompressionContext; + } + public boolean isUseHBaseChecksum() { return usesHBaseChecksum; } @@ -238,6 +262,9 @@ public long heapSize() { if (this.tableName != null) { size += ClassSize.sizeOfByteArray(this.tableName.length); } + if (this.decompressionContext != null) { + size += this.decompressionContext.heapSize(); + } return size; } @@ -274,6 +301,8 @@ public String toString() { sb.append(compressAlgo); sb.append(", compressTags="); sb.append(compressTags); + sb.append(", decompressionContext="); + sb.append(decompressionContext); sb.append(", cryptoContext=["); sb.append(cryptoContext); sb.append("]"); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java index 0394f12144e3..341461b26b1f 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java @@ -17,8 +17,10 @@ */ package org.apache.hadoop.hbase.io.hfile; +import edu.umd.cs.findbugs.annotations.Nullable; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.crypto.Encryption; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; @@ -42,6 +44,8 @@ public class HFileContextBuilder { private boolean includesTags = false; /** Compression algorithm used **/ private Algorithm compression = Algorithm.NONE; + @Nullable + private Compression.HFileDecompressionContext decompressionContext = null; /** Whether tags to be compressed or not **/ private boolean compressTags = false; /** the checksum type **/ @@ -73,6 +77,7 @@ public HFileContextBuilder(final HFileContext hfc) { this.includesMvcc = hfc.isIncludesMvcc(); this.includesTags = hfc.isIncludesTags(); this.compression = hfc.getCompression(); + this.decompressionContext = hfc.getDecompressionContext(); this.compressTags = hfc.isCompressTags(); this.checkSumType = hfc.getChecksumType(); this.bytesPerChecksum = hfc.getBytesPerChecksum(); @@ -107,6 +112,12 @@ public HFileContextBuilder withCompression(Algorithm compression) { return this; } + public HFileContextBuilder + withDecompressionContext(@Nullable Compression.HFileDecompressionContext decompressionContext) { + this.decompressionContext = decompressionContext; + return this; + } + public HFileContextBuilder withCompressTags(boolean compressTags) { this.compressTags = compressTags; return this; @@ -169,7 +180,8 @@ public HFileContextBuilder withCellComparator(CellComparator cellComparator) { public HFileContext build() { return new HFileContext(usesHBaseChecksum, includesMvcc, includesTags, compression, - compressTags, checkSumType, bytesPerChecksum, blockSize, encoding, cryptoContext, - fileCreateTime, hfileName, columnFamily, tableName, cellComparator, indexBlockEncoding); + decompressionContext, compressTags, checkSumType, bytesPerChecksum, blockSize, encoding, + cryptoContext, fileCreateTime, hfileName, columnFamily, tableName, cellComparator, + indexBlockEncoding); } } diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/compress/TestCodecPool.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/compress/TestCodecPool.java new file mode 100644 index 
000000000000..166c12a658c3 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/compress/TestCodecPool.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.compress; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.OutputStream; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.testclassification.MiscTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Along with CodecPool, this is copied from the class of the same name in hadoop-common. Modified + * to accommodate changes to HBase's CodecPool. 
+ */ +@Category({ MiscTests.class, SmallTests.class }) +public class TestCodecPool { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestCodecPool.class); + + private final String LEASE_COUNT_ERR = "Incorrect number of leased (de)compressors"; + DefaultCodec codec; + + @BeforeClass + public static void beforeClass() { + CodecPool.initLeaseCounting(); + } + + @Before + public void setup() { + this.codec = new DefaultCodec(); + this.codec.setConf(new Configuration()); + } + + @Test(timeout = 10000) + public void testCompressorPoolCounts() { + // Get two compressors and return them + Compressor comp1 = CodecPool.getCompressor(codec); + Compressor comp2 = CodecPool.getCompressor(codec); + assertEquals(LEASE_COUNT_ERR, 2, CodecPool.getLeasedCompressorsCount(codec)); + + CodecPool.returnCompressor(comp2); + assertEquals(LEASE_COUNT_ERR, 1, CodecPool.getLeasedCompressorsCount(codec)); + + CodecPool.returnCompressor(comp1); + assertEquals(LEASE_COUNT_ERR, 0, CodecPool.getLeasedCompressorsCount(codec)); + + CodecPool.returnCompressor(comp1); + assertEquals(LEASE_COUNT_ERR, 0, CodecPool.getLeasedCompressorsCount(codec)); + } + + @Test(timeout = 10000) + public void testCompressorNotReturnSameInstance() { + Compressor comp = CodecPool.getCompressor(codec); + CodecPool.returnCompressor(comp); + CodecPool.returnCompressor(comp); + Set compressors = new HashSet(); + for (int i = 0; i < 10; ++i) { + compressors.add(CodecPool.getCompressor(codec)); + } + assertEquals(10, compressors.size()); + for (Compressor compressor : compressors) { + CodecPool.returnCompressor(compressor); + } + } + + @Test(timeout = 10000) + public void testDecompressorPoolCounts() { + // Get two decompressors and return them + Decompressor decomp1 = CodecPool.getDecompressor(codec); + Decompressor decomp2 = CodecPool.getDecompressor(codec); + assertEquals(LEASE_COUNT_ERR, 2, CodecPool.getLeasedDecompressorsCount(codec)); + + CodecPool.returnDecompressor(decomp2); + assertEquals(LEASE_COUNT_ERR, 1, CodecPool.getLeasedDecompressorsCount(codec)); + + CodecPool.returnDecompressor(decomp1); + assertEquals(LEASE_COUNT_ERR, 0, CodecPool.getLeasedDecompressorsCount(codec)); + + CodecPool.returnDecompressor(decomp1); + assertEquals(LEASE_COUNT_ERR, 0, CodecPool.getLeasedCompressorsCount(codec)); + } + + @Test(timeout = 10000) + public void testMultiThreadedCompressorPool() throws InterruptedException { + final int iterations = 4; + ExecutorService threadpool = Executors.newFixedThreadPool(3); + final LinkedBlockingDeque queue = + new LinkedBlockingDeque(2 * iterations); + + Callable consumer = new Callable() { + @Override + public Boolean call() throws Exception { + Compressor c = queue.take(); + CodecPool.returnCompressor(c); + return c != null; + } + }; + + Callable producer = new Callable() { + @Override + public Boolean call() throws Exception { + Compressor c = CodecPool.getCompressor(codec); + queue.put(c); + return c != null; + } + }; + + for (int i = 0; i < iterations; i++) { + threadpool.submit(consumer); + threadpool.submit(producer); + } + + // wait for completion + threadpool.shutdown(); + threadpool.awaitTermination(1000, TimeUnit.SECONDS); + + assertEquals(LEASE_COUNT_ERR, 0, CodecPool.getLeasedCompressorsCount(codec)); + } + + @Test(timeout = 10000) + public void testMultiThreadedDecompressorPool() throws InterruptedException { + final int iterations = 4; + ExecutorService threadpool = Executors.newFixedThreadPool(3); + final LinkedBlockingDeque queue = + new 
LinkedBlockingDeque(2 * iterations); + + Callable consumer = new Callable() { + @Override + public Boolean call() throws Exception { + Decompressor dc = queue.take(); + CodecPool.returnDecompressor(dc); + return dc != null; + } + }; + + Callable producer = new Callable() { + @Override + public Boolean call() throws Exception { + Decompressor c = CodecPool.getDecompressor(codec); + queue.put(c); + return c != null; + } + }; + + for (int i = 0; i < iterations; i++) { + threadpool.submit(consumer); + threadpool.submit(producer); + } + + // wait for completion + threadpool.shutdown(); + threadpool.awaitTermination(1000, TimeUnit.SECONDS); + + assertEquals(LEASE_COUNT_ERR, 0, CodecPool.getLeasedDecompressorsCount(codec)); + } + + @Test(timeout = 10000) + public void testDecompressorNotReturnSameInstance() { + Decompressor decomp = CodecPool.getDecompressor(codec); + CodecPool.returnDecompressor(decomp); + CodecPool.returnDecompressor(decomp); + Set decompressors = new HashSet(); + for (int i = 0; i < 10; ++i) { + decompressors.add(CodecPool.getDecompressor(codec)); + } + assertEquals(10, decompressors.size()); + for (Decompressor decompressor : decompressors) { + CodecPool.returnDecompressor(decompressor); + } + } + + @Test(timeout = 10000) + public void testDoNotPoolDecompressorNotUseableAfterReturn() throws Exception { + + final GzipCodec gzipCodec = new GzipCodec(); + gzipCodec.setConf(new Configuration()); + + final Random random = new Random(); + final byte[] bytes = new byte[1024]; + random.nextBytes(bytes); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (OutputStream outputStream = gzipCodec.createOutputStream(baos)) { + outputStream.write(bytes); + } + + final byte[] gzipBytes = baos.toByteArray(); + final ByteArrayInputStream bais = new ByteArrayInputStream(gzipBytes); + + // BuiltInGzipDecompressor is an explicit example of a Decompressor + // with the @DoNotPool annotation + final Decompressor decompressor = new BuiltInGzipDecompressor(); + CodecPool.returnDecompressor(decompressor); + + final CompressionInputStream inputStream = gzipCodec.createInputStream(bais, decompressor); + boolean passed = false; + try { + inputStream.read(); + } catch (Exception e) { + if ( + e.getMessage().contains("decompress called on closed decompressor") + || e.getMessage().contains("Inflater has been closed") + ) { + passed = true; + } + } + + if (!passed) { + fail("Decompressor from Codec with @DoNotPool should not be " + + "useable after returning to CodecPool"); + } + } + +} diff --git a/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdByteBuffDecompressor.java b/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdByteBuffDecompressor.java new file mode 100644 index 000000000000..fd3f778edf89 --- /dev/null +++ b/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdByteBuffDecompressor.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.compress.zstd; + +import com.github.luben.zstd.ZstdDecompressCtx; +import com.github.luben.zstd.ZstdDictDecompress; +import edu.umd.cs.findbugs.annotations.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import org.apache.hadoop.hbase.io.compress.BlockDecompressorHelper; +import org.apache.hadoop.hbase.io.compress.ByteBuffDecompressor; +import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.hadoop.hbase.nio.SingleByteBuff; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Glue for ByteBuffDecompressor on top of zstd-jni + */ +@InterfaceAudience.Private +public class ZstdByteBuffDecompressor implements ByteBuffDecompressor { + + protected int dictId; + protected ZstdDecompressCtx ctx; + // Intended to be set to false by some unit tests + private boolean allowByteBuffDecompression; + + ZstdByteBuffDecompressor(@Nullable byte[] dictionaryBytes) { + ctx = new ZstdDecompressCtx(); + if (dictionaryBytes != null) { + this.ctx.loadDict(new ZstdDictDecompress(dictionaryBytes)); + dictId = ZstdCodec.getDictionaryId(dictionaryBytes); + } + allowByteBuffDecompression = true; + } + + @Override + public boolean canDecompress(ByteBuff output, ByteBuff input) { + return allowByteBuffDecompression && output instanceof SingleByteBuff + && input instanceof SingleByteBuff; + } + + @Override + public int decompress(ByteBuff output, ByteBuff input, int inputLen) throws IOException { + return BlockDecompressorHelper.decompress(output, input, inputLen, this::decompressRaw); + } + + private int decompressRaw(ByteBuff output, ByteBuff input, int inputLen) throws IOException { + if (output instanceof SingleByteBuff && input instanceof SingleByteBuff) { + ByteBuffer nioOutput = output.nioByteBuffers()[0]; + ByteBuffer nioInput = input.nioByteBuffers()[0]; + int origOutputPos = nioOutput.position(); + int n; + if (nioOutput.isDirect() && nioInput.isDirect()) { + n = ctx.decompressDirectByteBuffer(nioOutput, nioOutput.position(), + nioOutput.limit() - nioOutput.position(), nioInput, nioInput.position(), inputLen); + } else if (!nioOutput.isDirect() && !nioInput.isDirect()) { + n = ctx.decompressByteArray(nioOutput.array(), + nioOutput.arrayOffset() + nioOutput.position(), nioOutput.limit() - nioOutput.position(), + nioInput.array(), nioInput.arrayOffset() + nioInput.position(), inputLen); + } else if (nioOutput.isDirect() && !nioInput.isDirect()) { + n = ctx.decompressByteArrayToDirectByteBuffer(nioOutput, nioOutput.position(), + nioOutput.limit() - nioOutput.position(), nioInput.array(), + nioInput.arrayOffset() + nioInput.position(), inputLen); + } else if (!nioOutput.isDirect() && nioInput.isDirect()) { + n = ctx.decompressDirectByteBufferToByteArray(nioOutput.array(), + nioOutput.arrayOffset() + nioOutput.position(), nioOutput.limit() - nioOutput.position(), + nioInput, nioInput.position(), inputLen); + } else { + throw new IllegalStateException("Unreachable line"); + } + + nioOutput.position(origOutputPos + n); + nioInput.position(input.position() + inputLen); + 
+ return n; + } else { + throw new IllegalStateException( + "At least one buffer is not a SingleByteBuff, this is not supported"); + } + } + + @Override + public void reinit(@Nullable Compression.HFileDecompressionContext newHFileDecompressionContext) { + if (newHFileDecompressionContext != null) { + if (newHFileDecompressionContext instanceof ZstdHFileDecompressionContext) { + ZstdHFileDecompressionContext zstdContext = + (ZstdHFileDecompressionContext) newHFileDecompressionContext; + allowByteBuffDecompression = zstdContext.isAllowByteBuffDecompression(); + if (zstdContext.getDict() == null && dictId != 0) { + ctx.loadDict((byte[]) null); + dictId = 0; + } else if (zstdContext.getDictId() != dictId) { + this.ctx.loadDict(zstdContext.getDict()); + this.dictId = zstdContext.getDictId(); + } + } else { + throw new IllegalArgumentException( + "ZstdByteBuffDecompression#reinit() was given an HFileDecompressionContext that was not a ZstdHFileDecompressionContext, this should never happen"); + } + } + } + + @Override + public void close() { + ctx.close(); + } + +} diff --git a/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdCodec.java b/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdCodec.java index 7b97c817aca1..e934aa12c6cf 100644 --- a/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdCodec.java +++ b/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdCodec.java @@ -18,15 +18,23 @@ package org.apache.hadoop.hbase.io.compress.zstd; import com.github.luben.zstd.Zstd; +import com.github.luben.zstd.ZstdDictDecompress; +import edu.umd.cs.findbugs.annotations.Nullable; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.hbase.io.compress.ByteBuffDecompressionCodec; +import org.apache.hadoop.hbase.io.compress.ByteBuffDecompressor; +import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.DictionaryCache; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.io.compress.BlockCompressorStream; import org.apache.hadoop.io.compress.BlockDecompressorStream; import org.apache.hadoop.io.compress.CompressionCodec; @@ -36,19 +44,25 @@ import org.apache.hadoop.io.compress.Decompressor; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hbase.thirdparty.com.google.common.cache.Cache; +import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder; + /** * Hadoop ZStandard codec implemented with zstd-jni. *

* This is data format compatible with Hadoop's native ZStandard codec. */ @InterfaceAudience.Private -public class ZstdCodec implements Configurable, CompressionCodec { +public class ZstdCodec implements Configurable, CompressionCodec, ByteBuffDecompressionCodec { public static final String ZSTD_LEVEL_KEY = "hbase.io.compress.zstd.level"; public static final String ZSTD_BUFFER_SIZE_KEY = "hbase.io.compress.zstd.buffersize"; public static final int ZSTD_BUFFER_SIZE_DEFAULT = 256 * 1024; public static final String ZSTD_DICTIONARY_KEY = "hbase.io.compress.zstd.dictionary"; + private static final Cache> DECOMPRESS_DICT_CACHE = + CacheBuilder.newBuilder().maximumSize(100).expireAfterAccess(10, TimeUnit.MINUTES).build(); + private Configuration conf; private int bufferSize; private int level; @@ -80,6 +94,11 @@ public Decompressor createDecompressor() { return new ZstdDecompressor(bufferSize, dictionary); } + @Override + public ByteBuffDecompressor createByteBuffDecompressor() { + return new ZstdByteBuffDecompressor(dictionary); + } + @Override public CompressionInputStream createInputStream(InputStream in) throws IOException { return createInputStream(in, createDecompressor()); @@ -113,6 +132,17 @@ public Class getDecompressorType() { return ZstdDecompressor.class; } + @Override + public Class getByteBuffDecompressorType() { + return ZstdByteBuffDecompressor.class; + } + + @Override + public Compression.HFileDecompressionContext + getDecompressionContextFromConfiguration(Configuration conf) { + return ZstdHFileDecompressionContext.fromConfiguration(conf); + } + @Override public String getDefaultExtension() { return ".zst"; @@ -133,12 +163,30 @@ static int getBufferSize(Configuration conf) { return size > 0 ? size : ZSTD_BUFFER_SIZE_DEFAULT; } + @Nullable static byte[] getDictionary(final Configuration conf) { String path = conf.get(ZSTD_DICTIONARY_KEY); + return DictionaryCache.getDictionary(conf, path); + } + + /** + * Returns dictionary and its ID number, useful for comparing to other dictionaries for equality + */ + @Nullable + static Pair getDecompressDictionary(final Configuration conf) { + String path = conf.get(ZSTD_DICTIONARY_KEY); + if (path == null) { + return null; + } + try { - return DictionaryCache.getDictionary(conf, path); - } catch (IOException e) { - throw new RuntimeException("Unable to load dictionary at " + path, e); + return DECOMPRESS_DICT_CACHE.get(path, () -> { + byte[] dictBytes = DictionaryCache.getDictionary(conf, path); + int dictId = getDictionaryId(dictBytes); + return new Pair<>(new ZstdDictDecompress(dictBytes), dictId); + }); + } catch (ExecutionException e) { + throw new RuntimeException("Unable to load ZSTD dictionary", e); } } diff --git a/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdHFileDecompressionContext.java b/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdHFileDecompressionContext.java new file mode 100644 index 000000000000..d85f9b30b1b4 --- /dev/null +++ b/hbase-compression/hbase-compression-zstd/src/main/java/org/apache/hadoop/hbase/io/compress/zstd/ZstdHFileDecompressionContext.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
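A note on the DECOMPRESS_DICT_CACHE introduced above: getDecompressDictionary is keyed by the dictionary path, so the ZstdDictDecompress is parsed once and shared, and the accompanying int dictId gives reinit a cheap way to tell whether a pooled decompressor already has the right dictionary loaded. A sketch of that behaviour, assuming code in the same package (the method is package-private) and a path that points at a valid, loadable ZSTD dictionary; all names here are illustrative:

import com.github.luben.zstd.ZstdDictDecompress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.util.Pair;

public class ZstdDictCacheSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Invented path; must resolve to a real ZSTD dictionary for this to run.
    conf.set(ZstdCodec.ZSTD_DICTIONARY_KEY, "hdfs://nameservice/hbase/dicts/t1.dict");

    Pair<ZstdDictDecompress, Integer> first = ZstdCodec.getDecompressDictionary(conf);
    Pair<ZstdDictDecompress, Integer> second = ZstdCodec.getDecompressDictionary(conf);

    // The dictionary is parsed once and cached by path; the Integer is the dictId that
    // ZstdByteBuffDecompressor#reinit compares to avoid reloading an already-loaded dictionary.
    System.out.println(first == second); // true while the cache entry is live
  }
}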
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.compress.zstd; + +import com.github.luben.zstd.ZstdDictDecompress; +import edu.umd.cs.findbugs.annotations.Nullable; +import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.hfile.HFileContext; +import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Holds HFile-level settings used by ZstdByteBuffDecompressor. It's expensive to pull these from a + * Configuration object every time we decompress a block, so pull them upon opening an HFile, and + * reuse them in every block that gets decompressed. + */ +@InterfaceAudience.Private +public class ZstdHFileDecompressionContext extends Compression.HFileDecompressionContext { + + public static final long FIXED_OVERHEAD = ClassSize.estimateBase(HFileContext.class, false); + + @Nullable + private final ZstdDictDecompress dict; + private final int dictId; + // Intended to be set to false by some unit tests + private final boolean allowByteBuffDecompression; + + private ZstdHFileDecompressionContext(@Nullable ZstdDictDecompress dict, int dictId, + boolean allowByteBuffDecompression) { + this.dict = dict; + this.dictId = dictId; + this.allowByteBuffDecompression = allowByteBuffDecompression; + } + + @Nullable + public ZstdDictDecompress getDict() { + return dict; + } + + public int getDictId() { + return dictId; + } + + public boolean isAllowByteBuffDecompression() { + return allowByteBuffDecompression; + } + + public static ZstdHFileDecompressionContext fromConfiguration(Configuration conf) { + boolean allowByteBuffDecompression = + conf.getBoolean("hbase.io.compress.zstd.allowByteBuffDecompression", true); + Pair dictAndId = ZstdCodec.getDecompressDictionary(conf); + if (dictAndId != null) { + return new ZstdHFileDecompressionContext(dictAndId.getFirst(), dictAndId.getSecond(), + allowByteBuffDecompression); + } else { + return new ZstdHFileDecompressionContext(null, 0, allowByteBuffDecompression); + } + } + + @Override + public void close() throws IOException { + if (dict != null) { + dict.close(); + } + } + + @Override + public long heapSize() { + // ZstdDictDecompress objects are cached and shared between ZstdHFileDecompressionContexts, so + // don't include ours in our heap size. 
+ return FIXED_OVERHEAD; + } + + @Override + public String toString() { + return "ZstdHFileDecompressionContext{dictId=" + dictId + ", allowByteBuffDecompression=" + + allowByteBuffDecompression + '}'; + } +} diff --git a/hbase-compression/hbase-compression-zstd/src/test/java/org/apache/hadoop/hbase/io/compress/zstd/TestHFileCompressionZstd.java b/hbase-compression/hbase-compression-zstd/src/test/java/org/apache/hadoop/hbase/io/compress/zstd/TestHFileCompressionZstd.java index da8e1ae52bca..0c9302cb7da1 100644 --- a/hbase-compression/hbase-compression-zstd/src/test/java/org/apache/hadoop/hbase/io/compress/zstd/TestHFileCompressionZstd.java +++ b/hbase-compression/hbase-compression-zstd/src/test/java/org/apache/hadoop/hbase/io/compress/zstd/TestHFileCompressionZstd.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.io.compress.HFileTestBase; import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Test; @@ -43,6 +44,11 @@ public class TestHFileCompressionZstd extends HFileTestBase { @BeforeClass public static void setUpBeforeClass() throws Exception { + HFileTestBase.setUpBeforeClass(); + } + + @Before + public void setUp() throws Exception { conf = TEST_UTIL.getConfiguration(); conf.set(Compression.ZSTD_CODEC_CLASS_KEY, ZstdCodec.class.getCanonicalName()); Compression.Algorithm.ZSTD.reload(conf); @@ -50,7 +56,17 @@ public static void setUpBeforeClass() throws Exception { } @Test - public void test() throws Exception { + public void testWithStreamDecompression() throws Exception { + conf.setBoolean("hbase.io.compress.zstd.allowByteBuffDecompression", false); + Compression.Algorithm.ZSTD.reload(conf); + + Path path = new Path(TEST_UTIL.getDataTestDir(), + HBaseTestingUtility.getRandomUUID().toString() + ".hfile"); + doTest(conf, path, Compression.Algorithm.ZSTD); + } + + @Test + public void testWithByteBuffDecompression() throws Exception { Path path = new Path(TEST_UTIL.getDataTestDir(), HBaseTestingUtility.getRandomUUID().toString() + ".hfile"); doTest(conf, path, Compression.Algorithm.ZSTD); diff --git a/hbase-compression/hbase-compression-zstd/src/test/java/org/apache/hadoop/hbase/io/compress/zstd/TestZstdByteBuffDecompressor.java b/hbase-compression/hbase-compression-zstd/src/test/java/org/apache/hadoop/hbase/io/compress/zstd/TestZstdByteBuffDecompressor.java new file mode 100644 index 000000000000..94e95e1ae02b --- /dev/null +++ b/hbase-compression/hbase-compression-zstd/src/test/java/org/apache/hadoop/hbase/io/compress/zstd/TestZstdByteBuffDecompressor.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
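For reference, the configuration switch exercised by testWithStreamDecompression above can be sketched in isolation (not part of the patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.compress.zstd.ZstdHFileDecompressionContext;

public class DisableByteBuffDecompressionSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Default is true; setting false makes canDecompress() reject every buffer pair after
    // reinit(), so HFileBlockDefaultDecodingContext falls back to the stream-based path.
    conf.setBoolean("hbase.io.compress.zstd.allowByteBuffDecompression", false);

    ZstdHFileDecompressionContext ctx = ZstdHFileDecompressionContext.fromConfiguration(conf);
    System.out.println(ctx); // ZstdHFileDecompressionContext{dictId=0, allowByteBuffDecompression=false}
  }
}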
+ */ +package org.apache.hadoop.hbase.io.compress.zstd; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.nio.ByteBuffer; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.hadoop.hbase.nio.MultiByteBuff; +import org.apache.hadoop.hbase.nio.SingleByteBuff; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestZstdByteBuffDecompressor { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestZstdByteBuffDecompressor.class); + + /* + * "HBase is fun to use and very fast" compressed with zstd, and then prepended with metadata as a + * BlockCompressorStream would. The phrase is split in three parts and put into the payload in + * this structure: (block 1: (chunk 1: HBase is), (chunk 2: fun to use)), (block 2: (chunk 1: and + * very fast)) + */ + private static final byte[] COMPRESSED_PAYLOAD = Bytes.fromHex( + "000000130000001228b52ffd20094900004842617365206973200000001428b52ffd200b59000066756e20746f20757365200000000d0000001628b52ffd200d690000616e6420766572792066617374"); + + @Test + public void testCapabilities() { + ByteBuff emptySingleHeapBuff = new SingleByteBuff(ByteBuffer.allocate(0)); + ByteBuff emptyMultiHeapBuff = new MultiByteBuff(ByteBuffer.allocate(0), ByteBuffer.allocate(0)); + ByteBuff emptySingleDirectBuff = new SingleByteBuff(ByteBuffer.allocateDirect(0)); + ByteBuff emptyMultiDirectBuff = + new MultiByteBuff(ByteBuffer.allocateDirect(0), ByteBuffer.allocateDirect(0)); + + try (ZstdByteBuffDecompressor decompressor = new ZstdByteBuffDecompressor(null)) { + assertTrue(decompressor.canDecompress(emptySingleHeapBuff, emptySingleHeapBuff)); + assertTrue(decompressor.canDecompress(emptySingleDirectBuff, emptySingleDirectBuff)); + assertTrue(decompressor.canDecompress(emptySingleHeapBuff, emptySingleDirectBuff)); + assertTrue(decompressor.canDecompress(emptySingleDirectBuff, emptySingleHeapBuff)); + assertFalse(decompressor.canDecompress(emptyMultiHeapBuff, emptyMultiHeapBuff)); + assertFalse(decompressor.canDecompress(emptyMultiDirectBuff, emptyMultiDirectBuff)); + assertFalse(decompressor.canDecompress(emptySingleHeapBuff, emptyMultiHeapBuff)); + assertFalse(decompressor.canDecompress(emptySingleDirectBuff, emptyMultiDirectBuff)); + } + } + + @Test + public void testDecompressHeapToHeap() throws IOException { + try (ZstdByteBuffDecompressor decompressor = new ZstdByteBuffDecompressor(null)) { + ByteBuff output = new SingleByteBuff(ByteBuffer.allocate(64)); + ByteBuff input = new SingleByteBuff(ByteBuffer.wrap(COMPRESSED_PAYLOAD)); + int decompressedSize = decompressor.decompress(output, input, COMPRESSED_PAYLOAD.length); + assertEquals("HBase is fun to use and very fast", + Bytes.toString(output.toBytes(0, decompressedSize))); + } + } + + @Test + public void testDecompressDirectToDirect() throws IOException { + try (ZstdByteBuffDecompressor decompressor = new ZstdByteBuffDecompressor(null)) { + ByteBuff output = new SingleByteBuff(ByteBuffer.allocateDirect(64)); + ByteBuff input = new SingleByteBuff(ByteBuffer.allocateDirect(COMPRESSED_PAYLOAD.length)); + input.put(COMPRESSED_PAYLOAD); + input.rewind(); + int decompressedSize = 
decompressor.decompress(output, input, COMPRESSED_PAYLOAD.length); + assertEquals("HBase is fun to use and very fast", + Bytes.toString(output.toBytes(0, decompressedSize))); + } + } + + @Test + public void testDecompressDirectToHeap() throws IOException { + try (ZstdByteBuffDecompressor decompressor = new ZstdByteBuffDecompressor(null)) { + ByteBuff output = new SingleByteBuff(ByteBuffer.allocate(64)); + ByteBuff input = new SingleByteBuff(ByteBuffer.allocateDirect(COMPRESSED_PAYLOAD.length)); + input.put(COMPRESSED_PAYLOAD); + input.rewind(); + int decompressedSize = decompressor.decompress(output, input, COMPRESSED_PAYLOAD.length); + assertEquals("HBase is fun to use and very fast", + Bytes.toString(output.toBytes(0, decompressedSize))); + } + } + + @Test + public void testDecompressHeapToDirect() throws IOException { + try (ZstdByteBuffDecompressor decompressor = new ZstdByteBuffDecompressor(null)) { + ByteBuff output = new SingleByteBuff(ByteBuffer.allocateDirect(64)); + ByteBuff input = new SingleByteBuff(ByteBuffer.wrap(COMPRESSED_PAYLOAD)); + int decompressedSize = decompressor.decompress(output, input, COMPRESSED_PAYLOAD.length); + assertEquals("HBase is fun to use and very fast", + Bytes.toString(output.toBytes(0, decompressedSize))); + } + } + +} diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java index c68809a1fddb..c23c222edc54 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java @@ -533,6 +533,14 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo String ZEROCOPY_BYTES_READ = "zeroCopyBytesRead"; String ZEROCOPY_BYTES_READ_DESC = "The number of bytes read through HDFS zero copy"; + String LOCAL_RACK_BYTES_READ = "localRackBytesRead"; + String LOCAL_RACK_BYTES_READ_DESC = + "The number of bytes read from the same rack of the RegionServer, but not the local HDFS DataNode"; + + String REMOTE_RACK_BYTES_READ = "remoteRackBytesRead"; + String REMOTE_RACK_BYTES_READ_DESC = + "The number of bytes read from a different rack from that of the RegionServer"; + String BLOCKED_REQUESTS_COUNT = "blockedRequestCount"; String BLOCKED_REQUESTS_COUNT_DESC = "The number of blocked requests because of memstore size is " + "larger than blockingMemStoreSize"; diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java index 10e71d091f59..67d31ffe64c4 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java @@ -544,6 +544,10 @@ public interface MetricsRegionServerWrapper { /** Returns Number of bytes read from the local HDFS DataNode. */ long getLocalBytesRead(); + long getLocalRackBytesRead(); + + long getRemoteRackBytesRead(); + /** Returns Number of bytes read locally through HDFS short circuit. 
*/ long getShortCircuitBytesRead(); diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java index e0429cfb55d1..b42a02d0e659 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java @@ -560,6 +560,10 @@ private MetricsRecordBuilder addGaugesToMetricsRecordBuilder(MetricsRecordBuilde PERCENT_FILES_LOCAL_SECONDARY_REGIONS_DESC), rsWrap.getPercentFileLocalSecondaryRegions()) .addGauge(Interns.info(TOTAL_BYTES_READ, TOTAL_BYTES_READ_DESC), rsWrap.getTotalBytesRead()) .addGauge(Interns.info(LOCAL_BYTES_READ, LOCAL_BYTES_READ_DESC), rsWrap.getLocalBytesRead()) + .addGauge(Interns.info(LOCAL_RACK_BYTES_READ, LOCAL_RACK_BYTES_READ_DESC), + rsWrap.getLocalRackBytesRead()) + .addGauge(Interns.info(REMOTE_RACK_BYTES_READ, REMOTE_RACK_BYTES_READ_DESC), + rsWrap.getRemoteRackBytesRead()) .addGauge(Interns.info(SHORTCIRCUIT_BYTES_READ, SHORTCIRCUIT_BYTES_READ_DESC), rsWrap.getShortCircuitBytesRead()) .addGauge(Interns.info(ZEROCOPY_BYTES_READ, ZEROCOPY_BYTES_READ_DESC), diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java index 559dec829ee3..78c78c531060 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java @@ -97,6 +97,8 @@ public void perform() throws Exception { suspendRs(server); } catch (Shell.ExitCodeException e) { LOG.warn("Problem suspending but presume successful; code={}", e.getExitCode(), e); + } catch (Exception e) { + LOG.warn("Problem suspending but presume successful", e); } suspendedServers.add(server); break; @@ -106,6 +108,8 @@ public void perform() throws Exception { resumeRs(server); } catch (Shell.ExitCodeException e) { LOG.info("Problem resuming, will retry; code={}", e.getExitCode(), e); + } catch (Exception e) { + LOG.warn("Problem resuming, will retry", e); } break; } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java index fb8ab209c3a1..756f0d3846a6 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java @@ -86,7 +86,6 @@ private static ExecutorService buildMonkeyThreadPool(final int size) { return Executors.newFixedThreadPool(size, new ThreadFactoryBuilder().setDaemon(false) .setNameFormat("ChaosMonkey-%d").setUncaughtExceptionHandler((t, e) -> { LOG.error("Uncaught exception in thread {}", t.getName(), e); - throw new RuntimeException(e); }).build()); } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java index c1854d87c199..2c4dd96eedab 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java +++
b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java @@ -1532,9 +1532,20 @@ protected void runVerify(String outputDir, int numReducers, long expectedNumNode Verify verify = new Verify(); verify.setConf(getConf()); - int retCode = verify.run(iterationOutput, numReducers); - if (retCode > 0) { - throw new RuntimeException("Verify.run failed with return code: " + retCode); + + int retries = getConf().getInt("hbase.itbll.verify.retries", 1); + + while (true) { + int retCode = verify.run(iterationOutput, numReducers); + if (retCode > 0) { + if (retries-- > 0) { + LOG.warn("Verify.run failed with return code: {}. Will retry", retCode); + } else { + throw new RuntimeException("Verify.run failed with return code: " + retCode); + } + } else { + break; + } } if (!verify.verify(expectedNumNodes)) { diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java index 0356f806bf48..4d07e2fbdaef 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java @@ -346,6 +346,25 @@ protected void afterReplay(TEnvironment env) { // no-op } + /** + * Called before we call the execute method of this procedure, but after we acquire the execution + * lock and procedure scheduler lock. + */ + protected void beforeExec(TEnvironment env) throws ProcedureSuspendedException { + // no-op + } + + /** + * Called after we call the execute method of this procedure, and also after we initialize all the + * sub procedures and persist the state if persistence is needed. + *

+ * This is for doing some hooks after we initialize the sub procedures. See HBASE-29259 for more + * details on why we can not release the region lock inside the execute method. + */ + protected void afterExec(TEnvironment env) { + // no-op + } + /** * Called when the procedure is marked as completed (success or rollback). The procedure * implementor may use this method to cleanup in-memory states. This operation will not be retried diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java index 0a3c43b6790b..b19cb01a947c 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java @@ -1781,6 +1781,7 @@ private void execProcedure(RootProcedureState procStack, reExecute = false; procedure.resetPersistence(); try { + procedure.beforeExec(getEnvironment()); subprocs = procedure.doExecute(getEnvironment()); if (subprocs != null && subprocs.length == 0) { subprocs = null; @@ -1790,11 +1791,13 @@ private void execProcedure(RootProcedureState procStack, suspended = true; } catch (ProcedureYieldException e) { LOG.trace("Yield {}", procedure, e); + procedure.afterExec(getEnvironment()); yieldProcedure(procedure); return; } catch (InterruptedException e) { LOG.trace("Yield interrupt {}", procedure, e); handleInterruptedException(procedure, e); + procedure.afterExec(getEnvironment()); yieldProcedure(procedure); return; } catch (Throwable e) { @@ -1866,6 +1869,7 @@ private void execProcedure(RootProcedureState procStack, updateStoreOnExec(procStack, procedure, subprocs); } } + procedure.afterExec(getEnvironment()); // if the store is not running we are aborting if (!store.isRunning()) { diff --git a/hbase-protocol-shaded/src/main/protobuf/Quota.proto b/hbase-protocol-shaded/src/main/protobuf/Quota.proto index 5b00d74980b5..e524e015b625 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Quota.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Quota.proto @@ -49,6 +49,9 @@ enum ThrottleType { REQUEST_CAPACITY_UNIT = 7; WRITE_CAPACITY_UNIT = 8; READ_CAPACITY_UNIT = 9; + ATOMIC_READ_SIZE = 10; + ATOMIC_REQUEST_NUMBER = 11; + ATOMIC_WRITE_SIZE = 12; } message Throttle { @@ -64,6 +67,10 @@ message Throttle { optional TimedQuota req_capacity_unit = 7; optional TimedQuota write_capacity_unit = 8; optional TimedQuota read_capacity_unit = 9; + + optional TimedQuota atomic_read_size = 10; + optional TimedQuota atomic_req_num = 11; + optional TimedQuota atomic_write_size = 12; } message ThrottleRequest { diff --git a/hbase-rpm/.blazar.yaml b/hbase-rpm/.blazar.yaml new file mode 100644 index 000000000000..a1bfcb2ae17b --- /dev/null +++ b/hbase-rpm/.blazar.yaml @@ -0,0 +1,30 @@ +buildpack: + name: Buildpack-RPMs + +env: + RPM_BUILD_COMMAND: ./build.sh + # Below variables are generated in prepare_environment.sh. 
+ # The build environment requires environment variables to be explicitly defined before they may + # be modified by the `write-build-env-var` utility script to persist changes to an environment variable + # throughout a build + REPO_NAME: "" + SET_VERSION: "" + HBASE_VERSION: "" + PKG_RELEASE: "" + FULL_BUILD_VERSION: "" + MAVEN_BUILD_ARGS: "" + +enableBuildTargets: + - almalinux9_amd64 + +depends: + - hbase + +before: + - description: "Prepare build environment" + commands: + - $WORKSPACE/build-scripts/prepare_environment.sh + +stepActivation: + uploadRpms: + branchRegexes: ['.*'] diff --git a/hbase-rpm/build.sh b/hbase-rpm/build.sh new file mode 100755 index 000000000000..b527ca732913 --- /dev/null +++ b/hbase-rpm/build.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -e +set -x + +ROOT_DIR="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" + +for iv in HBASE_VERSION SET_VERSION PKG_RELEASE; do + if [[ "X${!iv}" = "X" ]]; then + echo "Must specify $iv" + exit 1 + fi +done + +# Setup build dir +BUILD_DIR="${ROOT_DIR}/build" +rm -rf $BUILD_DIR +mkdir -p ${BUILD_DIR}/{SOURCES,SPECS,RPMS} +cp -a $ROOT_DIR/sources/* ${BUILD_DIR}/SOURCES/ +cp $ROOT_DIR/hbase.spec ${BUILD_DIR}/SPECS/ + +# Download bin tar built by hbase-assembly +SOURCES_DIR=$BUILD_DIR/SOURCES +mvn dependency:copy \ -Dartifact=org.apache.hbase:hbase-assembly:${SET_VERSION}:tar.gz:bin \ -DoutputDirectory=$SOURCES_DIR \ -DlocalRepositoryDirectory=$SOURCES_DIR \ -Dtransitive=false +INPUT_TAR=`ls -d $SOURCES_DIR/hbase-assembly-*.tar.gz` + +if [[ $HBASE_VERSION == *"-SNAPSHOT" ]]; then + # unreleased version. do we want to denote that in the rpm release somehow? + # it can't be in the version, so strip here + HBASE_VERSION=${HBASE_VERSION//-SNAPSHOT/} +fi + +rpmbuild \ --define "_topdir $BUILD_DIR" \ --define "input_tar $INPUT_TAR" \ --define "hbase_version ${HBASE_VERSION}" \ --define "maven_version ${SET_VERSION}" \ --define "release ${PKG_RELEASE}%{?dist}" \ -bb \ $BUILD_DIR/SPECS/hbase.spec + +if [[ -d $RPMS_OUTPUT_DIR ]]; then + mkdir -p $RPMS_OUTPUT_DIR + + # Move rpms to output dir for upload + + find ${BUILD_DIR}/RPMS -name "*.rpm" -exec mv {} $RPMS_OUTPUT_DIR/ \; +fi diff --git a/hbase-rpm/hbase.spec b/hbase-rpm/hbase.spec new file mode 100644 index 000000000000..107c92636f06 --- /dev/null +++ b/hbase-rpm/hbase.spec @@ -0,0 +1,131 @@ +# taken from hbase.spec in https://github.com/apache/bigtop/ +# greatly modified to simplify and fix dependencies to work in the hubspot environment + +%define hadoop_major_version 3.2 +%define hbase_major_version 2.4 +%define etc_hbase_conf %{_sysconfdir}/hbase/conf +%define etc_hbase_conf_dist %{etc_hbase_conf}.dist +%define hbase_home /usr/lib/hbase +%define bin_hbase %{hbase_home}/bin +%define lib_hbase %{hbase_home}/lib +%define conf_hbase %{hbase_home}/conf +%define logs_hbase %{hbase_home}/logs +%define pids_hbase %{hbase_home}/pids +%define man_dir %{_mandir} +%define hbase_username hbase +%define hadoop_home /usr/lib/hadoop +%define zookeeper_home /usr/lib/zookeeper + +# FIXME: brp-repack-jars uses unzip to expand jar files +# Unfortunately guice-2.0.jar pulled by ivy contains some files and directories without any read permission +# and makes the whole process fail. +# So for now brp-repack-jars is being deactivated until this is fixed.
+# See BIGTOP-294 +%define __os_install_post \ + %{_rpmconfigdir}/brp-compress ; \ + %{_rpmconfigdir}/brp-strip-static-archive %{__strip} ; \ + %{_rpmconfigdir}/brp-strip-comment-note %{__strip} %{__objdump} ; \ + /usr/lib/rpm/brp-python-bytecompile ; \ + %{nil} + +%define doc_hbase %{_docdir}/hbase-%{hbase_version} +%global initd_dir %{_sysconfdir}/rc.d/init.d +%define alternatives_cmd alternatives + +# Disable debuginfo package +%define debug_package %{nil} + +# HubSpot: use zstd because it decompresses much faster +%define _binary_payload w19.zstdio +%define _source_payload w19.zstdio + +Name: hbase +Version: %{hbase_version} +Release: %{release} +BuildArch: noarch +Summary: HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware. +URL: http://hbase.apache.org/ +Group: Systems/Daemons +Buildroot: %{_topdir}/INSTALL/hbase-%{maven_version} +License: ASL 2.0 +Source0: %{input_tar} +Source1: install_hbase.sh + +Requires: coreutils, /usr/sbin/useradd, /sbin/chkconfig, /sbin/service +Requires: hadoop >= %{hadoop_major_version} + +AutoReq: no + +%description +HBase is an open-source, distributed, column-oriented store modeled after Google' Bigtable: A Distributed Storage System for Structured Data by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, HBase provides Bigtable-like capabilities on top of Hadoop. HBase includes: + + * Convenient base classes for backing Hadoop MapReduce jobs with HBase tables + * Query predicate push down via server side scan and get filters + * Optimizations for real time queries + * A high performance Thrift gateway + * A REST-ful Web service gateway that supports XML, Protobuf, and binary data encoding options + * Cascading source and sink modules + * Extensible jruby-based (JIRB) shell + * Support for exporting metrics via the Hadoop metrics subsystem to files or Ganglia; or via JMX + +%prep +%setup -n hbase-%{maven_version} + +%install +%__rm -rf $RPM_BUILD_ROOT +bash %{SOURCE1} \ + --input-tar=%{SOURCE0} \ + --doc-dir=%{doc_hbase} \ + --conf-dir=%{etc_hbase_conf_dist} \ + --prefix=$RPM_BUILD_ROOT + +%__install -d -m 0755 $RPM_BUILD_ROOT/%{initd_dir}/ + +%__install -d -m 0755 %{buildroot}/%{_localstatedir}/log/hbase +ln -s %{_localstatedir}/log/hbase %{buildroot}/%{logs_hbase} + +%__install -d -m 0755 %{buildroot}/%{_localstatedir}/run/hbase +ln -s %{_localstatedir}/run/hbase %{buildroot}/%{pids_hbase} + +%__install -d -m 0755 %{buildroot}/%{_localstatedir}/lib/hbase + +%__install -d -m 0755 $RPM_BUILD_ROOT/usr/bin + +# Pull hadoop from its packages +rm -f $RPM_BUILD_ROOT/%{lib_hbase}/{hadoop,slf4j-log4j12-}*.jar + +ln -f -s %{hadoop_home}/client/hadoop-annotations.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s %{hadoop_home}/client/hadoop-auth.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s %{hadoop_home}/client/hadoop-common.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s %{hadoop_home}/client/hadoop-hdfs-client.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s %{hadoop_home}/client/hadoop-mapreduce-client-common.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s %{hadoop_home}/client/hadoop-mapreduce-client-core.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s %{hadoop_home}/client/hadoop-mapreduce-client-jobclient.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s %{hadoop_home}/client/hadoop-yarn-api.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s 
%{hadoop_home}/client/hadoop-yarn-client.jar $RPM_BUILD_ROOT/%{lib_hbase} +ln -f -s %{hadoop_home}/client/hadoop-yarn-common.jar $RPM_BUILD_ROOT/%{lib_hbase} + +%pre +getent group hbase 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hbase +getent passwd hbase 2>&1 > /dev/null || /usr/sbin/useradd -c "HBase" -s /sbin/nologin -g hbase -r -d /var/lib/hbase hbase 2> /dev/null || : + +%post +%{alternatives_cmd} --install %{etc_hbase_conf} %{name}-conf %{etc_hbase_conf_dist} 30 + +%files +%defattr(-,hbase,hbase) +%{logs_hbase} +%{pids_hbase} +%dir %{_localstatedir}/log/hbase +%dir %{_localstatedir}/run/hbase +%dir %{_localstatedir}/lib/hbase + +%defattr(-,root,root) +%{hbase_home} +%{hbase_home}/hbase-*.jar +/usr/bin/hbase +%config(noreplace) %{etc_hbase_conf_dist} + +# files from doc package +%defattr(-,root,root) +%doc %{doc_hbase}/ diff --git a/hbase-rpm/sources/hbase.1 b/hbase-rpm/sources/hbase.1 new file mode 100644 index 000000000000..349218fe1d87 --- /dev/null +++ b/hbase-rpm/sources/hbase.1 @@ -0,0 +1,88 @@ +.\" Licensed to the Apache Software Foundation (ASF) under one or more +.\" contributor license agreements. See the NOTICE file distributed with +.\" this work for additional information regarding copyright ownership. +.\" The ASF licenses this file to You under the Apache License, Version 2.0 +.\" (the "License"); you may not use this file except in compliance with +.\" the License. You may obtain a copy of the License at +.\" +.\" http://www.apache.org/licenses/LICENSE-2.0 +.\" +.\" Unless required by applicable law or agreed to in writing, software +.\" distributed under the License is distributed on an "AS IS" BASIS, +.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.\" See the License for the specific language governing permissions and +.\" limitations under the License. +.\" +.\" Process this file with +.\" groff -man -Tascii hbase.1 +.\" +.TH hbase 1 "October 2010 " Linux "User Manuals" + +.SH NAME +HBase \- HBase is the Hadoop database. + +.SH SYNOPSIS + +.B hbase +\fICOMMAND\fR + +.SH DESCRIPTION + +HBase is the Hadoop database. Use it when you need random, realtime +read/write access to your Big Data. This project's goal is the hosting +of very large tables -- billions of rows X millions of columns -- atop +clusters of commodity hardware. + +HBase is an open-source, distributed, versioned, column-oriented store +modeled after Google's Bigtable: A Distributed Storage System for +Structured Data by Chang et al. Just as Bigtable leverages the +distributed data storage provided by the Google File System, HBase +provides Bigtable-like capabilities on top of Hadoop. + +For more information about HBase, see http://hbase.apache.org. + +\fICOMMAND\fR may be one of the following: + shell run the HBase shell + shell-tests run the HBase shell tests + zkcli run the ZooKeeper shell + master run an HBase HMaster node + regionserver run an HBase HRegionServer node + zookeeper run a Zookeeper server + rest run an HBase REST server + thrift run an HBase Thrift server + avro run an HBase Avro server + migrate upgrade an hbase.rootdir + hbck run the hbase 'fsck' tool + or + CLASSNAME run the class named CLASSNAME + +Most commands print help when invoked w/o parameters or with --help. + +.SH ENVIRONMENT + +.IP JAVA_HOME +The java implementation to use. Overrides JAVA_HOME. + +.IP HBASE_CLASSPATH +Extra Java CLASSPATH entries. + +.IP HBASE_HEAPSIZE +The maximum amount of heap to use, in MB. Default is 1000. + +.IP HBASE_OPTS +Extra Java runtime options. 
+ +.IP HBASE_CONF_DIR +Alternate conf dir. Default is ${HBASE_HOME}/conf. + +.IP HBASE_ROOT_LOGGER +The root appender. Default is INFO,console + +.IP HIVE_OPT +Extra Java runtime options. + +.IP HADOOP_HOME +Optionally, the Hadoop home to run with. + +.SH COPYRIGHT +Copyright (C) 2010 The Apache Software Foundation. All rights reserved. diff --git a/hbase-rpm/sources/install_hbase.sh b/hbase-rpm/sources/install_hbase.sh new file mode 100755 index 000000000000..95265d2100c8 --- /dev/null +++ b/hbase-rpm/sources/install_hbase.sh @@ -0,0 +1,180 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +usage() { + echo " +usage: $0 + Required not-so-options: + --mvn-target-dir=DIR path to the output of the mvn assembly + --prefix=PREFIX path to install into + + Optional options: + --doc-dir=DIR path to install docs into [/usr/share/doc/hbase] + --lib-dir=DIR path to install hbase home [/usr/lib/hbase] + --installed-lib-dir=DIR path where lib-dir will end up on target system + --bin-dir=DIR path to install bins [/usr/bin] + --examples-dir=DIR path to install examples [doc-dir/examples] + ... [ see source for more similar options ] + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o '' \ + -l 'prefix:' \ + -l 'doc-dir:' \ + -l 'lib-dir:' \ + -l 'installed-lib-dir:' \ + -l 'bin-dir:' \ + -l 'examples-dir:' \ + -l 'conf-dir:' \ + -l 'input-tar:' -- "$@") + +if [ $? != 0 ] ; then + usage +fi + +eval set -- "$OPTS" +while true ; do + case "$1" in + --prefix) + PREFIX=$2 ; shift 2 + ;; + --input-tar) + INPUT_TAR=$2 ; shift 2 + ;; + --doc-dir) + DOC_DIR=$2 ; shift 2 + ;; + --lib-dir) + LIB_DIR=$2 ; shift 2 + ;; + --bin-dir) + BIN_DIR=$2 ; shift 2 + ;; + --examples-dir) + EXAMPLES_DIR=$2 ; shift 2 + ;; + --conf-dir) + CONF_DIR=$2 ; shift 2 + ;; + --) + shift ; break + ;; + *) + echo "Unknown option: $1" + usage + exit 1 + ;; + esac +done + +for var in PREFIX INPUT_TAR ; do + if [ -z "$(eval "echo \$$var")" ]; then + echo Missing param: $var + usage + fi +done + +MAN_DIR=${MAN_DIR:-/usr/share/man/man1} +DOC_DIR=${DOC_DIR:-/usr/share/doc/hbase} +LIB_DIR=${LIB_DIR:-/usr/lib/hbase} + +BIN_DIR=${BIN_DIR:-/usr/lib/hbase/bin} +ETC_DIR=${ETC_DIR:-/etc/hbase} +CONF_DIR=${CONF_DIR:-${ETC_DIR}/conf.dist} +THRIFT_DIR=${THRIFT_DIR:-${LIB_DIR}/include/thrift} + +EXTRACT_DIR=extracted +rm -rf $EXTRACT_DIR +mkdir $EXTRACT_DIR + +version_part=$SET_VERSION +if [ -z "$version_part" ]; then + version_part=$HBASE_VERSION +fi + +tar -C $EXTRACT_DIR --strip-components=1 -xzf $INPUT_TAR + +# we do not need the shaded clients in our rpm. they bloat the size and cause classpath issues for hbck2. 
+rm -rf $EXTRACT_DIR/lib/shaded-clients + +install -d -m 0755 $PREFIX/$LIB_DIR +install -d -m 0755 $PREFIX/$LIB_DIR/lib +install -d -m 0755 $PREFIX/$DOC_DIR +install -d -m 0755 $PREFIX/$BIN_DIR +install -d -m 0755 $PREFIX/$ETC_DIR +install -d -m 0755 $PREFIX/$MAN_DIR +install -d -m 0755 $PREFIX/$THRIFT_DIR + +cp -ra $EXTRACT_DIR/lib/* ${PREFIX}/${LIB_DIR}/lib/ +cp $EXTRACT_DIR/lib/hbase*.jar $PREFIX/$LIB_DIR + +# We do not currently run "mvn site", so do not have a docs dir. +# Only copy contents if dir exists +if [ -n "$(ls -A $EXTRACT_DIR/docs 2>/dev/null)" ]; then + cp -a $EXTRACT_DIR/docs/* $PREFIX/$DOC_DIR + cp $EXTRACT_DIR/*.txt $PREFIX/$DOC_DIR/ +else + echo "Doc generation is currently disabled in our RPM build. If this is an issue, it should be possible to enable them with some work. See https://git.hubteam.com/HubSpot/apache-hbase/blob/hubspot-2/rpm/sources/do-component-build#L17-L24 for details." > $PREFIX/$DOC_DIR/README.txt +fi + +cp -a $EXTRACT_DIR/conf $PREFIX/$CONF_DIR +cp -a $EXTRACT_DIR/bin/* $PREFIX/$BIN_DIR + +# Purge scripts that don't work with packages +for file in rolling-restart.sh graceful_stop.sh local-regionservers.sh \ + master-backup.sh regionservers.sh zookeepers.sh hbase-daemons.sh \ + start-hbase.sh stop-hbase.sh local-master-backup.sh ; do + rm -f $PREFIX/$BIN_DIR/$file +done + + +ln -s $ETC_DIR/conf $PREFIX/$LIB_DIR/conf + +# Make a symlink of hbase.jar to hbase-version.jar +pushd `pwd` +cd $PREFIX/$LIB_DIR +for i in `ls hbase*jar | grep -v tests.jar` +do + ln -s $i `echo $i | sed -n 's/\(.*\)\(-[0-9].*\)\(.jar\)/\1\3/p'` +done +popd + +wrapper=$PREFIX/usr/bin/hbase +mkdir -p `dirname $wrapper` +cat > $wrapper < { + int numEvictedReferred = top + ? cache.evictBlocksRangeByHfileName(referred, offset, Long.MAX_VALUE) + : cache.evictBlocksRangeByHfileName(referred, 0, offset); + int numEvictedReference = cache.evictBlocksByHfileName(reference); + LOG.trace( + "Closing reference: {}; referred file: {}; was top? {}; evicted for referred: {};" + + "evicted for reference: {}", + reference, referred, top, numEvictedReferred, numEvictedReference); + }); + } + reader.close(false); + } else { + reader.close(evictOnClose); + } + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java index 5b11035ebe73..a468752de5cb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java @@ -235,4 +235,15 @@ default Optional>> getFullyCachedFiles() { default Optional> getRegionCachedInfo() { return Optional.empty(); } + + /** + * Evict all blocks for the given file name between the passed offset values. + * @param hfileName The file for which blocks should be evicted. + * @param initOffset the initial offset for the range of blocks to be evicted. + * @param endOffset the end offset for the range of blocks to be evicted. + * @return number of blocks evicted. 
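+ * The default implementation is a no-op that returns 0. Caches that index blocks by file name and
+ * offset, such as the bucket cache, override this to evict only the blocks within the given range.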
+ */ + default int evictBlocksRangeByHfileName(String hfileName, long initOffset, long endOffset) { + return 0; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java index 65b886f80ed5..7324701efe58 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hbase.io.hfile; +import static org.apache.hadoop.hbase.io.hfile.HFileBlock.FILL_HEADER; + import java.io.IOException; import java.nio.ByteBuffer; import java.util.HashSet; @@ -28,8 +30,10 @@ import java.util.concurrent.ConcurrentSkipListSet; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram; +import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ChecksumType; import org.apache.hadoop.hbase.util.GsonUtil; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -258,6 +262,44 @@ public static int getMaxCachedBlocksByFile(Configuration conf) { return conf == null ? DEFAULT_MAX : conf.getInt("hbase.ui.blockcache.by.file.max", DEFAULT_MAX); } + /** + * Similarly to HFileBlock.Writer.getBlockForCaching(), creates a HFileBlock instance without + * checksum for caching. This is needed for when we cache blocks via readers (either prefetch or + * client read), otherwise we may fail equality comparison when checking against same block that + * may already have been cached at write time. + * @param cacheConf the related CacheConfig object. + * @param block the HFileBlock instance to be converted. + * @return the resulting HFileBlock instance without checksum. + */ + public static HFileBlock getBlockForCaching(CacheConfig cacheConf, HFileBlock block) { + // Calculate how many bytes we need for checksum on the tail of the block. + int numBytes = cacheConf.shouldCacheCompressed(block.getBlockType().getCategory()) + ? 
0 + : (int) ChecksumUtil.numBytes(block.getOnDiskDataSizeWithHeader(), + block.getHFileContext().getBytesPerChecksum()); + ByteBuff buff = block.getBufferReadOnly(); + HFileBlockBuilder builder = new HFileBlockBuilder(); + return builder.withBlockType(block.getBlockType()) + .withOnDiskSizeWithoutHeader(block.getOnDiskSizeWithoutHeader()) + .withUncompressedSizeWithoutHeader(block.getUncompressedSizeWithoutHeader()) + .withPrevBlockOffset(block.getPrevBlockOffset()).withByteBuff(buff) + .withFillHeader(FILL_HEADER).withOffset(block.getOffset()).withNextBlockOnDiskSize(-1) + .withOnDiskDataSizeWithHeader(block.getOnDiskDataSizeWithHeader() + numBytes) + .withHFileContext(cloneContext(block.getHFileContext())) + .withByteBuffAllocator(cacheConf.getByteBuffAllocator()).withShared(!buff.hasArray()).build(); + } + + public static HFileContext cloneContext(HFileContext context) { + HFileContext newContext = new HFileContextBuilder().withBlockSize(context.getBlocksize()) + .withBytesPerCheckSum(0).withChecksumType(ChecksumType.NULL) // no checksums in cached data + .withCompression(context.getCompression()) + .withDataBlockEncoding(context.getDataBlockEncoding()) + .withHBaseCheckSum(context.isUseHBaseChecksum()).withCompressTags(context.isCompressTags()) + .withIncludesMvcc(context.isIncludesMvcc()).withIncludesTags(context.isIncludesTags()) + .withColumnFamily(context.getColumnFamily()).withTableName(context.getTableName()).build(); + return newContext; + } + /** * Use one of these to keep a running account of cached blocks by file. Throw it away when done. * This is different than metrics in that it is stats on current state of a cache. See diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java index 34c97ee64daa..92d7f4eb8903 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java @@ -72,6 +72,8 @@ public class CacheConfig implements ConfigurationObserver { */ public static final String EVICT_BLOCKS_ON_CLOSE_KEY = "hbase.rs.evictblocksonclose"; + public static final String EVICT_BLOCKS_ON_SPLIT_KEY = "hbase.rs.evictblocksonsplit"; + /** * Configuration key to prefetch all blocks of a given file into the block cache when the file is * opened. @@ -107,6 +109,7 @@ public class CacheConfig implements ConfigurationObserver { public static final boolean DEFAULT_CACHE_INDEXES_ON_WRITE = false; public static final boolean DEFAULT_CACHE_BLOOMS_ON_WRITE = false; public static final boolean DEFAULT_EVICT_ON_CLOSE = false; + public static final boolean DEFAULT_EVICT_ON_SPLIT = true; public static final boolean DEFAULT_CACHE_DATA_COMPRESSED = false; public static final boolean DEFAULT_PREFETCH_ON_OPEN = false; public static final boolean DEFAULT_CACHE_COMPACTED_BLOCKS_ON_WRITE = false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index 00dc8e4a5551..ef536f9e0be3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -492,4 +492,9 @@ public Optional getBlockSize(BlockCacheKey key) { return l1Result.isPresent() ? 
l1Result : l2Cache.getBlockSize(key); } + @Override + public int evictBlocksRangeByHfileName(String hfileName, long initOffset, long endOffset) { + return l1Cache.evictBlocksRangeByHfileName(hfileName, initOffset, endOffset) + + l2Cache.evictBlocksRangeByHfileName(hfileName, initOffset, endOffset); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index 16bec1e95888..4c73fc2bcdc7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -697,7 +697,7 @@ public boolean isUnpacked() { * when block is returned to the cache. * @return the offset of this block in the file it was read from */ - long getOffset() { + public long getOffset() { if (offset < 0) { throw new IllegalStateException("HFile block offset not initialized properly"); } @@ -1205,16 +1205,7 @@ void writeBlock(BlockWritable bw, FSDataOutputStream out) throws IOException { * being wholesome (ECC memory or if file-backed, it does checksumming). */ HFileBlock getBlockForCaching(CacheConfig cacheConf) { - HFileContext newContext = new HFileContextBuilder().withBlockSize(fileContext.getBlocksize()) - .withBytesPerCheckSum(0).withChecksumType(ChecksumType.NULL) // no checksums in cached data - .withCompression(fileContext.getCompression()) - .withDataBlockEncoding(fileContext.getDataBlockEncoding()) - .withHBaseCheckSum(fileContext.isUseHBaseChecksum()) - .withCompressTags(fileContext.isCompressTags()) - .withIncludesMvcc(fileContext.isIncludesMvcc()) - .withIncludesTags(fileContext.isIncludesTags()) - .withColumnFamily(fileContext.getColumnFamily()).withTableName(fileContext.getTableName()) - .build(); + HFileContext newContext = BlockCacheUtil.cloneContext(fileContext); // Build the HFileBlock. 
HFileBlockBuilder builder = new HFileBlockBuilder(); ByteBuff buff; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java index 1f2e5ec6d965..e16235373856 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java @@ -404,6 +404,8 @@ private HFileContext createHFileContext(Path path, FixedFileTrailer trailer, Con throws IOException { HFileContextBuilder builder = new HFileContextBuilder().withHBaseCheckSum(true) .withHFileName(path.getName()).withCompression(trailer.getCompressionCodec()) + .withDecompressionContext( + trailer.getCompressionCodec().getHFileDecompressionContextForConfiguration(conf)) .withCellComparator(FixedFileTrailer.createComparator(trailer.getComparatorClassName())); // Check for any key material available byte[] keyBytes = trailer.getEncryptionKey(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java index 926237314828..b95ce4bde556 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java @@ -46,7 +46,7 @@ public HFilePreadReader(ReaderContext context, HFileInfo fileInfo, CacheConfig c }); // Prefetch file blocks upon open if requested - if (cacheConf.shouldPrefetchOnOpen() && cacheIfCompactionsOff() && shouldCache.booleanValue()) { + if (cacheConf.shouldPrefetchOnOpen() && shouldCache.booleanValue()) { PrefetchExecutor.request(path, new Runnable() { @Override public void run() { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index b6a061043070..db2383db399d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hbase.io.hfile; -import static org.apache.hadoop.hbase.regionserver.CompactSplit.HBASE_REGION_SERVER_ENABLE_COMPACTION; import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.BLOCK_CACHE_KEY_KEY; import io.opentelemetry.api.common.Attributes; @@ -42,14 +41,12 @@ import org.apache.hadoop.hbase.SizeCachedKeyValue; import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue; import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue; -import org.apache.hadoop.hbase.io.HFileLink; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.regionserver.KeyValueScanner; -import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.IdLock; @@ -159,6 +156,10 @@ public BlockIndexNotLoadedException(Path path) { } } + public CacheConfig getCacheConf() { + return cacheConf; + } + private Optional toStringFirstKey() { return getFirstKey().map(CellUtil::getCellKeyAsString); } @@ -307,7 +308,7 @@ public 
NotSeekedException(Path path) { } } - protected static class HFileScannerImpl implements HFileScanner { + public static class HFileScannerImpl implements HFileScanner { private ByteBuff blockBuffer; protected final boolean cacheBlocks; protected final boolean pread; @@ -340,6 +341,11 @@ protected static class HFileScannerImpl implements HFileScanner { // Whether we returned a result for curBlock's size in recordBlockSize(). // gets reset whenever curBlock is changed. private boolean providedCurrentBlockSize = false; + + public HFileBlock getCurBlock() { + return curBlock; + } + // Previous blocks that were used in the course of the read protected final ArrayList prevBlocks = new ArrayList<>(); @@ -1292,8 +1298,6 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo BlockCacheKey cacheKey = new BlockCacheKey(path, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType); - boolean cacheable = cacheBlock && cacheIfCompactionsOff(); - boolean useLock = false; IdLock.Entry lockEntry = null; final Span span = Span.current(); @@ -1340,7 +1344,7 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo return cachedBlock; } - if (!useLock && cacheable && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) { + if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) { // check cache again with lock useLock = true; continue; @@ -1351,7 +1355,7 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo span.addEvent("block cache miss", attributes); // Load block from filesystem. HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread, - !isCompaction, shouldUseHeap(expectedBlockType, cacheable)); + !isCompaction, shouldUseHeap(expectedBlockType, cacheBlock)); try { validateBlockType(hfileBlock, expectedBlockType); } catch (IOException e) { @@ -1364,25 +1368,30 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo // Don't need the unpacked block back and we're storing the block in the cache compressed if (cacheOnly && cacheCompressed && cacheOnRead) { + HFileBlock blockNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, hfileBlock); cacheConf.getBlockCache().ifPresent(cache -> { LOG.debug("Skipping decompression of block {} in prefetch", cacheKey); // Cache the block if necessary - if (cacheable && cacheConf.shouldCacheBlockOnRead(category)) { - cache.cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory(), cacheOnly); + if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) { + cache.cacheBlock(cacheKey, blockNoChecksum, cacheConf.isInMemory(), cacheOnly); } }); if (updateCacheMetrics && hfileBlock.getBlockType().isData()) { HFile.DATABLOCK_READ_COUNT.increment(); } - return hfileBlock; + return blockNoChecksum; } HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader); + HFileBlock unpackedNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, unpacked); // Cache the block if necessary cacheConf.getBlockCache().ifPresent(cache -> { - if (cacheable && cacheConf.shouldCacheBlockOnRead(category)) { + if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) { // Using the wait on cache during compaction and prefetching. - cache.cacheBlock(cacheKey, cacheCompressed ? hfileBlock : unpacked, + cache.cacheBlock(cacheKey, + cacheCompressed + ? 
BlockCacheUtil.getBlockForCaching(cacheConf, hfileBlock) + : unpackedNoChecksum, cacheConf.isInMemory(), cacheOnly); } }); @@ -1394,7 +1403,7 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo HFile.DATABLOCK_READ_COUNT.increment(); } - return unpacked; + return unpackedNoChecksum; } } finally { if (lockEntry != null) { @@ -1716,9 +1725,4 @@ public int getMajorVersion() { public void unbufferStream() { fsBlockReader.unbufferStream(); } - - protected boolean cacheIfCompactionsOff() { - return (!StoreFileInfo.isReference(name) && !HFileLink.isHFileLink(name)) - || !conf.getBoolean(HBASE_REGION_SERVER_ENABLE_COMPACTION, true); - } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 3b08655bcfb3..cd82af74108a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -79,6 +79,7 @@ import org.apache.hadoop.hbase.nio.RefCnt; import org.apache.hadoop.hbase.protobuf.ProtobufMagic; import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.IdReadWriteLock; @@ -222,6 +223,8 @@ public class BucketCache implements BlockCache, HeapSize { // reset after a successful read/write. private volatile long ioErrorStartTime = -1; + private Configuration conf; + /** * A ReentrantReadWriteLock to lock on a particular block identified by offset. The purpose of * this is to avoid freeing the block which is being read. @@ -582,6 +585,30 @@ protected void cacheBlockWithWaitInternal(BlockCacheKey cacheKey, Cacheable cach } } + /** + * If the passed cache key relates to a reference (.), this method looks + * for the block from the referred file, in the cache. If present in the cache, the block for the + * referred file is returned, otherwise, this method returns null. It will also return null if the + * passed cache key doesn't relate to a reference. + * @param key the BlockCacheKey instance to look for in the cache. + * @return the cached block from the referred file, null if there's no such block in the cache or + * the passed key doesn't relate to a reference. + */ + public BucketEntry getBlockForReference(BlockCacheKey key) { + BucketEntry foundEntry = null; + String referredFileName = null; + if (StoreFileInfo.isReference(key.getHfileName())) { + referredFileName = StoreFileInfo.getReferredToRegionAndFile(key.getHfileName()).getSecond(); + } + if (referredFileName != null) { + BlockCacheKey convertedCacheKey = new BlockCacheKey(referredFileName, key.getOffset()); + foundEntry = backingMap.get(convertedCacheKey); + LOG.debug("Got a link/ref: {}. Related cacheKey: {}. Found entry: {}", key.getHfileName(), + convertedCacheKey, foundEntry); + } + return foundEntry; + } + /** * Get the buffer of the block with the specified key. 
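+ * If no entry is cached under the exact key and the key names a reference file, the block cached
+ * for the referred (parent) file at the same offset is returned instead.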
* @param key block's cache key @@ -605,6 +632,9 @@ public Cacheable getBlock(BlockCacheKey key, boolean caching, boolean repeat, return re.getData(); } BucketEntry bucketEntry = backingMap.get(key); + if (bucketEntry == null) { + bucketEntry = getBlockForReference(key); + } if (bucketEntry != null) { long start = System.nanoTime(); ReentrantReadWriteLock lock = offsetLock.getLock(bucketEntry.offset()); @@ -613,7 +643,9 @@ public Cacheable getBlock(BlockCacheKey key, boolean caching, boolean repeat, // We can not read here even if backingMap does contain the given key because its offset // maybe changed. If we lock BlockCacheKey instead of offset, then we can only check // existence here. - if (bucketEntry.equals(backingMap.get(key))) { + if ( + bucketEntry.equals(backingMap.get(key)) || bucketEntry.equals(getBlockForReference(key)) + ) { // Read the block from IOEngine based on the bucketEntry's offset and length, NOTICE: the // block will use the refCnt of bucketEntry, which means if two HFileBlock mapping to // the same BucketEntry, then all of the three will share the same refCnt. @@ -1750,8 +1782,15 @@ protected String getAlgorithm() { */ @Override public int evictBlocksByHfileName(String hfileName) { + return evictBlocksRangeByHfileName(hfileName, 0, Long.MAX_VALUE); + } + + @Override + public int evictBlocksRangeByHfileName(String hfileName, long initOffset, long endOffset) { fileNotFullyCached(hfileName); - Set keySet = getAllCacheKeysForFile(hfileName); + Set keySet = getAllCacheKeysForFile(hfileName, initOffset, endOffset); + LOG.debug("found {} blocks for file {}, starting offset: {}, end offset: {}", keySet.size(), + hfileName, initOffset, endOffset); int numEvicted = 0; for (BlockCacheKey key : keySet) { if (evictBlock(key)) { @@ -1761,9 +1800,9 @@ public int evictBlocksByHfileName(String hfileName) { return numEvicted; } - private Set getAllCacheKeysForFile(String hfileName) { - return blocksByHFile.subSet(new BlockCacheKey(hfileName, Long.MIN_VALUE), true, - new BlockCacheKey(hfileName, Long.MAX_VALUE), true); + private Set getAllCacheKeysForFile(String hfileName, long init, long end) { + return blocksByHFile.subSet(new BlockCacheKey(hfileName, init), true, + new BlockCacheKey(hfileName, end), true); } /** @@ -2173,25 +2212,20 @@ public void notifyFileCachingCompleted(Path fileName, int totalBlockCount, int d try { final MutableInt count = new MutableInt(); LOG.debug("iterating over {} entries in the backing map", backingMap.size()); - backingMap.entrySet().stream().forEach(entry -> { - if ( - entry.getKey().getHfileName().equals(fileName.getName()) - && entry.getKey().getBlockType().equals(BlockType.DATA) - ) { - long offsetToLock = entry.getValue().offset(); - LOG.debug("found block {} in the backing map. 
Acquiring read lock for offset {}", - entry.getKey(), offsetToLock); - ReentrantReadWriteLock lock = offsetLock.getLock(offsetToLock); - lock.readLock().lock(); - locks.add(lock); - // rechecks the given key is still there (no eviction happened before the lock acquired) - if (backingMap.containsKey(entry.getKey())) { - count.increment(); - } else { - lock.readLock().unlock(); - locks.remove(lock); - LOG.debug("found block {}, but when locked and tried to count, it was gone."); - } + Set result = getAllCacheKeysForFile(fileName.getName(), 0, Long.MAX_VALUE); + if (result.isEmpty() && StoreFileInfo.isReference(fileName)) { + result = getAllCacheKeysForFile( + StoreFileInfo.getReferredToRegionAndFile(fileName.getName()).getSecond(), 0, + Long.MAX_VALUE); + } + result.stream().forEach(entry -> { + LOG.debug("found block for file {} in the backing map. Acquiring read lock for offset {}", + fileName.getName(), entry.getOffset()); + ReentrantReadWriteLock lock = offsetLock.getLock(entry.getOffset()); + lock.readLock().lock(); + locks.add(lock); + if (backingMap.containsKey(entry) && entry.getBlockType() == BlockType.DATA) { + count.increment(); } }); int metaCount = totalBlockCount - dataBlockCount; @@ -2214,17 +2248,19 @@ public void notifyFileCachingCompleted(Path fileName, int totalBlockCount, int d + "and try the verification again.", fileName.getName()); Thread.sleep(100); notifyFileCachingCompleted(fileName, totalBlockCount, dataBlockCount, size); - } else - if ((getAllCacheKeysForFile(fileName.getName()).size() - metaCount) == dataBlockCount) { - LOG.debug("We counted {} data blocks, expected was {}, there was no more pending in " - + "the cache write queue but we now found that total cached blocks for file {} " - + "is equal to data block count.", count, dataBlockCount, fileName.getName()); - fileCacheCompleted(fileName, size); - } else { - LOG.info("We found only {} data blocks cached from a total of {} for file {}, " - + "but no blocks pending caching. Maybe cache is full or evictions " - + "happened concurrently to cache prefetch.", count, dataBlockCount, fileName); - } + } else if ( + (getAllCacheKeysForFile(fileName.getName(), 0, Long.MAX_VALUE).size() - metaCount) + == dataBlockCount + ) { + LOG.debug("We counted {} data blocks, expected was {}, there was no more pending in " + + "the cache write queue but we now found that total cached blocks for file {} " + + "is equal to data block count.", count, dataBlockCount, fileName.getName()); + fileCacheCompleted(fileName, size); + } else { + LOG.info("We found only {} data blocks cached from a total of {} for file {}, " + + "but no blocks pending caching. Maybe cache is full or evictions " + + "happened concurrently to cache prefetch.", count, dataBlockCount, fileName); + } } } catch (InterruptedException e) { throw new RuntimeException(e); @@ -2250,14 +2286,20 @@ public Optional shouldCacheFile(String fileName) { @Override public Optional isAlreadyCached(BlockCacheKey key) { - return Optional.of(getBackingMap().containsKey(key)); + boolean foundKey = backingMap.containsKey(key); + // if there's no entry for the key itself, we need to check if this key is for a reference, + // and if so, look for a block from the referenced file using this getBlockForReference method. + return Optional.of(foundKey ? 
true : getBlockForReference(key) != null); } @Override public Optional getBlockSize(BlockCacheKey key) { BucketEntry entry = backingMap.get(key); if (entry == null) { - return Optional.empty(); + // the key might be for a reference tha we had found the block from the referenced file in + // the cache when we first tried to cache it. + entry = getBlockForReference(key); + return entry == null ? Optional.empty() : Optional.of(entry.getOnDiskSizeWithHeader()); } else { return Optional.of(entry.getOnDiskSizeWithHeader()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 9cafbb7cbf9e..21da55d7757b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -137,6 +137,7 @@ import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore; import org.apache.hadoop.hbase.master.hbck.HbckChore; import org.apache.hadoop.hbase.master.http.MasterDumpServlet; +import org.apache.hadoop.hbase.master.http.MasterHealthServlet; import org.apache.hadoop.hbase.master.http.MasterRedirectServlet; import org.apache.hadoop.hbase.master.http.MasterStatusServlet; import org.apache.hadoop.hbase.master.http.api_v1.ResourceConfigFactory; @@ -775,6 +776,11 @@ protected Class getDumpServlet() { return MasterDumpServlet.class; } + @Override + protected Class getHealthServlet() { + return MasterHealthServlet.class; + } + @Override public MetricsMaster getMasterMetrics() { return metricsMaster; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java index d1caa2094212..dd377881ae26 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java @@ -283,11 +283,22 @@ private void unattach(MasterProcedureEnv env) { getParent(env).unattachRemoteProc(this); } + @Override + protected void beforeExec(MasterProcedureEnv env) { + RegionStateNode regionNode = getRegionNode(env); + regionNode.lock(); + } + + @Override + protected void afterExec(MasterProcedureEnv env) { + RegionStateNode regionNode = getRegionNode(env); + regionNode.unlock(); + } + @Override protected Procedure[] execute(MasterProcedureEnv env) throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { RegionStateNode regionNode = getRegionNode(env); - regionNode.lock(); try { switch (state) { case REGION_REMOTE_PROCEDURE_DISPATCH: { @@ -333,8 +344,6 @@ protected Procedure[] execute(MasterProcedureEnv env) setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); skipPersistence(); throw new ProcedureSuspendedException(); - } finally { - regionNode.unlock(); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java index 81397915647d..18fe47ad31a1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hbase.master.assignment; import static 
org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_CLOSE; +import static org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_SPLIT; import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_CLOSE_KEY; +import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_SPLIT_KEY; import static org.apache.hadoop.hbase.master.LoadBalancer.BOGUS_SERVER_NAME; import static org.apache.hadoop.hbase.master.assignment.AssignmentManager.FORCE_REGION_RETAINMENT; @@ -37,7 +39,6 @@ import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; -import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; @@ -335,7 +336,9 @@ private void closeRegion(MasterProcedureEnv env, RegionStateNode regionNode) thr env.getAssignmentManager().regionClosing(regionNode); CloseRegionProcedure closeProc = isSplit ? new CloseRegionProcedure(this, getRegion(), regionNode.getRegionLocation(), - assignCandidate, true) + assignCandidate, + env.getMasterConfiguration().getBoolean(EVICT_BLOCKS_ON_SPLIT_KEY, + DEFAULT_EVICT_ON_SPLIT)) : new CloseRegionProcedure(this, getRegion(), regionNode.getRegionLocation(), assignCandidate, evictCache); addChildProcedure(closeProc); @@ -386,19 +389,18 @@ private Flow confirmClosed(MasterProcedureEnv env, RegionStateNode regionNode) return Flow.HAS_MORE_STATE; } - // Override to lock RegionStateNode - @SuppressWarnings("rawtypes") @Override - protected Procedure[] execute(MasterProcedureEnv env) - throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { + protected void beforeExec(MasterProcedureEnv env) { RegionStateNode regionNode = env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion()); regionNode.lock(); - try { - return super.execute(env); - } finally { - regionNode.unlock(); - } + } + + @Override + protected void afterExec(MasterProcedureEnv env) { + RegionStateNode regionNode = + env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion()); + regionNode.unlock(); } private RegionStateNode getRegionStateNode(MasterProcedureEnv env) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/AssignRegionAction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/AssignRegionAction.java index c99ae092d775..8a79b64142e0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/AssignRegionAction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/AssignRegionAction.java @@ -17,9 +17,13 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.util.List; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; + @InterfaceAudience.Private class AssignRegionAction extends BalanceAction { private final int region; @@ -46,6 +50,12 @@ public BalanceAction undoAction() { throw new UnsupportedOperationException(HConstants.NOT_IMPLEMENTED); } + @Override + List toRegionPlans(BalancerClusterState cluster) { + return ImmutableList + .of(new 
RegionPlan(cluster.regions[getRegion()], null, cluster.servers[getServer()])); + } + @Override public String toString() { return getType() + ": " + region + ":" + server; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalanceAction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalanceAction.java index 56b473ae710c..a65b5253907c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalanceAction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalanceAction.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.util.Collections; +import java.util.List; +import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.yetus.audience.InterfaceAudience; /** @@ -28,11 +31,11 @@ enum Type { ASSIGN_REGION, MOVE_REGION, SWAP_REGIONS, + MOVE_BATCH, NULL, } - static final BalanceAction NULL_ACTION = new BalanceAction(Type.NULL) { - }; + static final BalanceAction NULL_ACTION = new NullBalanceAction(); private final Type type; @@ -43,16 +46,39 @@ enum Type { /** * Returns an Action which would undo this action */ - BalanceAction undoAction() { - return this; - } + abstract BalanceAction undoAction(); + + /** + * Returns the Action represented as RegionPlans + */ + abstract List toRegionPlans(BalancerClusterState cluster); Type getType() { return type; } + long getStepCount() { + return 1; + } + @Override public String toString() { return type + ":"; } + + private static final class NullBalanceAction extends BalanceAction { + private NullBalanceAction() { + super(Type.NULL); + } + + @Override + BalanceAction undoAction() { + return this; + } + + @Override + List toRegionPlans(BalancerClusterState cluster) { + return Collections.emptyList(); + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index b857055fb3ab..efba0aee733b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -26,6 +26,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import org.agrona.collections.Hashing; import org.agrona.collections.Int2IntCounterMap; import org.apache.hadoop.hbase.HDFSBlocksDistribution; @@ -34,11 +37,14 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.net.Address; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.base.Suppliers; + /** * An efficient array based implementation similar to ClusterState for keeping the status of the * cluster in terms of region assignment and distribution. 
LoadBalancers, such as @@ -123,6 +129,15 @@ class BalancerClusterState { // Maps regionName -> oldServerName -> cache ratio of the region on the old server Map> regionCacheRatioOnOldServerMap; + private final Supplier> shuffledServerIndicesSupplier = + Suppliers.memoizeWithExpiration(() -> { + Collection serverIndices = serversToIndex.values(); + List shuffledServerIndices = new ArrayList<>(serverIndices); + Collections.shuffle(shuffledServerIndices); + return shuffledServerIndices; + }, 5, TimeUnit.SECONDS); + private long stopRequestedAt = Long.MAX_VALUE; + static class DefaultRackManager extends RackManager { @Override public String getRack(ServerName server) { @@ -297,11 +312,16 @@ protected BalancerClusterState(Map> clusterState, regionIndex++; } + if (LOG.isTraceEnabled()) { + for (int i = 0; i < numServers; i++) { + LOG.trace("server {} has {} regions", i, regionsPerServer[i].length); + } + } for (int i = 0; i < serversPerHostList.size(); i++) { serversPerHost[i] = new int[serversPerHostList.get(i).size()]; for (int j = 0; j < serversPerHost[i].length; j++) { serversPerHost[i][j] = serversPerHostList.get(i).get(j); - LOG.debug("server {} is on host {}", serversPerHostList.get(i).get(j), i); + LOG.trace("server {} is on host {}", serversPerHostList.get(i).get(j), i); } if (serversPerHost[i].length > 1) { multiServersPerHost = true; @@ -312,7 +332,7 @@ protected BalancerClusterState(Map> clusterState, serversPerRack[i] = new int[serversPerRackList.get(i).size()]; for (int j = 0; j < serversPerRack[i].length; j++) { serversPerRack[i][j] = serversPerRackList.get(i).get(j); - LOG.info("server {} is on rack {}", serversPerRackList.get(i).get(j), i); + LOG.trace("server {} is on rack {}", serversPerRackList.get(i).get(j), i); } } @@ -728,8 +748,25 @@ public void doAction(BalanceAction action) { regionMoved(a.getFromRegion(), a.getFromServer(), a.getToServer()); regionMoved(a.getToRegion(), a.getToServer(), a.getFromServer()); break; + case MOVE_BATCH: + assert action instanceof MoveBatchAction : action.getClass(); + MoveBatchAction mba = (MoveBatchAction) action; + for (int serverIndex : mba.getServerToRegionsToRemove().keySet()) { + Set regionsToRemove = mba.getServerToRegionsToRemove().get(serverIndex); + regionsPerServer[serverIndex] = + removeRegions(regionsPerServer[serverIndex], regionsToRemove); + } + for (int serverIndex : mba.getServerToRegionsToAdd().keySet()) { + Set regionsToAdd = mba.getServerToRegionsToAdd().get(serverIndex); + regionsPerServer[serverIndex] = addRegions(regionsPerServer[serverIndex], regionsToAdd); + } + for (MoveRegionAction moveRegionAction : mba.getMoveActions()) { + regionMoved(moveRegionAction.getRegion(), moveRegionAction.getFromServer(), + moveRegionAction.getToServer()); + } + break; default: - throw new RuntimeException("Uknown action:" + action.getType()); + throw new RuntimeException("Unknown action:" + action.getType()); } } @@ -891,6 +928,52 @@ int[] addRegion(int[] regions, int regionIndex) { return newRegions; } + int[] removeRegions(int[] regions, Set regionIndicesToRemove) { + // Calculate the size of the new regions array + int newSize = regions.length - regionIndicesToRemove.size(); + if (newSize < 0) { + throw new IllegalStateException( + "Region indices mismatch: more regions to remove than in the regions array"); + } + + int[] newRegions = new int[newSize]; + int newIndex = 0; + + // Copy only the regions not in the removal set + for (int region : regions) { + if (!regionIndicesToRemove.contains(region)) { + newRegions[newIndex++] = 
region; + } + } + + // If the newIndex is smaller than newSize, some regions were missing from the input array + if (newIndex != newSize) { + throw new IllegalStateException("Region indices mismatch: some regions in the removal " + + "set were not found in the regions array"); + } + + return newRegions; + } + + int[] addRegions(int[] regions, Set regionIndicesToAdd) { + int[] newRegions = new int[regions.length + regionIndicesToAdd.size()]; + + // Copy the existing regions to the new array + System.arraycopy(regions, 0, newRegions, 0, regions.length); + + // Add the new regions at the end of the array + int newIndex = regions.length; + for (int regionIndex : regionIndicesToAdd) { + newRegions[newIndex++] = regionIndex; + } + + return newRegions; + } + + List getShuffledServerIndices() { + return shuffledServerIndicesSupplier.get(); + } + int[] addRegionSorted(int[] regions, int regionIndex) { int[] newRegions = new int[regions.length + 1]; int i = 0; @@ -990,6 +1073,22 @@ void setNumMovedRegions(int numMovedRegions) { this.numMovedRegions = numMovedRegions; } + public int getMaxReplicas() { + return maxReplicas; + } + + void setStopRequestedAt(long stopRequestedAt) { + this.stopRequestedAt = stopRequestedAt; + } + + boolean isStopRequested() { + return EnvironmentEdgeManager.currentTime() > stopRequestedAt; + } + + Deque[] getRegionLoads() { + return regionLoads; + } + @Override public String toString() { StringBuilder desc = new StringBuilder("Cluster={servers=["); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java new file mode 100644 index 000000000000..b82c68b37da3 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.lang.reflect.Constructor; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKeyCache; +import org.apache.hadoop.hbase.util.ReflectionUtils; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; + +/** + * Balancer conditionals supplement cost functions in the {@link StochasticLoadBalancer}. 
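+ * <p>
+ * (Illustrative sketch only, not part of this patch: the conditionals described below are switched
+ * on through configuration, using the keys this class defines, for example
+ *
+ * <pre>
+ * Configuration conf = HBaseConfiguration.create();
+ * conf.setBoolean("hbase.master.balancer.stochastic.conditionals.distributeReplicas", true);
+ * conf.setBoolean("hbase.master.balancer.stochastic.conditionals.isolateMetaTable", true);
+ * conf.setBoolean("hbase.master.balancer.stochastic.conditionals.isolateSystemTables", true);
+ * </pre>
+ *
+ * which is what {@link #setConf(Configuration)} reads.)
+ * <p>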
Cost + * functions are insufficient and difficult to work with when making discrete decisions; this is + * because they operate on a continuous scale, and each cost function's multiplier affects the + * relative importance of every other cost function. So it is difficult to meaningfully and clearly + * value many aspects of your region distribution via cost functions alone. Conditionals allow you + * to very clearly define discrete rules that your balancer would ideally follow. To clarify, a + * conditional violation will not block a region assignment, because we would rather preserve + * uptime than enforce a perfectly intentional balance. But conditionals allow you to, for example, define that + * a region's primary and secondary should not live on the same rack. As another example, conditionals + * make it easy to define that system tables should ideally be isolated on their own RegionServer + * (without needing to manage distinct RegionServer groups). + */ +@InterfaceAudience.Private +final class BalancerConditionals implements Configurable { + + private static final Logger LOG = LoggerFactory.getLogger(BalancerConditionals.class); + + public static final String DISTRIBUTE_REPLICAS_KEY = + "hbase.master.balancer.stochastic.conditionals.distributeReplicas"; + public static final boolean DISTRIBUTE_REPLICAS_DEFAULT = false; + + public static final String ISOLATE_META_TABLE_KEY = + "hbase.master.balancer.stochastic.conditionals.isolateMetaTable"; + public static final boolean ISOLATE_META_TABLE_DEFAULT = false; + + public static final String ISOLATE_SYSTEM_TABLES_KEY = + "hbase.master.balancer.stochastic.conditionals.isolateSystemTables"; + public static final boolean ISOLATE_SYSTEM_TABLES_DEFAULT = false; + + public static final String ADDITIONAL_CONDITIONALS_KEY = + "hbase.master.balancer.stochastic.additionalConditionals"; + + private Set> conditionalClasses = Collections.emptySet(); + private Set conditionals = Collections.emptySet(); + private Configuration conf; + + static BalancerConditionals create() { + return new BalancerConditionals(); + } + + private BalancerConditionals() { + } + + boolean shouldRunBalancer(BalancerClusterState cluster) { + return isConditionalBalancingEnabled() && conditionals.stream() + .map(RegionPlanConditional::getCandidateGenerators).flatMap(Collection::stream) + .map(generator -> generator.getWeight(cluster)).anyMatch(weight -> weight > 0); + } + + Set> getConditionalClasses() { + return new HashSet<>(conditionalClasses); + } + + Collection getConditionals() { + return conditionals; + } + + boolean isReplicaDistributionEnabled() { + return conditionalClasses.stream() + .anyMatch(DistributeReplicasConditional.class::isAssignableFrom); + } + + boolean isTableIsolationEnabled() { + return conditionalClasses.stream().anyMatch(TableIsolationConditional.class::isAssignableFrom); + } + + boolean isMetaTableIsolationEnabled() { + return conditionalClasses.contains(MetaTableIsolationConditional.class); + } + + boolean isServerHostingIsolatedTables(BalancerClusterState cluster, int serverIdx) { + return conditionals.stream().filter(TableIsolationConditional.class::isInstance) + .map(TableIsolationConditional.class::cast) + .anyMatch(conditional -> conditional.isServerHostingIsolatedTables(cluster, serverIdx)); + } + + boolean isConditionalBalancingEnabled() { + return !conditionalClasses.isEmpty(); + } + + void clearConditionalWeightCaches() { + conditionals.stream().map(RegionPlanConditional::getCandidateGenerators) + .flatMap(Collection::stream) +
.forEach(RegionPlanConditionalCandidateGenerator::clearWeightCache); + } + + void loadClusterState(BalancerClusterState cluster) { + conditionals = conditionalClasses.stream().map(clazz -> createConditional(clazz, cluster)) + .filter(Objects::nonNull).collect(Collectors.toSet()); + } + + /** + * Indicates whether the action is good for our conditional compliance. + * @param cluster The cluster state + * @param action The proposed action + * @return -1 if conditionals improve, 0 if neutral, 1 if conditionals degrade + */ + int getViolationCountChange(BalancerClusterState cluster, BalanceAction action) { + // Cluster is in pre-move state, so figure out the proposed violations + boolean isViolatingPost = isViolating(cluster, action); + cluster.doAction(action); + + // Cluster is in post-move state, so figure out the original violations + BalanceAction undoAction = action.undoAction(); + boolean isViolatingPre = isViolating(cluster, undoAction); + + // Reset cluster + cluster.doAction(undoAction); + + if (isViolatingPre == isViolatingPost) { + return 0; + } else if (!isViolatingPre && isViolatingPost) { + return 1; + } else { + return -1; + } + } + + /** + * Check if the proposed action violates conditionals. + * @param cluster The cluster state + * @param action The proposed action + */ + boolean isViolating(BalancerClusterState cluster, BalanceAction action) { + conditionals.forEach(conditional -> conditional.setClusterState(cluster)); + if (conditionals.isEmpty()) { + return false; + } + List regionPlans = action.toRegionPlans(cluster); + for (RegionPlan regionPlan : regionPlans) { + if (isViolating(regionPlan)) { + return true; + } + } + return false; + } + + private boolean isViolating(RegionPlan regionPlan) { + for (RegionPlanConditional conditional : conditionals) { + if (conditional.isViolating(regionPlan)) { + return true; + } + } + return false; + } + + private RegionPlanConditional createConditional(Class clazz, + BalancerClusterState cluster) { + if (cluster == null) { + cluster = new BalancerClusterState(Collections.emptyMap(), null, null, null, null); + } + try { + Constructor ctor = + clazz.getDeclaredConstructor(BalancerConditionals.class, BalancerClusterState.class); + return ReflectionUtils.instantiate(clazz.getName(), ctor, this, cluster); + } catch (NoSuchMethodException e) { + LOG.warn("Cannot find constructor with BalancerConditionals and " + + "BalancerClusterState parameters for class '{}': {}", clazz.getName(), e.getMessage()); + } + return null; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + ImmutableSet.Builder> conditionalClasses = + ImmutableSet.builder(); + + boolean distributeReplicas = + conf.getBoolean(DISTRIBUTE_REPLICAS_KEY, DISTRIBUTE_REPLICAS_DEFAULT); + if (distributeReplicas) { + conditionalClasses.add(DistributeReplicasConditional.class); + } + + boolean isolateMetaTable = conf.getBoolean(ISOLATE_META_TABLE_KEY, ISOLATE_META_TABLE_DEFAULT); + if (isolateMetaTable) { + conditionalClasses.add(MetaTableIsolationConditional.class); + } + + boolean isolateSystemTables = + conf.getBoolean(ISOLATE_SYSTEM_TABLES_KEY, ISOLATE_SYSTEM_TABLES_DEFAULT); + if (isolateSystemTables) { + conditionalClasses.add(SystemTableIsolationConditional.class); + } + + Class[] classes = conf.getClasses(ADDITIONAL_CONDITIONALS_KEY); + for (Class clazz : classes) { + if (!RegionPlanConditional.class.isAssignableFrom(clazz)) { + LOG.warn("Class {} is not a RegionPlanConditional", clazz.getName()); + continue; + } +
conditionalClasses.add(clazz.asSubclass(RegionPlanConditional.class)); + } + this.conditionalClasses = conditionalClasses.build(); + ReplicaKeyCache.getInstance().setConf(conf); + loadClusterState(null); + } + + @Override + public Configuration getConf() { + return conf; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 07cd58920860..fac0d82fe013 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -77,6 +77,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer { public static final boolean DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE = false; + public static final String REGIONS_SLOP_KEY = "hbase.regions.slop"; + public static final float REGIONS_SLOP_DEFAULT = 0.2f; + protected static final int MIN_SERVER_BALANCE = 2; private volatile boolean stopped = false; @@ -256,7 +259,9 @@ protected final boolean sloppyRegionServerExist(ClusterLoadState cs) { float average = cs.getLoadAverage(); // for logging int floor = (int) Math.floor(average * (1 - slop)); int ceiling = (int) Math.ceil(average * (1 + slop)); - if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) { + int maxLoad = cs.getMaxLoad(); + int minLoad = cs.getMinLoad(); + if (!(maxLoad > ceiling || minLoad < floor)) { NavigableMap> serversByLoad = cs.getServersByLoad(); if (LOG.isTraceEnabled()) { // If nothing to balance, then don't say anything unless trace-level logging. @@ -549,7 +554,7 @@ public Map> retainAssignment(Map, CandidateGenerator> - createCandidateGenerators() { + createCandidateGenerators(Configuration conf) { Map, CandidateGenerator> candidateGenerators = new HashMap<>(2); candidateGenerators.put(CacheAwareSkewnessCandidateGenerator.class, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java index d9245495e204..642e8162fff9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java @@ -28,6 +28,8 @@ @InterfaceAudience.Private abstract class CandidateGenerator { + protected static final double MAX_WEIGHT = 1.0; + abstract BalanceAction generate(BalancerClusterState cluster); /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFromRegionLoadFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFromRegionLoadFunction.java index 199aa10a75fa..bc61ead8da86 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFromRegionLoadFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFromRegionLoadFunction.java @@ -66,7 +66,7 @@ protected void regionMoved(int region, int oldServer, int newServer) { } @Override - protected final double cost() { + protected double cost() { return cost.cost(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java index 1dcd4580b1a6..ee2fc2b6a5e9 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java @@ -76,6 +76,13 @@ void postAction(BalanceAction action) { regionMoved(a.getFromRegion(), a.getFromServer(), a.getToServer()); regionMoved(a.getToRegion(), a.getToServer(), a.getFromServer()); break; + case MOVE_BATCH: + MoveBatchAction mba = (MoveBatchAction) action; + for (MoveRegionAction moveRegionAction : mba.getMoveActions()) { + regionMoved(moveRegionAction.getRegion(), moveRegionAction.getFromServer(), + moveRegionAction.getToServer()); + } + break; default: throw new RuntimeException("Uknown action:" + action.getType()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasCandidateGenerator.java new file mode 100644 index 000000000000..be7c7871f9c7 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasCandidateGenerator.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.DistributeReplicasConditional.getReplicaKey; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKey; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * CandidateGenerator to distribute colocated replicas across different servers. 
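+ * <p>
+ * The detection idea, roughly (an illustrative sketch, not additional patch code): collect the
+ * {@link ReplicaKey} of every region hosted by a server, and a repeated key means two replicas of
+ * the same region are colocated, e.g.
+ *
+ * <pre>
+ * Set&lt;ReplicaKey&gt; seen = new HashSet&lt;&gt;();
+ * for (int regionIndex : cluster.regionsPerServer[serverIndex]) {
+ *   if (!seen.add(getReplicaKey(cluster.regions[regionIndex]))) {
+ *     // colocated replica found; propose a MoveRegionAction to some other server
+ *   }
+ * }
+ * </pre>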
+ */ +@InterfaceAudience.Private +final class DistributeReplicasCandidateGenerator extends RegionPlanConditionalCandidateGenerator { + + private static final Logger LOG = + LoggerFactory.getLogger(DistributeReplicasCandidateGenerator.class); + private static final int BATCH_SIZE = 100_000; + + DistributeReplicasCandidateGenerator(BalancerConditionals balancerConditionals) { + super(balancerConditionals); + } + + @Override + BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing) { + return generateCandidate(cluster, isWeighing, false); + } + + BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing, + boolean isForced) { + if (cluster.getMaxReplicas() < cluster.numRacks) { + LOG.trace("Skipping replica distribution as there are not enough racks to distribute them."); + return BalanceAction.NULL_ACTION; + } + + // Iterate through shuffled servers to find colocated replicas + boolean foundColocatedReplicas = false; + List moveRegionActions = new ArrayList<>(); + List shuffledServerIndices = cluster.getShuffledServerIndices(); + for (int sourceIndex : shuffledServerIndices) { + if (moveRegionActions.size() >= BATCH_SIZE || cluster.isStopRequested()) { + break; + } + int[] serverRegions = cluster.regionsPerServer[sourceIndex]; + Set replicaKeys = new HashSet<>(serverRegions.length); + for (int regionIndex : serverRegions) { + ReplicaKey replicaKey = getReplicaKey(cluster.regions[regionIndex]); + if (replicaKeys.contains(replicaKey)) { + foundColocatedReplicas = true; + if (isWeighing) { + // If weighing, fast exit with an actionable move + return getAction(sourceIndex, regionIndex, pickOtherRandomServer(cluster, sourceIndex), + -1); + } + // If not weighing, pick a good move + for (int i = 0; i < cluster.numServers; i++) { + // Randomize destination ordering so we aren't overloading one destination + int destinationIndex = pickOtherRandomServer(cluster, sourceIndex); + if (destinationIndex == sourceIndex) { + continue; + } + MoveRegionAction possibleAction = + new MoveRegionAction(regionIndex, sourceIndex, destinationIndex); + if (isForced) { + return possibleAction; + } + if (willBeAccepted(cluster, possibleAction)) { + cluster.doAction(possibleAction); // Update cluster state to reflect move + moveRegionActions.add(possibleAction); + break; + } + } + } else { + replicaKeys.add(replicaKey); + } + } + } + + if (!moveRegionActions.isEmpty()) { + return batchMovesAndResetClusterState(cluster, moveRegionActions); + } + // If no colocated replicas are found, return NULL_ACTION + if (foundColocatedReplicas) { + LOG.warn("Could not find a place to put a colocated replica! We will force a move."); + return generateCandidate(cluster, isWeighing, true); + } + LOG.trace("No colocated replicas found. No balancing action required."); + return BalanceAction.NULL_ACTION; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasConditional.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasConditional.java new file mode 100644 index 000000000000..e99c0e93a159 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasConditional.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.List; +import java.util.Set; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKey; +import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKeyCache; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; + +/** + * If enabled, this class will help the balancer ensure that replicas aren't placed on the same + * servers or racks as their primary. Configure this via + * {@link BalancerConditionals#DISTRIBUTE_REPLICAS_KEY} + */ +@InterfaceAudience.Private +public class DistributeReplicasConditional extends RegionPlanConditional { + + private final List candidateGenerators; + + public DistributeReplicasConditional(BalancerConditionals balancerConditionals, + BalancerClusterState cluster) { + super(balancerConditionals.getConf(), cluster); + this.candidateGenerators = + ImmutableList.of(new DistributeReplicasCandidateGenerator(balancerConditionals), + new SlopFixingCandidateGenerator(balancerConditionals)); + } + + @Override + public ValidationLevel getValidationLevel() { + return ValidationLevel.SERVER_HOST_RACK; + } + + @Override + List getCandidateGenerators() { + return candidateGenerators; + } + + @Override + boolean isViolatingServer(RegionPlan regionPlan, Set serverRegions) { + return checkViolation(regionPlan.getRegionInfo(), getReplicaKey(regionPlan.getRegionInfo()), + serverRegions); + } + + @Override + boolean isViolatingHost(RegionPlan regionPlan, Set hostRegions) { + return checkViolation(regionPlan.getRegionInfo(), getReplicaKey(regionPlan.getRegionInfo()), + hostRegions); + } + + @Override + boolean isViolatingRack(RegionPlan regionPlan, Set rackRegions) { + return checkViolation(regionPlan.getRegionInfo(), getReplicaKey(regionPlan.getRegionInfo()), + rackRegions); + } + + private boolean checkViolation(RegionInfo movingRegion, ReplicaKey movingReplicaKey, + Set destinationRegions) { + for (RegionInfo regionInfo : destinationRegions) { + if (regionInfo.equals(movingRegion)) { + continue; + } + if (getReplicaKey(regionInfo).equals(movingReplicaKey)) { + return true; + } + } + return false; + } + + static ReplicaKey getReplicaKey(RegionInfo regionInfo) { + return ReplicaKeyCache.getInstance().getReplicaKey(regionInfo); + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredStochasticBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredStochasticBalancer.java index db4c7c95b656..98ad3beac8de 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredStochasticBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredStochasticBalancer.java @@ -81,7 +81,7 @@ public void 
setFavoredNodesManager(FavoredNodesManager fnm) { @Override protected Map, CandidateGenerator> - createCandidateGenerators() { + createCandidateGenerators(Configuration conf) { Map, CandidateGenerator> fnPickers = new HashMap<>(2); fnPickers.put(FavoredNodeLoadPicker.class, new FavoredNodeLoadPicker()); fnPickers.put(FavoredNodeLocalityPicker.class, new FavoredNodeLocalityPicker()); @@ -90,7 +90,7 @@ public void setFavoredNodesManager(FavoredNodesManager fnm) { /** Returns any candidate generator in random */ @Override - protected CandidateGenerator getRandomGenerator() { + protected CandidateGenerator getRandomGenerator(BalancerClusterState cluster) { Class clazz = shuffledGeneratorClasses.get() .get(ThreadLocalRandom.current().nextInt(candidateGenerators.size())); return candidateGenerators.get(clazz); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MetaTableIsolationCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MetaTableIsolationCandidateGenerator.java new file mode 100644 index 000000000000..5aa041f21d7e --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MetaTableIsolationCandidateGenerator.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public final class MetaTableIsolationCandidateGenerator extends TableIsolationCandidateGenerator { + + MetaTableIsolationCandidateGenerator(BalancerConditionals balancerConditionals) { + super(balancerConditionals); + } + + @Override + boolean shouldBeIsolated(RegionInfo regionInfo) { + return regionInfo.isMetaRegion(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MetaTableIsolationConditional.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MetaTableIsolationConditional.java new file mode 100644 index 000000000000..5617468457c4 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MetaTableIsolationConditional.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import org.apache.hadoop.hbase.client.RegionInfo; + +/** + * If enabled, this class will help the balancer ensure that the meta table lives on its own + * RegionServer. Configure this via {@link BalancerConditionals#ISOLATE_META_TABLE_KEY} + */ +class MetaTableIsolationConditional extends TableIsolationConditional { + + public MetaTableIsolationConditional(BalancerConditionals balancerConditionals, + BalancerClusterState cluster) { + super(new MetaTableIsolationCandidateGenerator(balancerConditionals), balancerConditionals, + cluster); + } + + @Override + boolean isRegionToIsolate(RegionInfo regionInfo) { + return regionInfo.isMetaRegion(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveBatchAction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveBatchAction.java new file mode 100644 index 000000000000..e7ea3ed15e1d --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveBatchAction.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.hbase.thirdparty.com.google.common.collect.HashMultimap; +import org.apache.hbase.thirdparty.com.google.common.collect.Multimaps; + +@InterfaceAudience.Private +public class MoveBatchAction extends BalanceAction { + private final List moveActions; + + MoveBatchAction(List moveActions) { + super(Type.MOVE_BATCH); + this.moveActions = moveActions; + } + + @Override + BalanceAction undoAction() { + List undoMoves = new ArrayList<>(getMoveActions().size()); + for (int i = getMoveActions().size() - 1; i >= 0; i--) { + MoveRegionAction move = getMoveActions().get(i); + undoMoves + .add(new MoveRegionAction(move.getRegion(), move.getToServer(), move.getFromServer())); + } + return new MoveBatchAction(undoMoves); + } + + @Override + List toRegionPlans(BalancerClusterState cluster) { + List mbRegionPlans = new ArrayList<>(getMoveActions().size()); + for (MoveRegionAction moveRegionAction : getMoveActions()) { + mbRegionPlans.add(new RegionPlan(cluster.regions[moveRegionAction.getRegion()], + cluster.servers[moveRegionAction.getFromServer()], + cluster.servers[moveRegionAction.getToServer()])); + } + return mbRegionPlans; + } + + @Override + long getStepCount() { + return moveActions.size(); + } + + public HashMultimap getServerToRegionsToRemove() { + return moveActions.stream().collect(Multimaps.toMultimap(MoveRegionAction::getFromServer, + MoveRegionAction::getRegion, HashMultimap::create)); + } + + public HashMultimap getServerToRegionsToAdd() { + return moveActions.stream().collect(Multimaps.toMultimap(MoveRegionAction::getToServer, + MoveRegionAction::getRegion, HashMultimap::create)); + } + + List getMoveActions() { + return moveActions; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveRegionAction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveRegionAction.java index 547c9c5b28e9..9798e9cebe87 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveRegionAction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveRegionAction.java @@ -17,8 +17,12 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.util.List; +import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; + @InterfaceAudience.Private class MoveRegionAction extends BalanceAction { private final int region; @@ -49,6 +53,12 @@ public BalanceAction undoAction() { return new MoveRegionAction(region, toServer, fromServer); } + @Override + List toRegionPlans(BalancerClusterState cluster) { + return ImmutableList.of(new RegionPlan(cluster.regions[getRegion()], + cluster.servers[getFromServer()], cluster.servers[getToServer()])); + } + @Override public String toString() { return getType() + ": " + region + ":" + fromServer + " -> " + toServer; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditional.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditional.java new file mode 100644 index 000000000000..063f3ba5f726 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditional.java @@ -0,0 +1,141 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseInterfaceAudience; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG) +@InterfaceStability.Evolving +public abstract class RegionPlanConditional { + private static final Logger LOG = LoggerFactory.getLogger(RegionPlanConditional.class); + private BalancerClusterState cluster; + + RegionPlanConditional(Configuration conf, BalancerClusterState cluster) { + this.cluster = cluster; + } + + public enum ValidationLevel { + /** + * Just check the server. + */ + SERVER, + /** + * Check the server and the host. + */ + SERVER_HOST, + /** + * Check the server, host, and rack. + */ + SERVER_HOST_RACK + } + + void setClusterState(BalancerClusterState cluster) { + this.cluster = cluster; + } + + /** + * Returns a {@link ValidationLevel} that is appropriate for this conditional. + * @return the validation level + */ + abstract ValidationLevel getValidationLevel(); + + /** + * Get the candidate generator(s) for this conditional. This can be useful to provide the balancer + * with hints that will appease your conditional. Your conditionals will be triggered in order. + * @return the candidate generator for this conditional + */ + abstract List getCandidateGenerators(); + + /** + * Check if the conditional is violated by the given region plan. 
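+ * <p>
+ * Hypothetical usage sketch (placeholder variable names, not part of this patch): a caller holding
+ * the current cluster state could test one proposed move like so:
+ *
+ * <pre>
+ * RegionPlan plan = new RegionPlan(regionInfo, sourceServer, destinationServer);
+ * conditional.setClusterState(cluster);
+ * if (conditional.isViolating(plan)) {
+ *   // choose a different destination for this region
+ * }
+ * </pre>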
+ * @param regionPlan the region plan to check + * @return true if the conditional is violated + */ + boolean isViolating(RegionPlan regionPlan) { + if (regionPlan == null) { + return false; + } + int destinationServerIdx = cluster.serversToIndex.get(regionPlan.getDestination().getAddress()); + + // Check Server + int[] destinationRegionIndices = cluster.regionsPerServer[destinationServerIdx]; + Set serverRegions = + getRegionsFromIndex(destinationServerIdx, cluster.regionsPerServer); + for (int regionIdx : destinationRegionIndices) { + serverRegions.add(cluster.regions[regionIdx]); + } + if (isViolatingServer(regionPlan, serverRegions)) { + return true; + } + + if (getValidationLevel() == ValidationLevel.SERVER) { + return false; + } + + // Check Host + int hostIdx = cluster.serverIndexToHostIndex[destinationServerIdx]; + Set hostRegions = getRegionsFromIndex(hostIdx, cluster.regionsPerHost); + if (isViolatingHost(regionPlan, hostRegions)) { + return true; + } + + if (getValidationLevel() == ValidationLevel.SERVER_HOST) { + return false; + } + + // Check Rack + int rackIdx = cluster.serverIndexToRackIndex[destinationServerIdx]; + Set rackRegions = getRegionsFromIndex(rackIdx, cluster.regionsPerRack); + if (isViolatingRack(regionPlan, rackRegions)) { + return true; + } + + return false; + } + + abstract boolean isViolatingServer(RegionPlan regionPlan, Set destinationRegions); + + boolean isViolatingHost(RegionPlan regionPlan, Set destinationRegions) { + return false; + } + + boolean isViolatingRack(RegionPlan regionPlan, Set destinationRegions) { + return false; + } + + private Set getRegionsFromIndex(int index, int[][] regionsPerIndex) { + int[] regionIndices = regionsPerIndex[index]; + if (regionIndices == null) { + return Collections.emptySet(); + } + return Arrays.stream(regionIndices).mapToObj(idx -> cluster.regions[idx]) + .collect(Collectors.toSet()); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditionalCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditionalCandidateGenerator.java new file mode 100644 index 000000000000..d28a507ff3fd --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditionalCandidateGenerator.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import java.time.Duration; +import java.util.List; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +public abstract class RegionPlanConditionalCandidateGenerator extends CandidateGenerator { + + private static final Logger LOG = + LoggerFactory.getLogger(RegionPlanConditionalCandidateGenerator.class); + + private static final Duration WEIGHT_CACHE_TTL = Duration.ofMinutes(1); + private long lastWeighedAt = -1; + private double lastWeight = 0.0; + + private final BalancerConditionals balancerConditionals; + + RegionPlanConditionalCandidateGenerator(BalancerConditionals balancerConditionals) { + this.balancerConditionals = balancerConditionals; + } + + BalancerConditionals getBalancerConditionals() { + return this.balancerConditionals; + } + + /** + * Generates a balancing action to appease the conditional. + * @param cluster Current state of the cluster. + * @param isWeighing Flag indicating if the generator is being used for weighing. + * @return A BalanceAction, or NULL_ACTION if no action is needed. + */ + abstract BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing); + + @Override + BalanceAction generate(BalancerClusterState cluster) { + BalanceAction balanceAction = generateCandidate(cluster, false); + if (!willBeAccepted(cluster, balanceAction)) { + LOG.debug("Generated action is not widely accepted by all conditionals. " + + "Likely we are finding our way out of a deadlock. balanceAction={}", balanceAction); + } + return balanceAction; + } + + BalanceAction batchMovesAndResetClusterState(BalancerClusterState cluster, + List moves) { + if (moves.isEmpty()) { + return BalanceAction.NULL_ACTION; + } + MoveBatchAction batchAction = new MoveBatchAction(moves); + undoBatchAction(cluster, batchAction); + return batchAction; + } + + boolean willBeAccepted(BalancerClusterState cluster, BalanceAction action) { + BalancerConditionals balancerConditionals = getBalancerConditionals(); + if (balancerConditionals == null) { + return true; + } + return !balancerConditionals.isViolating(cluster, action); + } + + void undoBatchAction(BalancerClusterState cluster, MoveBatchAction batchAction) { + for (int i = batchAction.getMoveActions().size() - 1; i >= 0; i--) { + MoveRegionAction action = batchAction.getMoveActions().get(i); + cluster.doAction(action.undoAction()); + } + } + + void clearWeightCache() { + lastWeighedAt = -1; + } + + double getWeight(BalancerClusterState cluster) { + boolean hasCandidate = false; + + // Candidate generation is expensive, so for re-weighing generators we will cache + // the value for a bit + if (EnvironmentEdgeManager.currentTime() - lastWeighedAt < WEIGHT_CACHE_TTL.toMillis()) { + return lastWeight; + } else { + hasCandidate = generateCandidate(cluster, true) != BalanceAction.NULL_ACTION; + lastWeighedAt = EnvironmentEdgeManager.currentTime(); + } + + if (hasCandidate) { + // If this generator has something to do, then it's important + lastWeight = CandidateGenerator.MAX_WEIGHT; + } else { + lastWeight = 0; + } + return lastWeight; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SlopFixingCandidateGenerator.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SlopFixingCandidateGenerator.java new file mode 100644 index 000000000000..f78e1573b417 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SlopFixingCandidateGenerator.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.hadoop.hbase.ServerName; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A simple candidate generator that attempts to move regions from the most-loaded servers to the + * least-loaded servers. + */ +@InterfaceAudience.Private +final class SlopFixingCandidateGenerator extends RegionPlanConditionalCandidateGenerator { + + private static final Logger LOG = LoggerFactory.getLogger(SlopFixingCandidateGenerator.class); + + private final float slop; + + SlopFixingCandidateGenerator(BalancerConditionals balancerConditionals) { + super(balancerConditionals); + this.slop = balancerConditionals.getConf().getFloat(BaseLoadBalancer.REGIONS_SLOP_KEY, + BaseLoadBalancer.REGIONS_SLOP_DEFAULT); + } + + @Override + BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing) { + boolean isTableIsolationEnabled = getBalancerConditionals().isTableIsolationEnabled(); + ClusterLoadState cs = new ClusterLoadState(cluster.clusterState); + float average = cs.getLoadAverage(); + int ceiling = (int) Math.ceil(average * (1 + slop)); + Set sloppyServerIndices = new HashSet<>(); + for (int i = 0; i < cluster.numServers; i++) { + int regionCount = cluster.regionsPerServer[i].length; + if (regionCount > ceiling) { + sloppyServerIndices.add(i); + } + } + + if (sloppyServerIndices.isEmpty()) { + LOG.trace("No action to take because no sloppy servers exist."); + return BalanceAction.NULL_ACTION; + } + + List moves = new ArrayList<>(); + Set fixedServers = new HashSet<>(); + for (int sourceServer : sloppyServerIndices) { + if ( + isTableIsolationEnabled + && getBalancerConditionals().isServerHostingIsolatedTables(cluster, sourceServer) + ) { + // Don't fix sloppiness of servers hosting isolated tables + continue; + } + for (int regionIdx : cluster.regionsPerServer[sourceServer]) { + boolean regionFoundMove = false; + for (ServerAndLoad serverAndLoad : cs.getServersByLoad().keySet()) { + ServerName destinationServer = serverAndLoad.getServerName(); + int destinationServerIdx = cluster.serversToIndex.get(destinationServer.getAddress()); + int regionsOnDestination = cluster.regionsPerServer[destinationServerIdx].length; + if (regionsOnDestination < average) { + 
MoveRegionAction move = + new MoveRegionAction(regionIdx, sourceServer, destinationServerIdx); + if (willBeAccepted(cluster, move)) { + if (isWeighing) { + // Fast exit for weighing candidate + return move; + } + moves.add(move); + cluster.doAction(move); + regionFoundMove = true; + break; + } + } else { + fixedServers.add(serverAndLoad); + } + } + fixedServers.forEach(s -> cs.getServersByLoad().remove(s)); + fixedServers.clear(); + if (!regionFoundMove && LOG.isTraceEnabled()) { + LOG.trace("Could not find a destination for region {} from server {}.", regionIdx, + sourceServer); + } + if (cluster.regionsPerServer[sourceServer].length <= ceiling) { + break; + } + } + } + + return batchMovesAndResetClusterState(cluster, moves); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index fca4ef952073..689c65fd6ca4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -54,7 +54,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.base.Suppliers; /** @@ -192,6 +191,8 @@ public enum GeneratorType { return shuffled; }, 5, TimeUnit.SECONDS); + private final BalancerConditionals balancerConditionals = BalancerConditionals.create(); + /** * The constructor that pass a MetricsStochasticBalancer to BaseLoadBalancer to replace its * default MetricsBalancer @@ -244,16 +245,24 @@ Map, CandidateGenerator> getCandidateGenerat } protected Map, CandidateGenerator> - createCandidateGenerators() { - Map, CandidateGenerator> candidateGenerators = - new HashMap<>(5); - candidateGenerators.put(RandomCandidateGenerator.class, new RandomCandidateGenerator()); - candidateGenerators.put(LoadCandidateGenerator.class, new LoadCandidateGenerator()); - candidateGenerators.put(LocalityBasedCandidateGenerator.class, localityCandidateGenerator); - candidateGenerators.put(RegionReplicaCandidateGenerator.class, - new RegionReplicaCandidateGenerator()); - candidateGenerators.put(RegionReplicaRackCandidateGenerator.class, - new RegionReplicaRackCandidateGenerator()); + createCandidateGenerators(Configuration conf) { + balancerConditionals.setConf(conf); + Map, CandidateGenerator> candidateGenerators; + if (balancerConditionals.isReplicaDistributionEnabled()) { + candidateGenerators = new HashMap<>(3); + candidateGenerators.put(RandomCandidateGenerator.class, new RandomCandidateGenerator()); + candidateGenerators.put(LoadCandidateGenerator.class, new LoadCandidateGenerator()); + candidateGenerators.put(LocalityBasedCandidateGenerator.class, localityCandidateGenerator); + } else { + candidateGenerators = new HashMap<>(5); + candidateGenerators.put(RandomCandidateGenerator.class, new RandomCandidateGenerator()); + candidateGenerators.put(LoadCandidateGenerator.class, new LoadCandidateGenerator()); + candidateGenerators.put(LocalityBasedCandidateGenerator.class, localityCandidateGenerator); + candidateGenerators.put(RegionReplicaCandidateGenerator.class, + new RegionReplicaCandidateGenerator()); + candidateGenerators.put(RegionReplicaRackCandidateGenerator.class, + new RegionReplicaRackCandidateGenerator()); + } return candidateGenerators; } @@ -265,6 +274,7 @@ protected List 
createCostFunctions(Configuration conf) { addCostFunction(costFunctions, localityCost); addCostFunction(costFunctions, rackLocalityCost); addCostFunction(costFunctions, new TableSkewCostFunction(conf)); + addCostFunction(costFunctions, new StoreFileTableSkewCostFunction(conf)); addCostFunction(costFunctions, regionReplicaHostCostFunction); addCostFunction(costFunctions, regionReplicaRackCostFunction); addCostFunction(costFunctions, new ReadRequestCostFunction(conf)); @@ -288,7 +298,8 @@ protected void loadConf(Configuration conf) { localityCost = new ServerLocalityCostFunction(conf); rackLocalityCost = new RackLocalityCostFunction(conf); - this.candidateGenerators = createCandidateGenerators(); + balancerConditionals.setConf(conf); + this.candidateGenerators = createCandidateGenerators(conf); regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf); regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf); @@ -377,6 +388,11 @@ void updateMetricsSize(int size) { } private boolean areSomeRegionReplicasColocatedOnHost(BalancerClusterState c) { + if (!c.hasRegionReplicas || balancerConditionals.isReplicaDistributionEnabled()) { + // This check is unnecessary without replicas, or with conditional replica distribution + // The balancer will auto-run if conditional replica distribution candidates are available + return false; + } if (c.numHosts >= c.maxReplicas) { regionReplicaHostCostFunction.prepare(c); double hostCost = Math.abs(regionReplicaHostCostFunction.cost()); @@ -390,6 +406,11 @@ private boolean areSomeRegionReplicasColocatedOnHost(BalancerClusterState c) { } private boolean areSomeRegionReplicasColocatedOnRack(BalancerClusterState c) { + if (!c.hasRegionReplicas || balancerConditionals.isReplicaDistributionEnabled()) { + // This check is unnecessary without replicas, or with conditional replica distribution + // The balancer will auto-run if conditional replica distribution candidates are available + return false; + } if (c.numRacks >= c.maxReplicas) { regionReplicaRackCostFunction.prepare(c); double rackCost = Math.abs(regionReplicaRackCostFunction.cost()); @@ -435,12 +456,20 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { return true; } - if (sloppyRegionServerExist(cs)) { + if ( + // table isolation is inherently incompatible with naive "sloppy server" checks + !balancerConditionals.isTableIsolationEnabled() && sloppyRegionServerExist(cs) + ) { LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}", functionCost()); return true; } + if (balancerConditionals.shouldRunBalancer(cluster)) { + LOG.info("Running balancer because conditional candidate generators have important moves"); + return true; + } + double total = 0.0; float localSumMultiplier = 0; // in case this.sumMultiplier is not initialized for (CostFunction c : costFunctions) { @@ -470,14 +499,17 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { } LOG.info( "{} - skipping load balancing because weighted average imbalance={} <= " - + "threshold({}). If you want more aggressive balancing, either lower " + + "threshold({}) and conditionals do not have opinionated move candidates. " + + "If you want more aggressive balancing, either lower " + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative " + "multiplier(s) of the specific cost function(s). functionCost={}", isByTable ? 
"Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier, minCostNeedBalance, minCostNeedBalance, functionCost()); } else { - LOG.info("{} - Calculating plan. may take up to {}ms to complete.", - isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime); + LOG.info( + "{} - Calculating plan. may take up to {}ms to complete. currentCost={}, targetCost={}", + isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime, total, + minCostNeedBalance); } return !balanced; } @@ -485,7 +517,7 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java") Pair nextAction(BalancerClusterState cluster) { - CandidateGenerator generator = getRandomGenerator(); + CandidateGenerator generator = getRandomGenerator(cluster); return Pair.newPair(generator, generator.generate(cluster)); } @@ -494,8 +526,20 @@ Pair nextAction(BalancerClusterState cluster) * selecting a candidate generator is proportional to the share of cost of all cost functions * among all cost functions that benefit from it. */ - protected CandidateGenerator getRandomGenerator() { - Preconditions.checkState(!candidateGenerators.isEmpty(), "No candidate generators available."); + protected CandidateGenerator getRandomGenerator(BalancerClusterState cluster) { + // Prefer conditional generators if they have moves to make + if (balancerConditionals.isConditionalBalancingEnabled()) { + for (RegionPlanConditional conditional : balancerConditionals.getConditionals()) { + List generators = + conditional.getCandidateGenerators(); + for (RegionPlanConditionalCandidateGenerator generator : generators) { + if (generator.getWeight(cluster) > 0) { + return generator; + } + } + } + } + List> generatorClasses = shuffledGeneratorClasses.get(); List partialSums = new ArrayList<>(generatorClasses.size()); double sum = 0.0; @@ -583,8 +627,12 @@ protected List balanceTable(TableName tableName, rackManager, regionCacheRatioOnOldServerMap); long startTime = EnvironmentEdgeManager.currentTime(); + cluster.setStopRequestedAt(startTime + maxRunningTime); initCosts(cluster); + balancerConditionals.loadClusterState(cluster); + balancerConditionals.clearConditionalWeightCaches(); + float localSumMultiplier = 0; for (CostFunction c : costFunctions) { if (c.isNeeded()) { @@ -632,6 +680,7 @@ protected List balanceTable(TableName tableName, final String initFunctionTotalCosts = totalCostsPerFunc(); // Perform a stochastic walk to see if we can get a good fit. long step; + boolean planImprovedConditionals = false; Map, Long> generatorToStepCount = new HashMap<>(); Map, Long> generatorToApprovedActionCount = new HashMap<>(); for (step = 0; step < computedMaxSteps; step++) { @@ -643,16 +692,57 @@ protected List balanceTable(TableName tableName, continue; } - cluster.doAction(action); + int conditionalViolationsChange = 0; + boolean isViolatingConditionals = false; + boolean moveImprovedConditionals = false; + // Only check conditionals if they are enabled + if (balancerConditionals.isConditionalBalancingEnabled()) { + // Always accept a conditional generator output. Sometimes conditional generators + // may need to make controversial moves in order to break what would otherwise + // be a deadlocked situation. + // Otherwise, for normal moves, evaluate the action. 
+ if (RegionPlanConditionalCandidateGenerator.class.isAssignableFrom(generator.getClass())) { + conditionalViolationsChange = -1; + } else { + conditionalViolationsChange = + balancerConditionals.getViolationCountChange(cluster, action); + isViolatingConditionals = balancerConditionals.isViolating(cluster, action); + } + moveImprovedConditionals = conditionalViolationsChange < 0; + if (moveImprovedConditionals) { + planImprovedConditionals = true; + } + } + + // Change state and evaluate costs + try { + cluster.doAction(action); + } catch (IllegalStateException | ArrayIndexOutOfBoundsException e) { + LOG.warn( + "Generator {} produced invalid action! " + + "Debug your candidate generator as this is likely a bug, " + + "and may cause a balancer deadlock. {}", + generator.getClass().getSimpleName(), action, e); + continue; + } updateCostsAndWeightsWithAction(cluster, action); - generatorToStepCount.merge(generator.getClass(), 1L, Long::sum); + generatorToStepCount.merge(generator.getClass(), action.getStepCount(), Long::sum); newCost = computeCost(cluster, currentCost); - // Should this be kept? - if (newCost < currentCost) { + double costImprovement = currentCost - newCost; + double minimumImprovement = + Math.max(CostFunction.getCostEpsilon(currentCost), CostFunction.getCostEpsilon(newCost)); + boolean costsImproved = costImprovement > minimumImprovement; + boolean conditionalsSimilarCostsImproved = + (costsImproved && conditionalViolationsChange == 0 && !isViolatingConditionals); + // Our first priority is to reduce conditional violations + // Our second priority is to reduce balancer cost + // change, regardless of cost change + if (moveImprovedConditionals || conditionalsSimilarCostsImproved) { currentCost = newCost; - generatorToApprovedActionCount.merge(generator.getClass(), 1L, Long::sum); + generatorToApprovedActionCount.merge(generator.getClass(), action.getStepCount(), + Long::sum); // save for JMX curOverallCost = currentCost; @@ -665,7 +755,7 @@ protected List balanceTable(TableName tableName, updateCostsAndWeightsWithAction(cluster, undoAction); } - if (EnvironmentEdgeManager.currentTime() - startTime > maxRunningTime) { + if (cluster.isStopRequested()) { break; } } @@ -682,7 +772,7 @@ protected List balanceTable(TableName tableName, metricsBalancer.balanceCluster(endTime - startTime); - if (initCost > currentCost) { + if (planImprovedConditionals || (initCost > currentCost)) { updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); plans = createRegionPlans(cluster); LOG.info( @@ -697,7 +787,8 @@ protected List balanceTable(TableName tableName, } LOG.info( "Could not find a better moving plan. Tried {} different configurations in " - + "{} ms, and did not find anything with an imbalance score less than {}", + + "{} ms, and did not find anything with an imbalance score less than {} " + + "and could not improve conditional violations", step, endTime - startTime, initCost / sumMultiplier); return null; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StoreFileTableSkewCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StoreFileTableSkewCostFunction.java new file mode 100644 index 000000000000..d37f8caa72e1 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StoreFileTableSkewCostFunction.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.Collection; +import org.apache.hadoop.conf.Configuration; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Lightweight cost function that mirrors TableSkewCostFunction but aggregates storefile sizes (in + * MB) per table using the CostFromRegionLoadFunction framework. For each table, it computes a + * per-server aggregated storefile size by summing the average storefile size for each region (if + * there are multiple load metrics, it averages them). The imbalance cost (as computed by + * DoubleArrayCost) is then used to drive the balancer to reduce differences between servers. + */ +@InterfaceAudience.Private +public class StoreFileTableSkewCostFunction extends CostFromRegionLoadFunction { + + private static final String STOREFILE_TABLE_SKEW_COST_KEY = + "hbase.master.balancer.stochastic.storefileTableSkewCost"; + private static final float DEFAULT_STOREFILE_TABLE_SKEW_COST = 35; + + // One DoubleArrayCost instance per table. + private DoubleArrayCost[] costsPerTable; + + public StoreFileTableSkewCostFunction(Configuration conf) { + this.setMultiplier( + conf.getFloat(STOREFILE_TABLE_SKEW_COST_KEY, DEFAULT_STOREFILE_TABLE_SKEW_COST)); + } + + @Override + public void prepare(BalancerClusterState cluster) { + // First, set the cluster state and allocate one DoubleArrayCost per table. + this.cluster = cluster; + costsPerTable = new DoubleArrayCost[cluster.numTables]; + for (int tableIdx = 0; tableIdx < cluster.numTables; tableIdx++) { + costsPerTable[tableIdx] = new DoubleArrayCost(); + costsPerTable[tableIdx].prepare(cluster.numServers); + final int tableIndex = tableIdx; + costsPerTable[tableIdx].applyCostsChange(costs -> { + // For each server, compute the aggregated storefile size for this table. + for (int server = 0; server < cluster.numServers; server++) { + double totalStorefileMB = 0; + // Sum over all regions on this server that belong to the given table. + for (int region : cluster.regionsPerServer[server]) { + if (cluster.regionIndexToTableIndex[region] == tableIndex) { + Collection loads = cluster.getRegionLoads()[region]; + double regionCost = 0; + if (loads != null && !loads.isEmpty()) { + // Average the storefile sizes if there are multiple measurements. + for (BalancerRegionLoad rl : loads) { + regionCost += getCostFromRl(rl); + } + regionCost /= loads.size(); + } + totalStorefileMB += regionCost; + } + } + costs[server] = totalStorefileMB; + } + }); + } + } + + @Override + protected void regionMoved(int region, int oldServer, int newServer) { + // Determine the affected table. + int tableIdx = cluster.regionIndexToTableIndex[region]; + costsPerTable[tableIdx].applyCostsChange(costs -> { + // Recompute for the old server if applicable. 
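+ // (The helper below is a no-op for negative server indices, e.g. a region with no previous server.)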
+ updateStoreFilePerServerPerTableCosts(oldServer, tableIdx, costs); + // Recompute for the new server. + updateStoreFilePerServerPerTableCosts(newServer, tableIdx, costs); + }); + } + + private void updateStoreFilePerServerPerTableCosts(int newServer, int tableIdx, double[] costs) { + if (newServer >= 0) { + double totalStorefileMB = 0; + for (int r : cluster.regionsPerServer[newServer]) { + if (cluster.regionIndexToTableIndex[r] == tableIdx) { + Collection loads = cluster.getRegionLoads()[r]; + double regionCost = 0; + if (loads != null && !loads.isEmpty()) { + for (BalancerRegionLoad rl : loads) { + regionCost += getCostFromRl(rl); + } + regionCost /= loads.size(); + } + totalStorefileMB += regionCost; + } + } + costs[newServer] = totalStorefileMB; + } + } + + @Override + protected double cost() { + double totalCost = 0; + // Sum the imbalance cost over all tables. + for (DoubleArrayCost dac : costsPerTable) { + totalCost += dac.cost(); + } + return totalCost; + } + + @Override + protected double getCostFromRl(BalancerRegionLoad rl) { + // Use storefile size in MB as the metric. + return rl.getStorefileSizeMB(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SwapRegionsAction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SwapRegionsAction.java index 6f83d2bc930b..c99de022f038 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SwapRegionsAction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SwapRegionsAction.java @@ -17,8 +17,12 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.util.List; +import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; + @InterfaceAudience.Private public class SwapRegionsAction extends BalanceAction { private final int fromServer; @@ -55,6 +59,15 @@ public BalanceAction undoAction() { return new SwapRegionsAction(fromServer, toRegion, toServer, fromRegion); } + @Override + List toRegionPlans(BalancerClusterState cluster) { + return ImmutableList.of( + new RegionPlan(cluster.regions[getFromRegion()], cluster.servers[getFromServer()], + cluster.servers[getToServer()]), + new RegionPlan(cluster.regions[getToRegion()], cluster.servers[getToServer()], + cluster.servers[getFromServer()])); + } + @Override public String toString() { return getType() + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SystemTableIsolationCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SystemTableIsolationCandidateGenerator.java new file mode 100644 index 000000000000..7ce8ff202965 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SystemTableIsolationCandidateGenerator.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class SystemTableIsolationCandidateGenerator extends TableIsolationCandidateGenerator { + + private final BalancerConditionals balancerConditionals; + + SystemTableIsolationCandidateGenerator(BalancerConditionals balancerConditionals) { + super(balancerConditionals); + this.balancerConditionals = balancerConditionals; + } + + @Override + boolean shouldBeIsolated(RegionInfo regionInfo) { + if (balancerConditionals.isMetaTableIsolationEnabled() && regionInfo.isMetaRegion()) { + // If meta isolation is enabled, we can ignore meta regions here + return false; + } + return regionInfo.getTable().isSystemTable(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SystemTableIsolationConditional.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SystemTableIsolationConditional.java new file mode 100644 index 000000000000..b5734b82faf7 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SystemTableIsolationConditional.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class SystemTableIsolationConditional extends TableIsolationConditional { + + private final BalancerConditionals balancerConditionals; + + SystemTableIsolationConditional(BalancerConditionals balancerConditionals, + BalancerClusterState cluster) { + super(new SystemTableIsolationCandidateGenerator(balancerConditionals), balancerConditionals, + cluster); + this.balancerConditionals = balancerConditionals; + } + + @Override + boolean isRegionToIsolate(RegionInfo regionInfo) { + if (balancerConditionals.isMetaTableIsolationEnabled() && regionInfo.isMetaRegion()) { + // If meta isolation is enabled, we can ignore meta regions here + return false; + } + return regionInfo.getTable().isSystemTable(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/TableIsolationCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/TableIsolationCandidateGenerator.java new file mode 100644 index 000000000000..ec41033999fa --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/TableIsolationCandidateGenerator.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@InterfaceAudience.Private +public abstract class TableIsolationCandidateGenerator + extends RegionPlanConditionalCandidateGenerator { + + private static final Logger LOG = LoggerFactory.getLogger(TableIsolationCandidateGenerator.class); + + TableIsolationCandidateGenerator(BalancerConditionals balancerConditionals) { + super(balancerConditionals); + } + + abstract boolean shouldBeIsolated(RegionInfo regionInfo); + + @Override + BalanceAction generate(BalancerClusterState cluster) { + return generateCandidate(cluster, false); + } + + BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing) { + if (!getBalancerConditionals().isTableIsolationEnabled()) { + return BalanceAction.NULL_ACTION; + } + + List moves = new ArrayList<>(); + List serverIndicesHoldingIsolatedRegions = new ArrayList<>(); + int isolatedTableMaxReplicaCount = 1; + for (int serverIdx : cluster.getShuffledServerIndices()) { + if (cluster.isStopRequested()) { + break; + } + boolean hasRegionsToIsolate = false; + Set regionsToMove = new HashSet<>(); + + // Move non-target regions away from target regions, + // and track replica counts so we know how many isolated hosts we need + for (int regionIdx : cluster.regionsPerServer[serverIdx]) { + RegionInfo regionInfo = cluster.regions[regionIdx]; + if (shouldBeIsolated(regionInfo)) { + hasRegionsToIsolate = true; + int replicaCount = regionInfo.getReplicaId() + 1; + if (replicaCount > isolatedTableMaxReplicaCount) { + isolatedTableMaxReplicaCount = replicaCount; + } + } else { + regionsToMove.add(regionIdx); + } + } + + if (hasRegionsToIsolate) { + serverIndicesHoldingIsolatedRegions.add(serverIdx); + } + + // Generate non-system regions to move, if applicable + if (hasRegionsToIsolate && !regionsToMove.isEmpty()) { + for (int regionToMove : regionsToMove) { + for (int i = 0; i < cluster.numServers; i++) { + int targetServer = pickOtherRandomServer(cluster, serverIdx); + MoveRegionAction possibleMove = + new MoveRegionAction(regionToMove, serverIdx, targetServer); + if (!getBalancerConditionals().isViolating(cluster, possibleMove)) { + if (isWeighing) { + return possibleMove; + } + cluster.doAction(possibleMove); // Update cluster state to reflect move + moves.add(possibleMove); + break; + } + } + } + } + } + + // Try to consolidate regions on only n servers, where n is the number of replicas + if (serverIndicesHoldingIsolatedRegions.size() > isolatedTableMaxReplicaCount) { + // One target per replica + List targetServerIndices = new ArrayList<>(); + for (int i = 0; i < isolatedTableMaxReplicaCount; i++) { + targetServerIndices.add(serverIndicesHoldingIsolatedRegions.get(i)); + } + // Move all isolated regions from non-targets to targets + for (int i = isolatedTableMaxReplicaCount; i + < serverIndicesHoldingIsolatedRegions.size(); i++) { + int fromServer = serverIndicesHoldingIsolatedRegions.get(i); + for (int regionIdx : cluster.regionsPerServer[fromServer]) { + RegionInfo regionInfo = cluster.regions[regionIdx]; + if (shouldBeIsolated(regionInfo)) { + int targetServer = targetServerIndices.get(i % isolatedTableMaxReplicaCount); + MoveRegionAction possibleMove = + new MoveRegionAction(regionIdx, fromServer, targetServer); + if 
(!getBalancerConditionals().isViolating(cluster, possibleMove)) { + if (isWeighing) { + return possibleMove; + } + cluster.doAction(possibleMove); // Update cluster state to reflect move + moves.add(possibleMove); + } + } + } + } + } + return batchMovesAndResetClusterState(cluster, moves); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/TableIsolationConditional.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/TableIsolationConditional.java new file mode 100644 index 000000000000..24a6f519e8d8 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/TableIsolationConditional.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.List; +import java.util.Set; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionPlan; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; + +abstract class TableIsolationConditional extends RegionPlanConditional { + + private final List candidateGenerators; + + TableIsolationConditional(TableIsolationCandidateGenerator generator, + BalancerConditionals balancerConditionals, BalancerClusterState cluster) { + super(balancerConditionals.getConf(), cluster); + + this.candidateGenerators = + ImmutableList.of(generator, new SlopFixingCandidateGenerator(balancerConditionals)); + } + + abstract boolean isRegionToIsolate(RegionInfo regionInfo); + + boolean isServerHostingIsolatedTables(BalancerClusterState cluster, int serverIdx) { + for (int regionIdx : cluster.regionsPerServer[serverIdx]) { + if (isRegionToIsolate(cluster.regions[regionIdx])) { + return true; + } + } + return false; + } + + @Override + ValidationLevel getValidationLevel() { + return ValidationLevel.SERVER; + } + + @Override + List getCandidateGenerators() { + return candidateGenerators; + } + + @Override + public boolean isViolatingServer(RegionPlan regionPlan, Set serverRegions) { + RegionInfo regionBeingMoved = regionPlan.getRegionInfo(); + boolean shouldIsolateMovingRegion = isRegionToIsolate(regionBeingMoved); + for (RegionInfo destinationRegion : serverRegions) { + if (destinationRegion.getEncodedName().equals(regionBeingMoved.getEncodedName())) { + // Skip the region being moved + continue; + } + if (shouldIsolateMovingRegion && !isRegionToIsolate(destinationRegion)) { + // Ensure every destination region is also a region to isolate + return true; + } else if (!shouldIsolateMovingRegion && isRegionToIsolate(destinationRegion)) { + // Ensure no destination region is a region to isolate + return true; + } + } + return false; + } + +} diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/replicas/ReplicaKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/replicas/ReplicaKey.java new file mode 100644 index 000000000000..f43df965da33 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/replicas/ReplicaKey.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer.replicas; + +import java.util.Arrays; +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public final class ReplicaKey { + private final TableName tableName; + private final byte[] start; + private final byte[] stop; + + public ReplicaKey(RegionInfo regionInfo) { + this.tableName = regionInfo.getTable(); + this.start = regionInfo.getStartKey(); + this.stop = regionInfo.getEndKey(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof ReplicaKey)) { + return false; + } + ReplicaKey other = (ReplicaKey) o; + return Arrays.equals(this.start, other.start) && Arrays.equals(this.stop, other.stop) + && this.tableName.equals(other.tableName); + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(tableName).append(start).append(stop).toHashCode(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/replicas/ReplicaKeyCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/replicas/ReplicaKeyCache.java new file mode 100644 index 000000000000..a40e5f9a2f2d --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/replicas/ReplicaKeyCache.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer.replicas; + +import java.time.Duration; +import java.util.function.Supplier; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.hbase.thirdparty.com.google.common.base.Suppliers; +import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hbase.thirdparty.com.google.common.cache.LoadingCache; + +@InterfaceAudience.Private +public final class ReplicaKeyCache implements Configurable { + /** + * ReplicaKey creation is expensive if you have lots of regions. If your HMaster has adequate + * memory, and you would like balancing to be faster, then you can turn on this flag to cache + * ReplicaKey objects. + */ + public static final String CACHE_REPLICA_KEYS_KEY = + "hbase.replica.distribution.conditional.cacheReplicaKeys"; + public static final boolean CACHE_REPLICA_KEYS_DEFAULT = false; + + /** + * If memory is available, then set this to a value greater than your region count to maximize + * replica distribution performance. + */ + public static final String REPLICA_KEY_CACHE_SIZE_KEY = + "hbase.replica.distribution.conditional.replicaKeyCacheSize"; + public static final int REPLICA_KEY_CACHE_SIZE_DEFAULT = 1000; + + private static final Supplier INSTANCE = Suppliers.memoize(ReplicaKeyCache::new); + + private volatile LoadingCache replicaKeyCache = null; + + private Configuration conf; + + public static ReplicaKeyCache getInstance() { + return INSTANCE.get(); + } + + private ReplicaKeyCache() { + } + + public ReplicaKey getReplicaKey(RegionInfo regionInfo) { + return replicaKeyCache == null + ? new ReplicaKey(regionInfo) + : replicaKeyCache.getUnchecked(regionInfo); + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + boolean cacheKeys = conf.getBoolean(CACHE_REPLICA_KEYS_KEY, CACHE_REPLICA_KEYS_DEFAULT); + if (cacheKeys && replicaKeyCache == null) { + int replicaKeyCacheSize = + conf.getInt(REPLICA_KEY_CACHE_SIZE_KEY, REPLICA_KEY_CACHE_SIZE_DEFAULT); + replicaKeyCache = CacheBuilder.newBuilder().maximumSize(replicaKeyCacheSize) + .expireAfterAccess(Duration.ofMinutes(30)).build(new CacheLoader() { + @Override + public ReplicaKey load(RegionInfo regionInfo) { + return new ReplicaKey(regionInfo); + } + }); + } else if (!cacheKeys) { + replicaKeyCache = null; + } + } + + @Override + public Configuration getConf() { + return conf; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/http/MasterHealthServlet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/http/MasterHealthServlet.java new file mode 100644 index 000000000000..99f2f08ac8bd --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/http/MasterHealthServlet.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.http; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Optional; +import javax.servlet.http.HttpServletRequest; +import org.apache.hadoop.hbase.ClusterMetrics; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.monitoring.HealthCheckServlet; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class MasterHealthServlet extends HealthCheckServlet { + + public MasterHealthServlet() { + super(HMaster.MASTER); + } + + @Override + protected Optional check(HMaster master, HttpServletRequest req, Connection conn) + throws IOException { + + if (master.isActiveMaster() && master.isOnline()) { + // this will fail if there is a problem with the active master + conn.getAdmin().getClusterMetrics(EnumSet.of(ClusterMetrics.Option.CLUSTER_ID)); + } + + return Optional.empty(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/HealthCheckServlet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/HealthCheckServlet.java new file mode 100644 index 000000000000..8d09089b0c64 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/HealthCheckServlet.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.monitoring; + +import java.io.IOException; +import java.util.Optional; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.RpcConnectionRegistry; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public abstract class HealthCheckServlet extends HttpServlet { + + private static final String CLIENT_RPC_TIMEOUT = "healthcheck.hbase.client.rpc.timeout"; + private static final int CLIENT_RPC_TIMEOUT_DEFAULT = 5000; + private static final String CLIENT_RETRIES = "healthcheck.hbase.client.retries"; + private static final int CLIENT_RETRIES_DEFAULT = 2; + private static final String CLIENT_OPERATION_TIMEOUT = + "healthcheck.hbase.client.operation.timeout"; + private static final int CLIENT_OPERATION_TIMEOUT_DEFAULT = 15000; + + private final String serverLookupKey; + + public HealthCheckServlet(String serverLookupKey) { + this.serverLookupKey = serverLookupKey; + } + + @SuppressWarnings("unchecked") + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) + throws ServletException, IOException { + T server = (T) getServletContext().getAttribute(serverLookupKey); + try { + check(server, req); + Optional message = check(server, req); + resp.setStatus(200); + resp.getWriter().write(message.orElse("ok")); + } catch (Exception e) { + resp.setStatus(500); + resp.getWriter().write(e.toString()); + } finally { + resp.getWriter().close(); + } + } + + private Optional check(T server, HttpServletRequest req) throws IOException { + if (server == null) { + throw new IOException("Unable to get access to " + serverLookupKey); + } + if (server.isAborted() || server.isStopped() || server.isStopping() || server.isKilled()) { + throw new IOException("The " + serverLookupKey + " is stopping!"); + } + if (!server.getRpcServer().isStarted()) { + throw new IOException("The " + serverLookupKey + "'s RpcServer is not started"); + } + + Configuration conf = new Configuration(server.getConfiguration()); + conf.set(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY, + RpcConnectionRegistry.class.getName()); + conf.set(RpcConnectionRegistry.BOOTSTRAP_NODES, server.getServerName().getAddress().toString()); + conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, + conf.getInt(CLIENT_RPC_TIMEOUT, CLIENT_RPC_TIMEOUT_DEFAULT)); + conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, + conf.getInt(CLIENT_RETRIES, CLIENT_RETRIES_DEFAULT)); + conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, + conf.getInt(CLIENT_OPERATION_TIMEOUT, CLIENT_OPERATION_TIMEOUT_DEFAULT)); + + try (Connection conn = ConnectionFactory.createConnection(conf)) { + // this will fail if the server is not accepting requests + if (conn.getClusterId() == null) { + throw new IOException("Could not retrieve clusterId from self via rpc"); + } + + return check(server, req, conn); + } + } + + protected abstract Optional check(T server, HttpServletRequest req, Connection conn) + throws IOException; +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java index 29c3667fb352..f153eca2e5a0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java @@ -62,6 +62,7 @@ public class DefaultOperationQuota implements OperationQuota { private boolean useResultSizeBytes; private long blockSizeBytes; private long maxScanEstimate; + private boolean isAtomic = false; public DefaultOperationQuota(final Configuration conf, final int blockSizeBytes, final QuotaLimiter... limiters) { @@ -92,9 +93,10 @@ public DefaultOperationQuota(final Configuration conf, final List } @Override - public void checkBatchQuota(int numWrites, int numReads) throws RpcThrottlingException { + public void checkBatchQuota(int numWrites, int numReads, boolean isAtomic) + throws RpcThrottlingException { updateEstimateConsumeBatchQuota(numWrites, numReads); - checkQuota(numWrites, numReads); + checkQuota(numWrites, numReads, isAtomic); } @Override @@ -102,10 +104,15 @@ public void checkScanQuota(ClientProtos.ScanRequest scanRequest, long maxScanner long maxBlockBytesScanned, long prevBlockBytesScannedDifference) throws RpcThrottlingException { updateEstimateConsumeScanQuota(scanRequest, maxScannerResultSize, maxBlockBytesScanned, prevBlockBytesScannedDifference); - checkQuota(0, 1); + checkQuota(0, 1, false); } - private void checkQuota(long numWrites, long numReads) throws RpcThrottlingException { + private void checkQuota(long numWrites, long numReads, boolean isAtomic) + throws RpcThrottlingException { + if (isAtomic) { + // Remember this flag for later use in close() + this.isAtomic = true; + } readAvailable = Long.MAX_VALUE; for (final QuotaLimiter limiter : limiters) { if (limiter.isBypass()) { @@ -121,13 +128,13 @@ private void checkQuota(long numWrites, long numReads) throws RpcThrottlingExcep limiter.checkQuota(Math.min(maxWritesToEstimate, numWrites), Math.min(maxWriteSizeToEstimate, writeConsumed), Math.min(maxReadsToEstimate, numReads), Math.min(maxReadSizeToEstimate, readConsumed), writeCapacityUnitConsumed, - readCapacityUnitConsumed); + readCapacityUnitConsumed, isAtomic); readAvailable = Math.min(readAvailable, limiter.getReadAvailable()); } for (final QuotaLimiter limiter : limiters) { limiter.grabQuota(numWrites, writeConsumed, numReads, readConsumed, writeCapacityUnitConsumed, - readCapacityUnitConsumed); + readCapacityUnitConsumed, isAtomic); } } @@ -154,10 +161,10 @@ public void close() { for (final QuotaLimiter limiter : limiters) { if (writeDiff != 0) { - limiter.consumeWrite(writeDiff, writeCapacityUnitDiff); + limiter.consumeWrite(writeDiff, writeCapacityUnitDiff, isAtomic); } if (readDiff != 0) { - limiter.consumeRead(readDiff, readCapacityUnitDiff); + limiter.consumeRead(readDiff, readCapacityUnitDiff, isAtomic); } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/ExceedOperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/ExceedOperationQuota.java index 3077d6dac537..7dcfec6b0623 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/ExceedOperationQuota.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/ExceedOperationQuota.java @@ -49,10 +49,11 @@ public ExceedOperationQuota(final Configuration conf, int blockSizeBytes, } @Override - public void checkBatchQuota(int numWrites, int numReads) throws RpcThrottlingException { + public void checkBatchQuota(int 
numWrites, int numReads, boolean isAtomic) + throws RpcThrottlingException { Runnable estimateQuota = () -> updateEstimateConsumeBatchQuota(numWrites, numReads); - CheckQuotaRunnable checkQuota = () -> super.checkBatchQuota(numWrites, numReads); - checkQuota(estimateQuota, checkQuota, numWrites, numReads, 0); + CheckQuotaRunnable checkQuota = () -> super.checkBatchQuota(numWrites, numReads, isAtomic); + checkQuota(estimateQuota, checkQuota, numWrites, numReads, 0, isAtomic); } @Override @@ -62,11 +63,11 @@ public void checkScanQuota(ClientProtos.ScanRequest scanRequest, long maxScanner maxBlockBytesScanned, prevBlockBytesScannedDifference); CheckQuotaRunnable checkQuota = () -> super.checkScanQuota(scanRequest, maxScannerResultSize, maxBlockBytesScanned, prevBlockBytesScannedDifference); - checkQuota(estimateQuota, checkQuota, 0, 0, 1); + checkQuota(estimateQuota, checkQuota, 0, 0, 1, false); } private void checkQuota(Runnable estimateQuota, CheckQuotaRunnable checkQuota, int numWrites, - int numReads, int numScans) throws RpcThrottlingException { + int numReads, int numScans, boolean isAtomic) throws RpcThrottlingException { if (regionServerLimiter.isBypass()) { // If region server limiter is bypass, which means no region server quota is set, check and // throttle by all other quotas. In this condition, exceed throttle quota will not work. @@ -77,7 +78,7 @@ private void checkQuota(Runnable estimateQuota, CheckQuotaRunnable checkQuota, i estimateQuota.run(); // 2. Check if region server limiter is enough. If not, throw RpcThrottlingException. regionServerLimiter.checkQuota(numWrites, writeConsumed, numReads + numScans, readConsumed, - writeCapacityUnitConsumed, readCapacityUnitConsumed); + writeCapacityUnitConsumed, readCapacityUnitConsumed, isAtomic); // 3. Check if other limiters are enough. If not, exceed other limiters because region server // limiter is enough. boolean exceed = false; @@ -93,13 +94,13 @@ private void checkQuota(Runnable estimateQuota, CheckQuotaRunnable checkQuota, i // 4. Region server limiter is enough and grab estimated consume quota. readAvailable = Math.max(readAvailable, regionServerLimiter.getReadAvailable()); regionServerLimiter.grabQuota(numWrites, writeConsumed, numReads + numScans, readConsumed, - writeCapacityUnitConsumed, writeCapacityUnitConsumed); + writeCapacityUnitConsumed, writeCapacityUnitConsumed, isAtomic); if (exceed) { // 5. Other quota limiter is exceeded and has not been grabbed (because throw // RpcThrottlingException in Step 3), so grab it. 
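+ // Propagate the atomic flag so that each limiter also charges its atomic-specific quota.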
for (final QuotaLimiter limiter : limiters) { limiter.grabQuota(numWrites, writeConsumed, numReads + numScans, readConsumed, - writeCapacityUnitConsumed, writeCapacityUnitConsumed); + writeCapacityUnitConsumed, writeCapacityUnitConsumed, isAtomic); } } } @@ -109,10 +110,10 @@ private void checkQuota(Runnable estimateQuota, CheckQuotaRunnable checkQuota, i public void close() { super.close(); if (writeDiff != 0) { - regionServerLimiter.consumeWrite(writeDiff, writeCapacityUnitDiff); + regionServerLimiter.consumeWrite(writeDiff, writeCapacityUnitDiff, false); } if (readDiff != 0) { - regionServerLimiter.consumeRead(readDiff, readCapacityUnitDiff); + regionServerLimiter.consumeRead(readDiff, readCapacityUnitDiff, false); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/GlobalQuotaSettingsImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/GlobalQuotaSettingsImpl.java index ebde3ed80dc9..6afbebc6e861 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/GlobalQuotaSettingsImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/GlobalQuotaSettingsImpl.java @@ -159,6 +159,21 @@ private boolean hasThrottle(QuotaProtos.ThrottleType quotaType, hasThrottle = true; } break; + case ATOMIC_READ_SIZE: + if (throttleBuilder.hasAtomicReadSize()) { + hasThrottle = true; + } + break; + case ATOMIC_REQUEST_NUMBER: + if (throttleBuilder.hasAtomicReqNum()) { + hasThrottle = true; + } + break; + case ATOMIC_WRITE_SIZE: + if (throttleBuilder.hasAtomicWriteSize()) { + hasThrottle = true; + } + break; default: } return hasThrottle; @@ -212,6 +227,15 @@ protected GlobalQuotaSettingsImpl merge(QuotaSettings other) throws IOException case WRITE_CAPACITY_UNIT: throttleBuilder.clearWriteCapacityUnit(); break; + case ATOMIC_READ_SIZE: + throttleBuilder.clearAtomicReadSize(); + break; + case ATOMIC_REQUEST_NUMBER: + throttleBuilder.clearAtomicReqNum(); + break; + case ATOMIC_WRITE_SIZE: + throttleBuilder.clearAtomicWriteSize(); + break; default: } boolean hasThrottle = false; @@ -262,6 +286,15 @@ protected GlobalQuotaSettingsImpl merge(QuotaSettings other) throws IOException case WRITE_CAPACITY_UNIT: throttleBuilder.setWriteCapacityUnit(otherProto.getTimedQuota()); break; + case ATOMIC_READ_SIZE: + throttleBuilder.setAtomicReadSize(otherProto.getTimedQuota()); + break; + case ATOMIC_REQUEST_NUMBER: + throttleBuilder.setAtomicReqNum(otherProto.getTimedQuota()); + break; + case ATOMIC_WRITE_SIZE: + throttleBuilder.setAtomicWriteSize(otherProto.getTimedQuota()); + break; default: } } @@ -341,11 +374,14 @@ public String toString() { case REQUEST_NUMBER: case WRITE_NUMBER: case READ_NUMBER: + case ATOMIC_REQUEST_NUMBER: builder.append(String.format("%dreq", timedQuota.getSoftLimit())); break; case REQUEST_SIZE: case WRITE_SIZE: case READ_SIZE: + case ATOMIC_READ_SIZE: + case ATOMIC_WRITE_SIZE: builder.append(sizeToString(timedQuota.getSoftLimit())); break; case REQUEST_CAPACITY_UNIT: diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java index 63cf97188d86..9143e12de004 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java @@ -43,7 +43,8 @@ public static OperationQuota get() { } @Override - public void checkBatchQuota(int numWrites, int numReads) throws RpcThrottlingException { + public void 
checkBatchQuota(int numWrites, int numReads, boolean isAtomic) + throws RpcThrottlingException { // no-op } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopQuotaLimiter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopQuotaLimiter.java index 5ece0be2b5aa..7c02dbc1134f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopQuotaLimiter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopQuotaLimiter.java @@ -34,24 +34,24 @@ private NoopQuotaLimiter() { @Override public void checkQuota(long writeReqs, long estimateWriteSize, long readReqs, - long estimateReadSize, long estimateWriteCapacityUnit, long estimateReadCapacityUnit) - throws RpcThrottlingException { + long estimateReadSize, long estimateWriteCapacityUnit, long estimateReadCapacityUnit, + boolean isAtomic) throws RpcThrottlingException { // no-op } @Override public void grabQuota(long writeReqs, long writeSize, long readReqs, long readSize, - long writeCapacityUnit, long readCapacityUnit) { + long writeCapacityUnit, long readCapacityUnit, boolean isAtomic) { // no-op } @Override - public void consumeWrite(final long size, long capacityUnit) { + public void consumeWrite(final long size, long capacityUnit, boolean isAtomic) { // no-op } @Override - public void consumeRead(final long size, long capacityUnit) { + public void consumeRead(final long size, long capacityUnit, boolean isAtomic) { // no-op } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java index 0d9b48b6074b..b95a617e127f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java @@ -57,7 +57,7 @@ public enum OperationType { * @throws RpcThrottlingException if the operation cannot be performed because RPC quota is * exceeded. */ - void checkBatchQuota(int numWrites, int numReads) throws RpcThrottlingException; + void checkBatchQuota(int numWrites, int numReads, boolean isAtomic) throws RpcThrottlingException; /** * Checks if it is possible to execute the scan. The quota will be estimated based on the diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaCache.java index 760703a428b2..cecda2a154c6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaCache.java @@ -218,6 +218,10 @@ void triggerCacheRefresh() { refreshChore.triggerNow(); } + void forceSynchronousCacheRefresh() { + refreshChore.chore(); + } + long getLastUpdate() { return refreshChore.lastUpdate; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaLimiter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaLimiter.java index 12e4c4a7c6a9..1b5a1302a207 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaLimiter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaLimiter.java @@ -42,7 +42,8 @@ public interface QuotaLimiter { * @throws RpcThrottlingException thrown if not enough available resources to perform operation. 
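+ * @param isAtomic whether the operation is atomic (e.g. a check-and-mutate); when true the atomic-specific limiters are also checked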
*/ void checkQuota(long writeReqs, long estimateWriteSize, long readReqs, long estimateReadSize, - long estimateWriteCapacityUnit, long estimateReadCapacityUnit) throws RpcThrottlingException; + long estimateWriteCapacityUnit, long estimateReadCapacityUnit, boolean isAtomic) + throws RpcThrottlingException; /** * Removes the specified write and read amount from the quota. At this point the write and read @@ -56,19 +57,19 @@ void checkQuota(long writeReqs, long estimateWriteSize, long readReqs, long esti * @param readCapacityUnit the read capacity unit num that will be removed from the current quota */ void grabQuota(long writeReqs, long writeSize, long readReqs, long readSize, - long writeCapacityUnit, long readCapacityUnit); + long writeCapacityUnit, long readCapacityUnit, boolean isAtomic); /** * Removes or add back some write amount to the quota. (called at the end of an operation in case * the estimate quota was off) */ - void consumeWrite(long size, long capacityUnit); + void consumeWrite(long size, long capacityUnit, boolean isAtomic); /** * Removes or add back some read amount to the quota. (called at the end of an operation in case * the estimate quota was off) */ - void consumeRead(long size, long capacityUnit); + void consumeRead(long size, long capacityUnit, boolean isAtomic); /** Returns true if the limiter is a noop */ boolean isBypass(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaUtil.java index b4887392196d..ba65cec01d7e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/QuotaUtil.java @@ -95,6 +95,12 @@ public class QuotaUtil extends QuotaTableUtil { "hbase.quota.default.user.machine.write.num"; public static final String QUOTA_DEFAULT_USER_MACHINE_WRITE_SIZE = "hbase.quota.default.user.machine.write.size"; + public static final String QUOTA_DEFAULT_USER_MACHINE_ATOMIC_READ_SIZE = + "hbase.quota.default.user.machine.atomic.read.size"; + public static final String QUOTA_DEFAULT_USER_MACHINE_ATOMIC_REQUEST_NUM = + "hbase.quota.default.user.machine.atomic.request.num"; + public static final String QUOTA_DEFAULT_USER_MACHINE_ATOMIC_WRITE_SIZE = + "hbase.quota.default.user.machine.atomic.write.size"; /** Table descriptor for Quota internal table */ public static final HTableDescriptor QUOTA_TABLE_DESC = new HTableDescriptor(QUOTA_TABLE_NAME); @@ -388,6 +394,12 @@ protected static UserQuotaState buildDefaultUserQuotaState(Configuration conf, l .ifPresent(throttleBuilder::setWriteNum); buildDefaultTimedQuota(conf, QUOTA_DEFAULT_USER_MACHINE_WRITE_SIZE) .ifPresent(throttleBuilder::setWriteSize); + buildDefaultTimedQuota(conf, QUOTA_DEFAULT_USER_MACHINE_ATOMIC_READ_SIZE) + .ifPresent(throttleBuilder::setAtomicReadSize); + buildDefaultTimedQuota(conf, QUOTA_DEFAULT_USER_MACHINE_ATOMIC_REQUEST_NUM) + .ifPresent(throttleBuilder::setAtomicReqNum); + buildDefaultTimedQuota(conf, QUOTA_DEFAULT_USER_MACHINE_ATOMIC_WRITE_SIZE) + .ifPresent(throttleBuilder::setAtomicWriteSize); UserQuotaState state = new UserQuotaState(nowTs); QuotaProtos.Quotas defaultQuotas = diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/RegionServerRpcQuotaManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/RegionServerRpcQuotaManager.java index f9a7ccba401b..d847a9eb3dc2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/RegionServerRpcQuotaManager.java 
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/RegionServerRpcQuotaManager.java @@ -186,11 +186,11 @@ public OperationQuota checkBatchQuota(final Region region, final OperationQuota.OperationType type) throws IOException, RpcThrottlingException { switch (type) { case GET: - return this.checkBatchQuota(region, 0, 1); + return this.checkBatchQuota(region, 0, 1, false); case MUTATE: - return this.checkBatchQuota(region, 1, 0); + return this.checkBatchQuota(region, 1, 0, false); case CHECK_AND_MUTATE: - return this.checkBatchQuota(region, 1, 1); + return this.checkBatchQuota(region, 1, 1, true); } throw new RuntimeException("Invalid operation type: " + type); } @@ -201,6 +201,7 @@ public OperationQuota checkBatchQuota(final Region region, throws IOException, RpcThrottlingException { int numWrites = 0; int numReads = 0; + boolean isAtomic = false; for (final ClientProtos.Action action : actions) { if (action.hasMutation()) { numWrites++; @@ -208,12 +209,16 @@ public OperationQuota checkBatchQuota(final Region region, QuotaUtil.getQuotaOperationType(action, hasCondition); if (operationType == OperationQuota.OperationType.CHECK_AND_MUTATE) { numReads++; + // If any mutations in this batch are atomic, we will count the entire batch as atomic. + // This is a conservative approach, but it is the best that we can do without knowing + // the block bytes scanned of each individual action. + isAtomic = true; } } else if (action.hasGet()) { numReads++; } } - return checkBatchQuota(region, numWrites, numReads); + return checkBatchQuota(region, numWrites, numReads, isAtomic); } /** @@ -227,7 +232,7 @@ public OperationQuota checkBatchQuota(final Region region, */ @Override public OperationQuota checkBatchQuota(final Region region, final int numWrites, - final int numReads) throws IOException, RpcThrottlingException { + final int numReads, boolean isAtomic) throws IOException, RpcThrottlingException { Optional user = RpcServer.getRequestUser(); UserGroupInformation ugi; if (user.isPresent()) { @@ -240,7 +245,7 @@ public OperationQuota checkBatchQuota(final Region region, final int numWrites, OperationQuota quota = getQuota(ugi, table, region.getMinBlockSizeBytes()); try { - quota.checkBatchQuota(numWrites, numReads); + quota.checkBatchQuota(numWrites, numReads, isAtomic); } catch (RpcThrottlingException e) { LOG.debug("Throttling exception for user=" + ugi.getUserName() + " table=" + table + " numWrites=" + numWrites + " numReads=" + numReads + ": " + e.getMessage()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/RpcQuotaManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/RpcQuotaManager.java index 60392ca3b3f6..3f84f11a7e5e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/RpcQuotaManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/RpcQuotaManager.java @@ -87,6 +87,6 @@ OperationQuota checkBatchQuota(final Region region, final List 0) { RpcThrottlingException.throwNumRequestsExceeded(waitInterval); @@ -156,6 +184,12 @@ public void checkQuota(long writeReqs, long estimateWriteSize, long readReqs, if (waitInterval > 0) { RpcThrottlingException.throwRequestCapacityUnitExceeded(waitInterval); } + if (isAtomic) { + waitInterval = atomicReqLimiter.getWaitIntervalMs(writeReqs + readReqs); + if (waitInterval > 0) { + RpcThrottlingException.throwAtomicRequestNumberExceeded(waitInterval); + } + } if (estimateWriteSize > 0) { waitInterval = writeReqsLimiter.getWaitIntervalMs(writeReqs); @@ -170,6 +204,12 @@ 
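For illustration only (this sketch is not part of the patch): the QUOTA_DEFAULT_USER_MACHINE_ATOMIC_* properties added to QuotaUtil above supply default throttles for atomic operations (checkAndMutate and similar) when a user has no explicit quota. A minimal sketch of how an operator might set them, assuming they take plain long values like the existing default machine quota keys; the class name and the concrete numbers are made up:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public final class AtomicQuotaDefaultsSketch {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // Illustrative numbers only: default per-user, per-machine throttle for atomic
        // operations, applied when no explicit quota exists for the user.
        conf.setLong("hbase.quota.default.user.machine.atomic.request.num", 100L);
        conf.setLong("hbase.quota.default.user.machine.atomic.read.size", 10L * 1024 * 1024);
        conf.setLong("hbase.quota.default.user.machine.atomic.write.size", 10L * 1024 * 1024);
        // buildDefaultUserQuotaState(...) above reads these keys and, when present, wires them
        // into the atomic limiters that TimeBasedLimiter consults when isAtomic is true.
      }
    }
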
public void checkQuota(long writeReqs, long estimateWriteSize, long readReqs, if (waitInterval > 0) { RpcThrottlingException.throwWriteCapacityUnitExceeded(waitInterval); } + if (isAtomic) { + waitInterval = atomicWriteSizeLimiter.getWaitIntervalMs(writeReqs); + if (waitInterval > 0) { + RpcThrottlingException.throwAtomicWriteSizeExceeded(waitInterval); + } + } } if (estimateReadSize > 0) { @@ -185,12 +225,18 @@ public void checkQuota(long writeReqs, long estimateWriteSize, long readReqs, if (waitInterval > 0) { RpcThrottlingException.throwReadCapacityUnitExceeded(waitInterval); } + if (isAtomic) { + waitInterval = atomicReadSizeLimiter.getWaitIntervalMs(writeReqs + readReqs); + if (waitInterval > 0) { + RpcThrottlingException.throwAtomicReadSizeExceeded(waitInterval); + } + } } } @Override public void grabQuota(long writeReqs, long writeSize, long readReqs, long readSize, - long writeCapacityUnit, long readCapacityUnit) { + long writeCapacityUnit, long readCapacityUnit, boolean isAtomic) { assert writeSize != 0 || readSize != 0; reqsLimiter.consume(writeReqs + readReqs); @@ -212,22 +258,37 @@ public void grabQuota(long writeReqs, long writeSize, long readReqs, long readSi reqCapacityUnitLimiter.consume(readCapacityUnit); readCapacityUnitLimiter.consume(readCapacityUnit); } + if (isAtomic) { + atomicReqLimiter.consume(writeReqs + readReqs); + if (readSize > 0) { + atomicReadSizeLimiter.consume(readSize); + } + if (writeSize > 0) { + atomicWriteSizeLimiter.consume(writeSize); + } + } } @Override - public void consumeWrite(final long size, long capacityUnit) { + public void consumeWrite(final long size, long capacityUnit, boolean isAtomic) { reqSizeLimiter.consume(size); writeSizeLimiter.consume(size); reqCapacityUnitLimiter.consume(capacityUnit); writeCapacityUnitLimiter.consume(capacityUnit); + if (isAtomic) { + atomicWriteSizeLimiter.consume(size); + } } @Override - public void consumeRead(final long size, long capacityUnit) { + public void consumeRead(final long size, long capacityUnit, boolean isAtomic) { reqSizeLimiter.consume(size); readSizeLimiter.consume(size); reqCapacityUnitLimiter.consume(capacityUnit); readCapacityUnitLimiter.consume(capacityUnit); + if (isAtomic) { + atomicReadSizeLimiter.consume(size); + } } @Override @@ -307,6 +368,15 @@ public String toString() { if (!readCapacityUnitLimiter.isBypass()) { builder.append(" readCapacityUnit=" + readCapacityUnitLimiter); } + if (!atomicReqLimiter.isBypass()) { + builder.append(" atomicReqLimiter=" + atomicReqLimiter); + } + if (!atomicReadSizeLimiter.isBypass()) { + builder.append(" atomicReadSizeLimiter=" + atomicReadSizeLimiter); + } + if (!atomicWriteSizeLimiter.isBypass()) { + builder.append(" atomicWriteSizeLimiter=" + atomicWriteSizeLimiter); + } builder.append(')'); return builder.toString(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 2381458a48bb..709b38ae926e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -2269,6 +2269,102 @@ public boolean compact(CompactionContext compaction, HStore store, return compact(compaction, store, throughputController, null); } + /** + *

+ * We are trying to remove / relax the region read lock for compaction. Let's see what are the
+ * potential race conditions among the operations (user scan, region split, region close and
+ * region bulk load).
+ *
+ * <pre>
+   *   user scan ---> region read lock
+   *   region split --> region close first --> region write lock
+   *   region close --> region write lock
+   *   region bulk load --> region write lock
+ * </pre>
+ *
+ * Read lock is compatible with read lock ---> no problem with user scan/read. Region bulk load
+ * does not cause a problem for compaction (no consistency problem, store lock will help the store
+ * file accounting). They can run almost concurrently at the region level.
+ *
+ * The only remaining race condition is between the region close and compaction. So we will
+ * evaluate, below, how region close intervenes with compaction if compaction does not acquire
+ * the region read lock.
+ *
+ * Here are the steps for compaction:
+ * <ol>
+ * <li>obtain list of StoreFile's</li>
+ * <li>create StoreFileScanner's based on list from #1</li>
+ * <li>perform compaction and save resulting files under tmp dir</li>
+ * <li>swap in compacted files</li>
+ * </ol>
+ *
+ * #1 is guarded by store lock. This patch does not change this --> no worse or better. For #2, we
+ * obtain the smallest read point (for the region) across all the Scanners (for both default
+ * compactor and stripe compactor). The read points are for user scans. Region keeps the read
+ * points for all currently open user scanners. Compaction needs to know the smallest read point
+ * so that during re-write of the hfiles, it can remove the mvcc points for the cells if their
+ * mvccs are older than the smallest since they are not needed anymore. This will not conflict
+ * with compaction.
+ *
+ * For #3, it can be performed in parallel to other operations.
+ *
+ * For #4, bulk load and compaction don't conflict with each other on the region level (for
+ * multi-family atomicity).
+ *
+ * Region close and compaction are guarded pretty well by the 'writestate'. In HRegion#doClose(),
+ * we have:
+ *
+ * <pre>
+   * synchronized (writestate) {
+   *   // Disable compacting and flushing by background threads for this
+   *   // region.
+   *   canFlush = !writestate.readOnly;
+   *   writestate.writesEnabled = false;
+   *   LOG.debug("Closing " + this + ": disabling compactions & flushes");
+   *   waitForFlushesAndCompactions();
+   * }
+ * </pre>
+ *
+ * {@code waitForFlushesAndCompactions()} would wait for {@code writestate.compacting} to come
+ * down to 0, and in {@code HRegion.compact()}:
+ *
+ * <pre>
+   *   try {
+   *     synchronized (writestate) {
+   *       if (writestate.writesEnabled) {
+   *         wasStateSet = true;
+   *         ++writestate.compacting;
+   *       } else {
+   *         String msg = "NOT compacting region " + this + ". Writes disabled.";
+   *         LOG.info(msg);
+   *         status.abort(msg);
+   *         return false;
+   *       }
+   *     }
+   *   }
+ * </pre>
+ *
+ * Also in {@code compactor.performCompaction()}: check periodically to see if a system stop is
+ * requested:
+ *
+ * <pre>
+   * if (closeChecker != null && closeChecker.isTimeLimit(store, now)) {
+   *   progress.cancel();
+   *   return false;
+   * }
+   * if (closeChecker != null && closeChecker.isSizeLimit(store, len)) {
+   *   progress.cancel();
+   *   return false;
+   * }
+ * </pre>
+ *
+ */ public boolean compact(CompactionContext compaction, HStore store, ThroughputController throughputController, User user) throws IOException { assert compaction != null && compaction.hasSelection(); @@ -2280,40 +2376,6 @@ public boolean compact(CompactionContext compaction, HStore store, } MonitoredTask status = null; boolean requestNeedsCancellation = true; - /* - * We are trying to remove / relax the region read lock for compaction. Let's see what are the - * potential race conditions among the operations (user scan, region split, region close and - * region bulk load). user scan ---> region read lock region split --> region close first --> - * region write lock region close --> region write lock region bulk load --> region write lock - * read lock is compatible with read lock. ---> no problem with user scan/read region bulk load - * does not cause problem for compaction (no consistency problem, store lock will help the store - * file accounting). They can run almost concurrently at the region level. The only remaining - * race condition is between the region close and compaction. So we will evaluate, below, how - * region close intervenes with compaction if compaction does not acquire region read lock. Here - * are the steps for compaction: 1. obtain list of StoreFile's 2. create StoreFileScanner's - * based on list from #1 3. perform compaction and save resulting files under tmp dir 4. swap in - * compacted files #1 is guarded by store lock. This patch does not change this --> no worse or - * better For #2, we obtain smallest read point (for region) across all the Scanners (for both - * default compactor and stripe compactor). The read points are for user scans. Region keeps the - * read points for all currently open user scanners. Compaction needs to know the smallest read - * point so that during re-write of the hfiles, it can remove the mvcc points for the cells if - * their mvccs are older than the smallest since they are not needed anymore. This will not - * conflict with compaction. For #3, it can be performed in parallel to other operations. For #4 - * bulk load and compaction don't conflict with each other on the region level (for multi-family - * atomicy). Region close and compaction are guarded pretty well by the 'writestate'. In - * HRegion#doClose(), we have : synchronized (writestate) { // Disable compacting and flushing - * by background threads for this // region. canFlush = !writestate.readOnly; - * writestate.writesEnabled = false; LOG.debug("Closing " + this + - * ": disabling compactions & flushes"); waitForFlushesAndCompactions(); } - * waitForFlushesAndCompactions() would wait for writestate.compacting to come down to 0. and in - * HRegion.compact() try { synchronized (writestate) { if (writestate.writesEnabled) { - * wasStateSet = true; ++writestate.compacting; } else { String msg = "NOT compacting region " + - * this + ". 
Writes disabled."; LOG.info(msg); status.abort(msg); return false; } } Also in - * compactor.performCompaction(): check periodically to see if a system stop is requested if - * (closeChecker != null && closeChecker.isTimeLimit(store, now)) { progress.cancel(); return - * false; } if (closeChecker != null && closeChecker.isSizeLimit(store, len)) { - * progress.cancel(); return false; } - */ try { byte[] cf = Bytes.toBytes(store.getColumnFamilyName()); if (stores.get(cf) != store) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 351b4fef191e..89f528af7573 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -169,6 +169,7 @@ import org.apache.hadoop.hbase.regionserver.handler.RSProcedureHandler; import org.apache.hadoop.hbase.regionserver.handler.RegionReplicaFlushHandler; import org.apache.hadoop.hbase.regionserver.http.RSDumpServlet; +import org.apache.hadoop.hbase.regionserver.http.RSHealthServlet; import org.apache.hadoop.hbase.regionserver.http.RSStatusServlet; import org.apache.hadoop.hbase.regionserver.throttle.FlushThroughputControllerFactory; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; @@ -383,7 +384,7 @@ public class HRegionServer extends Thread // A state before we go into stopped state. At this stage we're closing user // space regions. - private boolean stopping = false; + private volatile boolean stopping = false; private volatile boolean killed = false; private volatile boolean shutDown = false; @@ -864,6 +865,10 @@ protected Class getDumpServlet() { return RSDumpServlet.class; } + protected Class getHealthServlet() { + return RSHealthServlet.class; + } + /** * Used by {@link RSDumpServlet} to generate debugging information. */ @@ -2466,6 +2471,7 @@ private void putUpWebUI() throws IOException { try { this.infoServer = new InfoServer(getProcessName(), addr, port, false, this.conf); infoServer.addPrivilegedServlet("dump", "/dump", getDumpServlet()); + infoServer.addPrivilegedServlet("health", "/health", getHealthServlet()); configureInfoServer(); this.infoServer.start(); break; @@ -3193,6 +3199,10 @@ public boolean isStopping() { return this.stopping; } + public boolean isKilled() { + return this.killed; + } + @Override public Configuration getConfiguration() { return conf; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 1df8d0b95807..710c94753093 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -832,7 +832,7 @@ protected List flushCache(final long logCacheFlushId, MemStoreSnapshot sna try { for (Path pathName : pathNames) { lastPathName = pathName; - storeEngine.validateStoreFile(pathName); + storeEngine.validateStoreFile(pathName, false); } return pathNames; } catch (Exception e) { @@ -1118,7 +1118,7 @@ public void deleteChangedReaderObserver(ChangedReadersObserver o) { * block for long periods. *

* During this time, the Store can work as usual, getting values from StoreFiles and writing new - * StoreFiles from the memstore. Existing StoreFiles are not destroyed until the new compacted + * StoreFiles from the MemStore. Existing StoreFiles are not destroyed until the new compacted * StoreFile is completely written-out to disk. *

* The compactLock prevents multiple simultaneous compactions. The structureLock prevents us from @@ -1129,21 +1129,29 @@ public void deleteChangedReaderObserver(ChangedReadersObserver o) { *

* Compaction event should be idempotent, since there is no IO Fencing for the region directory in * hdfs. A region server might still try to complete the compaction after it lost the region. That - * is why the following events are carefully ordered for a compaction: 1. Compaction writes new - * files under region/.tmp directory (compaction output) 2. Compaction atomically moves the - * temporary file under region directory 3. Compaction appends a WAL edit containing the - * compaction input and output files. Forces sync on WAL. 4. Compaction deletes the input files - * from the region directory. Failure conditions are handled like this: - If RS fails before 2, - * compaction wont complete. Even if RS lives on and finishes the compaction later, it will only - * write the new data file to the region directory. Since we already have this data, this will be - * idempotent but we will have a redundant copy of the data. - If RS fails between 2 and 3, the - * region will have a redundant copy of the data. The RS that failed won't be able to finish - * sync() for WAL because of lease recovery in WAL. - If RS fails after 3, the region region - * server who opens the region will pick up the the compaction marker from the WAL and replay it - * by removing the compaction input files. Failed RS can also attempt to delete those files, but - * the operation will be idempotent See HBASE-2231 for details. + * is why the following events are carefully ordered for a compaction: + *

+ * <ol>
+ * <li>Compaction writes new files under region/.tmp directory (compaction output)</li>
+ * <li>Compaction atomically moves the temporary file under region directory</li>
+ * <li>Compaction appends a WAL edit containing the compaction input and output files. Forces sync
+ * on WAL.</li>
+ * <li>Compaction deletes the input files from the region directory.</li>
+ * </ol>
+ * Failure conditions are handled like this:
+ * <ul>
+ * <li>If RS fails before 2, compaction won't complete. Even if RS lives on and finishes the
+ * compaction later, it will only write the new data file to the region directory. Since we
+ * already have this data, this will be idempotent, but we will have a redundant copy of the
+ * data.</li>
+ * <li>If RS fails between 2 and 3, the region will have a redundant copy of the data. The RS that
+ * failed won't be able to finish sync() for WAL because of lease recovery in WAL.</li>
+ * <li>If RS fails after 3, the region server who opens the region will pick up the compaction
+ * marker from the WAL and replay it by removing the compaction input files. Failed RS can also
+ * attempt to delete those files, but the operation will be idempotent.</li>
+ * </ul>
+ * See HBASE-2231 for details. * @param compaction compaction details obtained from requestCompaction() - * @return Storefile we compacted into or null if we failed or opted out early. + * @return The storefiles that we compacted into or null if we failed or opted out early. */ public List compact(CompactionContext compaction, ThroughputController throughputController, User user) throws IOException { @@ -1186,7 +1194,7 @@ protected List doCompaction(CompactionRequestImpl cr, throws IOException { // Do the steps necessary to complete the compaction. setStoragePolicyFromFileName(newFiles); - List sfs = storeEngine.commitStoreFiles(newFiles, true); + List sfs = storeEngine.commitStoreFiles(newFiles, true, true); if (this.getCoprocessorHost() != null) { for (HStoreFile sf : sfs) { getCoprocessorHost().postCompact(this, sf, cr.getTracker(), cr, user); @@ -1978,7 +1986,7 @@ public boolean commit(MonitoredTask status) throws IOException { return false; } status.setStatus("Flushing " + this + ": reopening flushed file"); - List storeFiles = storeEngine.commitStoreFiles(tempFiles, false); + List storeFiles = storeEngine.commitStoreFiles(tempFiles, false, false); for (HStoreFile sf : storeFiles) { StoreFileReader r = sf.getReader(); if (LOG.isInfoEnabled()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java index 2bd396242a17..a256e8827a39 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java @@ -29,6 +29,8 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.GlobalStorageStatistics; +import org.apache.hadoop.fs.StorageStatistics; import org.apache.hadoop.hbase.CompatibilitySingletonFactory; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HDFSBlocksDistribution; @@ -1052,6 +1054,29 @@ public long getLocalBytesRead() { return FSDataInputStreamWrapper.getLocalBytesRead(); } + @Override + public long getLocalRackBytesRead() { + return getGlobalStorageStatistic("bytesReadDistanceOfOneOrTwo"); + } + + @Override + public long getRemoteRackBytesRead() { + return getGlobalStorageStatistic("bytesReadDistanceOfThreeOrFour") + + getGlobalStorageStatistic("bytesReadDistanceOfFiveOrLarger"); + } + + private static long getGlobalStorageStatistic(String name) { + StorageStatistics stats = GlobalStorageStatistics.INSTANCE.get("hdfs"); + if (stats == null) { + return 0; + } + Long val = stats.getLong(name); + if (val == null) { + return 0; + } + return val; + } + @Override public long getShortCircuitBytesRead() { return FSDataInputStreamWrapper.getShortCircuitBytesRead(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java index 929b24e521a2..52b3b54f4b24 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java @@ -229,7 +229,7 @@ public OperationQuota checkBatchQuota(Region region, OperationQuota.OperationTyp @Override public OperationQuota checkBatchQuota(final Region region, int 
numWrites, int numReads) throws IOException, RpcThrottlingException { - return rpcQuotaManager.checkBatchQuota(region, numWrites, numReads); + return rpcQuotaManager.checkBatchQuota(region, numWrites, numReads, false); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java index 5923befbc9de..8d81c90144ff 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java @@ -36,7 +36,9 @@ import java.util.function.Function; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.BloomFilterMetrics; import org.apache.hadoop.hbase.log.HBaseMarkers; import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; @@ -95,6 +97,9 @@ public abstract class StoreEngine openStoreFiles(Collection files, boolean } if (ioe != null) { // close StoreFile readers - boolean evictOnClose = - ctx.getCacheConf() != null ? ctx.getCacheConf().shouldEvictOnClose() : true; + boolean evictOnClose = ctx.getCacheConf() == null || ctx.getCacheConf().shouldEvictOnClose(); for (HStoreFile file : results) { try { if (file != null) { @@ -315,10 +341,8 @@ private List openStoreFiles(Collection files, boolean for (HStoreFile storeFile : results) { if (compactedStoreFiles.contains(storeFile.getPath().getName())) { LOG.warn("Clearing the compacted storefile {} from {}", storeFile, this); - storeFile.getReader() - .close(storeFile.getCacheConf() != null - ? storeFile.getCacheConf().shouldEvictOnClose() - : true); + storeFile.getReader().close( + storeFile.getCacheConf() == null || storeFile.getCacheConf().shouldEvictOnClose()); filesToRemove.add(storeFile); } } @@ -380,7 +404,7 @@ private void refreshStoreFilesInternal(Collection newFiles) throw compactedFilesSet.put(sf.getFileInfo(), sf); } - Set newFilesSet = new HashSet(newFiles); + Set newFilesSet = new HashSet<>(newFiles); // Exclude the files that have already been compacted newFilesSet = Sets.difference(newFilesSet, compactedFilesSet.keySet()); Set toBeAddedFiles = Sets.difference(newFilesSet, currentFilesSet.keySet()); @@ -390,8 +414,8 @@ private void refreshStoreFilesInternal(Collection newFiles) throw return; } - LOG.info("Refreshing store files for " + this + " files to add: " + toBeAddedFiles - + " files to remove: " + toBeRemovedFiles); + LOG.info("Refreshing store files for {} files to add: {} files to remove: {}", this, + toBeAddedFiles, toBeRemovedFiles); Set toBeRemovedStoreFiles = new HashSet<>(toBeRemovedFiles.size()); for (StoreFileInfo sfi : toBeRemovedFiles) { @@ -401,7 +425,7 @@ private void refreshStoreFilesInternal(Collection newFiles) throw // try to open the files List openedFiles = openStoreFiles(toBeAddedFiles, false); - // propogate the file changes to the underlying store file manager + // propagate the file changes to the underlying store file manager replaceStoreFiles(toBeRemovedStoreFiles, openedFiles, () -> { }, () -> { }); // won't throw an exception @@ -411,11 +435,13 @@ private void refreshStoreFilesInternal(Collection newFiles) throw * Commit the given {@code files}. *
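For illustration only (not part of the patch; the helper class and parameter values are hypothetical): with the widened RpcQuotaManager/RegionCoprocessorHost API above, callers now pass an explicit isAtomic flag. A caller performing its own checkAndMutate-style operation might check quota roughly like this:

    import java.io.IOException;
    import org.apache.hadoop.hbase.quotas.OperationQuota;
    import org.apache.hadoop.hbase.quotas.RpcQuotaManager;
    import org.apache.hadoop.hbase.quotas.RpcThrottlingException;
    import org.apache.hadoop.hbase.regionserver.Region;

    public final class AtomicBatchQuotaSketch {
      private AtomicBatchQuotaSketch() {
      }

      // One write plus one read, flagged as atomic so the atomic request/read/write limiters
      // introduced in this patch are consulted in addition to the regular ones.
      public static OperationQuota checkAtomicOperation(RpcQuotaManager quotaManager, Region region)
        throws IOException, RpcThrottlingException {
        return quotaManager.checkBatchQuota(region, 1, 1, true);
      }
    }
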

* We will move the file into data directory, and open it. - * @param files the files want to commit - * @param validate whether to validate the store files + * @param files the files want to commit + * @param isCompaction whether this is called from the context of a compaction + * @param validate whether to validate the store files * @return the committed store files */ - public List commitStoreFiles(List files, boolean validate) throws IOException { + public List commitStoreFiles(List files, boolean isCompaction, boolean validate) + throws IOException { List committedFiles = new ArrayList<>(files.size()); HRegionFileSystem hfs = ctx.getRegionFileSystem(); String familyName = ctx.getFamily().getNameAsString(); @@ -423,13 +449,13 @@ public List commitStoreFiles(List files, boolean validate) thr for (Path file : files) { try { if (validate) { - validateStoreFile(file); + validateStoreFile(file, isCompaction); } Path committedPath; // As we want to support writing to data directory directly, here we need to check whether // the store file is already in the right place if (file.getParent() != null && file.getParent().equals(storeDir)) { - // already in the right place, skip renmaing + // already in the right place, skip renaming committedPath = file; } else { // Write-out finished successfully, move into the right spot diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java index 09c379227bda..e241bf0a5d34 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java @@ -68,7 +68,7 @@ public class StoreFileReader { protected BloomFilter deleteFamilyBloomFilter = null; private BloomFilterMetrics bloomFilterMetrics = null; protected BloomType bloomFilterType; - private final HFile.Reader reader; + protected final HFile.Reader reader; protected long sequenceID = -1; protected TimeRange timeRange = null; private byte[] lastBloomKey; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 6b552bc10f48..451ff93137ae 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -25,6 +25,7 @@ import java.util.Optional; import java.util.concurrent.CountDownLatch; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.IntConsumer; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; @@ -585,6 +586,13 @@ public boolean next(List outResult, ScannerContext scannerContext) throws Optional rpcCall = matcher.isUserScan() ? 
RpcServer.getCurrentCall() : Optional.empty(); + // re-useable closure to avoid allocations + IntConsumer recordBlockSize = blockSize -> { + if (rpcCall.isPresent()) { + rpcCall.get().incrementBlockBytesScanned(blockSize); + } + scannerContext.incrementBlockProgress(blockSize); + }; int count = 0; long totalBytesRead = 0; @@ -625,12 +633,7 @@ public boolean next(List outResult, ScannerContext scannerContext) throws scannerContext.returnImmediately(); } - heap.recordBlockSize(blockSize -> { - if (rpcCall.isPresent()) { - rpcCall.get().incrementBlockBytesScanned(blockSize); - } - scannerContext.incrementBlockProgress(blockSize); - }); + heap.recordBlockSize(recordBlockSize); prevCell = cell; scannerContext.setLastPeekedCell(cell); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java index 2419e709686a..8f8668aa87a8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java @@ -126,11 +126,9 @@ public void process() throws IOException { region.getCoprocessorHost().preClose(abort); } // This should be true only in the case of splits/merges closing the parent regions, as - // there's no point on keep blocks for those region files. As hbase.rs.evictblocksonclose is - // false by default we don't bother overriding it if evictCache is false. - if (evictCache) { - region.getStores().forEach(s -> s.getCacheConfig().setEvictOnClose(true)); - } + // there's no point on keep blocks for those region files. + region.getStores().forEach(s -> s.getCacheConfig().setEvictOnClose(evictCache)); + if (region.close(abort) == null) { // XXX: Is this still possible? The old comment says about split, but now split is done at // master side, so... diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/http/RSHealthServlet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/http/RSHealthServlet.java new file mode 100644 index 000000000000..bc0f35193389 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/http/RSHealthServlet.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver.http; + +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import javax.servlet.http.HttpServletRequest; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.monitoring.HealthCheckServlet; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class RSHealthServlet extends HealthCheckServlet { + + private final Map regionUnavailableSince = new ConcurrentHashMap<>(); + + public RSHealthServlet() { + super(HRegionServer.REGIONSERVER); + } + + @Override + protected Optional check(HRegionServer regionServer, HttpServletRequest req, + Connection conn) throws IOException { + long maxUnavailableMillis = Optional.ofNullable(req.getParameter("maxUnavailableMillis")) + .filter(StringUtils::isNumeric).map(Long::parseLong).orElse(Long.MAX_VALUE); + + Instant oldestUnavailableSince = Instant.MAX; + String longestUnavailableRegion = null; + int unavailableCount = 0; + + synchronized (regionUnavailableSince) { + Set regionsPreviouslyUnavailable = new HashSet<>(regionUnavailableSince.keySet()); + + for (HRegion region : regionServer.getOnlineRegionsLocalContext()) { + regionsPreviouslyUnavailable.remove(region.getRegionInfo().getEncodedName()); + if (!region.isAvailable()) { + unavailableCount++; + Instant unavailableSince = regionUnavailableSince + .computeIfAbsent(region.getRegionInfo().getEncodedName(), k -> Instant.now()); + + if (unavailableSince.isBefore(oldestUnavailableSince)) { + oldestUnavailableSince = unavailableSince; + longestUnavailableRegion = region.getRegionInfo().getEncodedName(); + } + + } else { + regionUnavailableSince.remove(region.getRegionInfo().getEncodedName()); + } + } + + regionUnavailableSince.keySet().removeAll(regionsPreviouslyUnavailable); + } + + String message = "ok"; + + if (unavailableCount > 0) { + Duration longestUnavailableRegionTime = + Duration.between(oldestUnavailableSince, Instant.now()); + if (longestUnavailableRegionTime.toMillis() > maxUnavailableMillis) { + throw new IOException("Region " + longestUnavailableRegion + + " has been unavailable too long, since " + oldestUnavailableSince); + } + + message += " - unavailableRegions: " + unavailableCount + ", longestUnavailableDuration: " + + longestUnavailableRegionTime + ", longestUnavailableRegion: " + longestUnavailableRegion; + } + + return Optional.of(message); + + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/CompressionContext.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/CompressionContext.java index 2481753dfb06..0c5d6047ceec 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/CompressionContext.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/CompressionContext.java @@ -140,6 +140,9 @@ public void decompress(InputStream in, int inLength, byte[] outArray, int outOff } } + /** + * Read an integer from the stream in big-endian byte order. 
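For illustration only (not part of the patch): the RSHealthServlet above is registered at /health on the region server info server (see the HRegionServer change earlier in this diff). An external checker could poll it roughly as follows; the host name, the default info port 16030, and the 60-second threshold are assumptions:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public final class RegionServerHealthProbe {
      public static void main(String[] args) throws Exception {
        // If any region has been unavailable longer than maxUnavailableMillis, check(...) above
        // throws, which the HealthCheckServlet base class is assumed to surface as an error
        // response; otherwise the check returns "ok" plus a summary of unavailable regions.
        URI uri = URI.create(
          "http://regionserver.example.com:16030/health?maxUnavailableMillis=60000");
        HttpResponse<String> response = HttpClient.newHttpClient()
          .send(HttpRequest.newBuilder(uri).GET().build(), HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + " " + response.body());
      }
    }
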
+ */ private int rawReadInt(InputStream in) throws IOException { int b1 = in.read(); int b2 = in.read(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java index c6cee19196cc..344da3db96b3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java @@ -52,12 +52,13 @@ public class HFilePerformanceEvaluation { private static final int RFILE_BLOCKSIZE = 8 * 1024; private static StringBuilder testSummary = new StringBuilder(); - // Disable verbose INFO logging from org.apache.hadoop.io.compress.CodecPool + // Disable verbose INFO logging from org.apache.hadoop.hbase.io.compress.CodecPool static { System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.SimpleLog"); System.setProperty( - "org.apache.commons.logging.simplelog.log.org.apache.hadoop.io.compress.CodecPool", "WARN"); + "org.apache.commons.logging.simplelog.log.org.apache.hadoop.hbase.io.compress.CodecPool", + "WARN"); } private static final Logger LOG = diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestSplitWithCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestSplitWithCache.java new file mode 100644 index 000000000000..91e65610f81c --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestSplitWithCache.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase; + +import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_IOENGINE_KEY; +import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY; +import static org.apache.hadoop.hbase.io.hfile.CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY; +import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_SPLIT_KEY; +import static org.apache.hadoop.hbase.io.hfile.CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.function.BiConsumer; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HStoreFile; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.MiscTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({ MiscTests.class, MediumTests.class }) +public class TestSplitWithCache { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSplitWithCache.class); + + private static final Logger LOG = LoggerFactory.getLogger(TestSplitWithCache.class); + + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + @BeforeClass + public static void setUp() throws Exception { + UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_META_OPERATION_TIMEOUT, 1000); + UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2); + UTIL.getConfiguration().setBoolean(CACHE_BLOCKS_ON_WRITE_KEY, true); + UTIL.getConfiguration().setBoolean(PREFETCH_BLOCKS_ON_OPEN_KEY, true); + UTIL.getConfiguration().set(BUCKET_CACHE_IOENGINE_KEY, "offheap"); + UTIL.getConfiguration().setInt(BUCKET_CACHE_SIZE_KEY, 200); + } + + @Test + public void testEvictOnSplit() throws Exception { + doTest("testEvictOnSplit", true, + (f, m) -> Waiter.waitFor(UTIL.getConfiguration(), 1000, () -> m.get(f) != null), + (f, m) -> Waiter.waitFor(UTIL.getConfiguration(), 1000, () -> m.get(f) == null)); + } + + @Test + public void testDoesntEvictOnSplit() throws Exception { + doTest("testDoesntEvictOnSplit", false, + (f, m) -> Waiter.waitFor(UTIL.getConfiguration(), 1000, () -> m.get(f) != null), + (f, m) -> Waiter.waitFor(UTIL.getConfiguration(), 1000, () -> m.get(f) != null)); + } + + private void doTest(String table, boolean evictOnSplit, + BiConsumer>> predicateBeforeSplit, + BiConsumer>> predicateAfterSplit) throws Exception { + UTIL.getConfiguration().setBoolean(EVICT_BLOCKS_ON_SPLIT_KEY, evictOnSplit); + UTIL.startMiniCluster(1); + try { + TableName tableName = TableName.valueOf(table); + byte[] family = Bytes.toBytes("CF"); + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build(); + UTIL.getAdmin().createTable(td); + UTIL.waitTableAvailable(tableName); + Table tbl = UTIL.getConnection().getTable(tableName); + List puts = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + Put p = new 
Put(Bytes.toBytes("row-" + i)); + p.addColumn(family, Bytes.toBytes(1), Bytes.toBytes("val-" + i)); + puts.add(p); + } + tbl.put(puts); + UTIL.getAdmin().flush(tableName); + Collection files = + UTIL.getMiniHBaseCluster().getRegions(tableName).get(0).getStores().get(0).getStorefiles(); + checkCacheForBlocks(tableName, files, predicateBeforeSplit); + UTIL.getAdmin().split(tableName, Bytes.toBytes("row-500")); + Waiter.waitFor(UTIL.getConfiguration(), 30000, + () -> UTIL.getMiniHBaseCluster().getRegions(tableName).size() == 2); + UTIL.waitUntilNoRegionsInTransition(); + checkCacheForBlocks(tableName, files, predicateAfterSplit); + } finally { + UTIL.shutdownMiniCluster(); + } + + } + + private void checkCacheForBlocks(TableName tableName, Collection files, + BiConsumer>> checker) { + files.forEach(f -> { + UTIL.getMiniHBaseCluster().getRegionServer(0).getBlockCache().ifPresent(cache -> { + cache.getFullyCachedFiles().ifPresent(m -> { + checker.accept(f.getPath().getName(), m); + }); + assertTrue(cache.getFullyCachedFiles().isPresent()); + }); + }); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncBufferMutator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncBufferMutator.java index b479d4de5735..2802c77b5dd7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncBufferMutator.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncBufferMutator.java @@ -175,6 +175,23 @@ public void testPeriodicFlush() throws InterruptedException, ExecutionException assertArrayEquals(VALUE, table.get(new Get(Bytes.toBytes(0))).get().getValue(CF, CQ)); } + @Test + public void testMaxMutationsFlush() throws InterruptedException, ExecutionException { + AsyncBufferedMutator mutator = + CONN.getBufferedMutatorBuilder(TABLE_NAME).setMaxMutations(3).build(); + CompletableFuture future1 = + mutator.mutate(new Put(Bytes.toBytes(0)).addColumn(CF, CQ, VALUE)); + CompletableFuture future2 = + mutator.mutate(new Put(Bytes.toBytes(1)).addColumn(CF, CQ, VALUE)); + CompletableFuture future3 = + mutator.mutate(new Put(Bytes.toBytes(2)).addColumn(CF, CQ, VALUE)); + CompletableFuture.allOf(future1, future2, future3).join(); + AsyncTable table = CONN.getTable(TABLE_NAME); + assertArrayEquals(VALUE, table.get(new Get(Bytes.toBytes(0))).get().getValue(CF, CQ)); + assertArrayEquals(VALUE, table.get(new Get(Bytes.toBytes(1))).get().getValue(CF, CQ)); + assertArrayEquals(VALUE, table.get(new Get(Bytes.toBytes(2))).get().getValue(CF, CQ)); + } + // a bit deep into the implementation @Test public void testCancelPeriodicFlush() throws InterruptedException, ExecutionException { @@ -244,8 +261,9 @@ private static final class AsyncBufferMutatorForTest extends AsyncBufferedMutato private int flushCount; AsyncBufferMutatorForTest(HashedWheelTimer periodicalFlushTimer, AsyncTable table, - long writeBufferSize, long periodicFlushTimeoutNs, int maxKeyValueSize) { - super(periodicalFlushTimer, table, writeBufferSize, periodicFlushTimeoutNs, maxKeyValueSize); + long writeBufferSize, long periodicFlushTimeoutNs, int maxKeyValueSize, int maxMutation) { + super(periodicalFlushTimer, table, writeBufferSize, periodicFlushTimeoutNs, maxKeyValueSize, + maxMutation); } @Override @@ -261,7 +279,7 @@ public void testRaceBetweenNormalFlushAndPeriodicFlush() Put put = new Put(Bytes.toBytes(0)).addColumn(CF, CQ, VALUE); try (AsyncBufferMutatorForTest mutator = new AsyncBufferMutatorForTest(AsyncConnectionImpl.RETRY_TIMER, CONN.getTable(TABLE_NAME), - 
10 * put.heapSize(), TimeUnit.MILLISECONDS.toNanos(200), 1024 * 1024)) { + 10 * put.heapSize(), TimeUnit.MILLISECONDS.toNanos(200), 1024 * 1024, 100)) { CompletableFuture future = mutator.mutate(put); Timeout task = mutator.periodicFlushTask; // we should have scheduled a periodic flush task diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBufferedMutator2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBufferedMutator2.java new file mode 100644 index 000000000000..2dd2057b8c08 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBufferedMutator2.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertArrayEquals; + +import java.io.IOException; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ MediumTests.class, ClientTests.class }) +public class TestBufferedMutator2 { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestBufferedMutator2.class); + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private static TableName TABLE_NAME = TableName.valueOf("example-table"); + + private static byte[] CF = Bytes.toBytes("cf"); + private static byte[] CQ = Bytes.toBytes("cq"); + private static byte[] VALUE = new byte[1024]; + + private static Connection CONN; + + @BeforeClass + public static void setUp() throws Exception { + TEST_UTIL.startMiniCluster(1); + TEST_UTIL.createTable(TABLE_NAME, CF); + CONN = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration()); + Bytes.random(VALUE); + } + + @AfterClass + public static void tearDown() throws Exception { + CONN.close(); + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testMaxMutationsFlush() throws IOException { + BufferedMutator mutator = + CONN.getBufferedMutator(new BufferedMutatorParams(TABLE_NAME).setMaxMutations(3)); + mutator.mutate(new Put(Bytes.toBytes(0)).addColumn(CF, CQ, VALUE)); + mutator.mutate(new Put(Bytes.toBytes(1)).addColumn(CF, CQ, VALUE)); + mutator.mutate(new Put(Bytes.toBytes(2)).addColumn(CF, CQ, VALUE)); + Table table = CONN.getTable(TABLE_NAME); + assertArrayEquals(VALUE, table.get(new Get(Bytes.toBytes(0))).getValue(CF, CQ)); + assertArrayEquals(VALUE, 
table.get(new Get(Bytes.toBytes(1))).getValue(CF, CQ)); + assertArrayEquals(VALUE, table.get(new Get(Bytes.toBytes(2))).getValue(CF, CQ)); + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java index 13955ccebfec..0ac03b8d4136 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; @@ -42,6 +43,7 @@ import org.apache.hadoop.hbase.io.hfile.ReaderContext; import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder; import org.apache.hadoop.hbase.regionserver.StoreFileInfo; +import org.apache.hadoop.hbase.regionserver.StoreFileWriter; import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.Bytes; @@ -82,15 +84,19 @@ public static void tearDownAfterClass() throws Exception { */ @Test public void testHalfScanAndReseek() throws IOException { - String root_dir = TEST_UTIL.getDataTestDir().toString(); - Path p = new Path(root_dir, "test"); - Configuration conf = TEST_UTIL.getConfiguration(); FileSystem fs = FileSystem.get(conf); + String root_dir = TEST_UTIL.getDataTestDir().toString(); + Path parentPath = new Path(new Path(root_dir, "parent"), "CF"); + fs.mkdirs(parentPath); + Path splitAPath = new Path(new Path(root_dir, "splita"), "CF"); + Path splitBPath = new Path(new Path(root_dir, "splitb"), "CF"); + Path filePath = StoreFileWriter.getUniqueFile(fs, parentPath); + CacheConfig cacheConf = new CacheConfig(conf); HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build(); HFile.Writer w = - HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create(); + HFile.getWriterFactory(conf, cacheConf).withPath(fs, filePath).withFileContext(meta).create(); // write some things. 
List items = genSomeKeys(); @@ -99,26 +105,35 @@ public void testHalfScanAndReseek() throws IOException { } w.close(); - HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf); + HFile.Reader r = HFile.createReader(fs, filePath, cacheConf, true, conf); Cell midKV = r.midKey().get(); byte[] midkey = CellUtil.cloneRow(midKV); - // System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey)); + Path splitFileA = new Path(splitAPath, filePath.getName() + ".parent"); + Path splitFileB = new Path(splitBPath, filePath.getName() + ".parent"); Reference bottom = new Reference(midkey, Reference.Range.bottom); - doTestOfScanAndReseek(p, fs, bottom, cacheConf); + bottom.write(fs, splitFileA); + doTestOfScanAndReseek(splitFileA, fs, bottom, cacheConf); Reference top = new Reference(midkey, Reference.Range.top); - doTestOfScanAndReseek(p, fs, top, cacheConf); + top.write(fs, splitFileB); + doTestOfScanAndReseek(splitFileB, fs, top, cacheConf); r.close(); } private void doTestOfScanAndReseek(Path p, FileSystem fs, Reference bottom, CacheConfig cacheConf) throws IOException { - ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build(); - StoreFileInfo storeFileInfo = - new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, fs.getFileStatus(p), bottom); + Path referencePath = StoreFileInfo.getReferredToFile(p); + FSDataInputStreamWrapper in = new FSDataInputStreamWrapper(fs, referencePath, false, 0); + FileStatus status = fs.getFileStatus(referencePath); + long length = status.getLen(); + ReaderContextBuilder contextBuilder = + new ReaderContextBuilder().withInputStreamWrapper(in).withFileSize(length) + .withReaderType(ReaderContext.ReaderType.PREAD).withFileSystem(fs).withFilePath(p); + ReaderContext context = contextBuilder.build(); + StoreFileInfo storeFileInfo = new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, p, true); storeFileInfo.initHFileInfo(context); final HalfStoreFileReader halfreader = (HalfStoreFileReader) storeFileInfo.createReader(context, cacheConf); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetch.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetch.java index cd2793b8cea0..8e278e40336e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetch.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetch.java @@ -285,14 +285,6 @@ public void testPrefetchCompressed() throws Exception { conf.setBoolean(CACHE_DATA_BLOCKS_COMPRESSED_KEY, false); } - @Test - public void testPrefetchSkipsRefs() throws Exception { - testPrefetchWhenRefs(true, c -> { - boolean isCached = c != null; - assertFalse(isCached); - }); - } - @Test public void testPrefetchDoesntSkipRefs() throws Exception { testPrefetchWhenRefs(false, c -> { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetchWithBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetchWithBucketCache.java index db8f2213d0c0..c3954d3cf901 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetchWithBucketCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetchWithBucketCache.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.hbase.io.hfile.BlockCacheFactory.BUCKET_CACHE_BUCKETS_KEY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static 
org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -39,13 +40,20 @@ import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.ByteBuffAllocator; import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; import org.apache.hadoop.hbase.io.hfile.bucket.BucketEntry; +import org.apache.hadoop.hbase.regionserver.BloomType; +import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; +import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; +import org.apache.hadoop.hbase.regionserver.HStoreFile; import org.apache.hadoop.hbase.regionserver.StoreFileWriter; import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.MediumTests; @@ -135,6 +143,55 @@ public void testPrefetchDoesntOverwork() throws Exception { assertTrue(snapshot.get(key).getCachedTime() < bc.getBackingMap().get(key).getCachedTime()); } + @Test + public void testPrefetchRefsAfterSplit() throws Exception { + conf.setLong(BUCKET_CACHE_SIZE_KEY, 200); + blockCache = BlockCacheFactory.createBlockCache(conf); + cacheConf = new CacheConfig(conf, blockCache); + + Path tableDir = new Path(TEST_UTIL.getDataTestDir(), "testPrefetchRefsAfterSplit"); + RegionInfo region = RegionInfoBuilder.newBuilder(TableName.valueOf(tableDir.getName())).build(); + Path regionDir = new Path(tableDir, region.getEncodedName()); + Path cfDir = new Path(regionDir, "cf"); + HRegionFileSystem regionFS = + HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, region); + Path storeFile = writeStoreFile(100, cfDir); + + // Prefetches the file blocks + LOG.debug("First read should prefetch the blocks."); + readStoreFile(storeFile); + BucketCache bc = BucketCache.getBucketCacheFromCacheConfig(cacheConf).get(); + // Our file should have 6 DATA blocks. We should wait for all of them to be cached + Waiter.waitFor(conf, 300, () -> bc.getBackingMap().size() == 6); + + // split the file and return references to the original file + Random rand = ThreadLocalRandom.current(); + byte[] splitPoint = RandomKeyValueUtil.randomOrderedKey(rand, 50); + HStoreFile file = new HStoreFile(fs, storeFile, conf, cacheConf, BloomType.NONE, true); + Path ref = regionFS.splitStoreFile(region, "cf", file, splitPoint, false, + new ConstantSizeRegionSplitPolicy()); + HStoreFile refHsf = new HStoreFile(this.fs, ref, conf, cacheConf, BloomType.NONE, true); + // starts reader for the ref. The ref should resolve to the original file blocks + // and not duplicate blocks in the cache. + refHsf.initReader(); + HFile.Reader reader = refHsf.getReader().getHFileReader(); + while (!reader.prefetchComplete()) { + // Sleep for a bit + Thread.sleep(1000); + } + // the ref file blocks keys should actually resolve to the referred file blocks, + // so we should not see additional blocks in the cache. 
+ Waiter.waitFor(conf, 300, () -> bc.getBackingMap().size() == 6); + + BlockCacheKey refCacheKey = new BlockCacheKey(ref.getName(), 0); + Cacheable result = bc.getBlock(refCacheKey, true, false, true); + assertNotNull(result); + BlockCacheKey fileCacheKey = new BlockCacheKey(file.getPath().getName(), 0); + assertEquals(result, bc.getBlock(fileCacheKey, true, false, true)); + assertNull(bc.getBackingMap().get(refCacheKey)); + assertNotNull(bc.getBlockForReference(refCacheKey)); + } + @Test public void testPrefetchInterruptOnCapacity() throws Exception { conf.setLong(BUCKET_CACHE_SIZE_KEY, 1); @@ -270,10 +327,19 @@ private Path writeStoreFile(String fname, int numKVs) throws IOException { return writeStoreFile(fname, meta, numKVs); } + private Path writeStoreFile(int numKVs, Path regionCFDir) throws IOException { + HFileContext meta = new HFileContextBuilder().withBlockSize(DATA_BLOCK_SIZE).build(); + return writeStoreFile(meta, numKVs, regionCFDir); + } + private Path writeStoreFile(String fname, HFileContext context, int numKVs) throws IOException { - Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(), fname); + return writeStoreFile(context, numKVs, new Path(TEST_UTIL.getDataTestDir(), fname)); + } + + private Path writeStoreFile(HFileContext context, int numKVs, Path regionCFDir) + throws IOException { StoreFileWriter sfw = new StoreFileWriter.Builder(conf, cacheConf, fs) - .withOutputDir(storeFileParentDir).withFileContext(context).build(); + .withOutputDir(regionCFDir).withFileContext(context).build(); Random rand = ThreadLocalRandom.current(); final int rowLen = 32; for (int i = 0; i < numKVs; ++i) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestRowIndexV1RoundTrip.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestRowIndexV1RoundTrip.java new file mode 100644 index 000000000000..2004e20aad6c --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestRowIndexV1RoundTrip.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.CellComparatorImpl; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue; +import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue; +import org.apache.hadoop.hbase.io.ByteBuffAllocator; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.testclassification.IOTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ IOTests.class, MediumTests.class }) +public class TestRowIndexV1RoundTrip { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowIndexV1RoundTrip.class); + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final DataBlockEncoding DATA_BLOCK_ENCODING = DataBlockEncoding.ROW_INDEX_V1; + private static final int ENTRY_COUNT = 100; + + private Configuration conf; + private FileSystem fs; + + @Before + public void setUp() throws IOException { + conf = TEST_UTIL.getConfiguration(); + conf.setLong(MIN_ALLOCATE_SIZE_KEY, 0); + fs = FileSystem.get(conf); + } + + @Test + public void testReadMyWritesOnHeap() throws IOException { + Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testHFileFormatV3"); + writeDataToHFile(hfilePath, ENTRY_COUNT); + readDataFromHFile(hfilePath, ENTRY_COUNT, true); + } + + @Test + public void testReadMyWritesOnDirectMem() throws IOException { + Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testHFileFormatV3"); + writeDataToHFile(hfilePath, ENTRY_COUNT); + readDataFromHFile(hfilePath, ENTRY_COUNT, false); + } + + private void writeDataToHFile(Path hfilePath, int entryCount) throws IOException { + HFileContext context = + new HFileContextBuilder().withBlockSize(1024).withDataBlockEncoding(DATA_BLOCK_ENCODING) + .withCellComparator(CellComparatorImpl.COMPARATOR).build(); + CacheConfig cacheConfig = new CacheConfig(conf); + HFile.Writer writer = new HFile.WriterFactory(conf, cacheConfig).withPath(fs, hfilePath) + .withFileContext(context).create(); + + List keyValues = new ArrayList<>(entryCount); + + writeKeyValues(entryCount, writer, keyValues); + } + + private void writeKeyValues(int entryCount, HFile.Writer writer, List keyValues) + throws IOException { + for (int i = 0; i < entryCount; ++i) { + byte[] keyBytes = intToBytes(i); + + byte[] valueBytes = Bytes.toBytes(String.format("value %d", i)); + KeyValue keyValue = new KeyValue(keyBytes, null, null, valueBytes); + + writer.append(keyValue); + keyValues.add(keyValue); + } + writer.close(); + } + + private void readDataFromHFile(Path hfilePath, int entryCount, boolean onHeap) + throws IOException { + CacheConfig cacheConfig; + if 
(onHeap) { + cacheConfig = new CacheConfig(conf); + } else { + ByteBuffAllocator allocator = ByteBuffAllocator.create(conf, true); + cacheConfig = new CacheConfig(conf, null, null, allocator); + } + HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConfig, false, conf); + HFileScanner scanner = reader.getScanner(conf, false, false); + scanner.seekTo(); + int i = 1; + while (scanner.next()) { + byte[] keyBytes = intToBytes(i); + // check row key from getKey() and getCell() separately because they use different code paths + assertArrayEquals(keyBytes, CellUtil.cloneRow(scanner.getKey())); + assertArrayEquals(keyBytes, CellUtil.cloneRow(scanner.getCell())); + assertArrayEquals(Bytes.toBytes(String.format("value %d", i)), + CellUtil.cloneValue(scanner.getCell())); + if (onHeap) { + assertTrue(scanner.getCell() instanceof SizeCachedNoTagsKeyValue); + } else { + assertTrue(scanner.getCell() instanceof SizeCachedNoTagsByteBufferKeyValue); + } + i += 1; + } + assertEquals(entryCount, i); + } + + private byte[] intToBytes(final int i) { + ByteBuffer bb = ByteBuffer.allocate(4); + bb.putInt(i); + return bb.array(); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCachePersister.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCachePersister.java index 7be959dfad4b..35a60ec93125 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCachePersister.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCachePersister.java @@ -49,6 +49,8 @@ import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @Category({ IOTests.class, MediumTests.class }) public class TestBucketCachePersister { @@ -61,6 +63,8 @@ public class TestBucketCachePersister { public int constructedBlockSize = 16 * 1024; + private static final Logger LOG = LoggerFactory.getLogger(TestBucketCachePersister.class); + public int[] constructedBlockSizes = new int[] { 2 * 1024 + 1024, 4 * 1024 + 1024, 8 * 1024 + 1024, 16 * 1024 + 1024, 28 * 1024 + 1024, 32 * 1024 + 1024, 64 * 1024 + 1024, 96 * 1024 + 1024, 128 * 1024 + 1024 }; @@ -164,6 +168,7 @@ public void testPrefetchBlockEvictionWhilePrefetchRunning() throws Exception { HFile.createReader(fs, storeFile, cacheConf, true, conf); boolean evicted = false; while (!PrefetchExecutor.isCompleted(storeFile)) { + LOG.debug("Entered loop as prefetch for {} is still running.", storeFile); if (bucketCache.backingMap.size() > 0 && !evicted) { Iterator> it = bucketCache.backingMap.entrySet().iterator(); @@ -172,6 +177,7 @@ public void testPrefetchBlockEvictionWhilePrefetchRunning() throws Exception { while (it.hasNext() && !evicted) { if (entry.getKey().getBlockType().equals(BlockType.DATA)) { evicted = bucketCache.evictBlock(it.next().getKey()); + LOG.debug("Attempted eviction for {}. Succeeded? 
{}", storeFile, evicted); } } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTRSPPersistUninitializedSubProc.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTRSPPersistUninitializedSubProc.java new file mode 100644 index 000000000000..3145c340102a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTRSPPersistUninitializedSubProc.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.assignment; + +import java.io.IOException; +import java.io.UncheckedIOException; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.TransitionType; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; + +/** + * Testcase for HBASE-29259 + */ +@Category({ MasterTests.class, MediumTests.class }) +public class TestTRSPPersistUninitializedSubProc { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestTRSPPersistUninitializedSubProc.class); + + private static HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + private static byte[] CF = Bytes.toBytes("cf"); + + private static TableName TN = TableName.valueOf("tn"); + + public static class TRSPForTest extends TransitRegionStateProcedure { + + private boolean injected = false; + + public TRSPForTest() { + } + + public TRSPForTest(MasterProcedureEnv env, RegionInfo hri, ServerName assignCandidate, + boolean forceNewPlan, TransitionType type) { + super(env, hri, assignCandidate, forceNewPlan, type); + } + + @Override + protected Procedure[] 
execute(MasterProcedureEnv env) + throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { + Procedure[] subProcs = super.execute(env); + if (!injected && subProcs != null && subProcs[0] instanceof CloseRegionProcedure) { + injected = true; + ServerName sn = ((CloseRegionProcedure) subProcs[0]).targetServer; + env.getMasterServices().getServerManager().expireServer(sn); + try { + UTIL.waitFor(15000, () -> env.getMasterServices().getProcedures().stream().anyMatch( + p -> p instanceof ServerCrashProcedure && p.getState() != ProcedureState.INITIALIZING)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + // sleep 10 seconds to let the SCP interrupt the TRSP, where we will call TRSP.serverCrashed + Thread.sleep(10000); + } + return subProcs; + } + } + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + UTIL.startMiniCluster(2); + UTIL.getAdmin().balancerSwitch(false, true); + UTIL.createTable(TN, CF); + UTIL.waitTableAvailable(TN); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + UTIL.shutdownMiniCluster(); + } + + @Test + public void testServerCrash() throws Exception { + HMaster master = UTIL.getHBaseCluster().getMaster(); + ProcedureExecutor procExec = master.getMasterProcedureExecutor(); + RegionInfo region = UTIL.getAdmin().getRegions(TN).get(0); + RegionStateNode rsn = + master.getAssignmentManager().getRegionStates().getRegionStateNode(region); + TRSPForTest trsp = + new TRSPForTest(procExec.getEnvironment(), region, null, false, TransitionType.REOPEN); + // attach it to RegionStateNode, to simulate normal reopen + rsn.setProcedure(trsp); + procExec.submitProcedure(trsp); + ProcedureTestingUtility.waitProcedure(procExec, trsp); + // make sure we do not store invalid procedure to procedure store + ProcedureTestingUtility.restart(procExec); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionalsTestUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionalsTestUtil.java new file mode 100644 index 000000000000..0678cc3b67fb --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionalsTestUtil.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.quotas.QuotaUtil; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; + +public final class BalancerConditionalsTestUtil { + + private static final Logger LOG = LoggerFactory.getLogger(BalancerConditionalsTestUtil.class); + + private BalancerConditionalsTestUtil() { + } + + static byte[][] generateSplits(int numRegions) { + byte[][] splitKeys = new byte[numRegions - 1][]; + for (int i = 0; i < numRegions - 1; i++) { + splitKeys[i] = + Bytes.toBytes(String.format("%09d", (i + 1) * (Integer.MAX_VALUE / numRegions))); + } + return splitKeys; + } + + static void printRegionLocations(Connection connection) throws IOException { + Admin admin = connection.getAdmin(); + + // Get all table names in the cluster + Set tableNames = admin.listTableDescriptors().stream() + .map(TableDescriptor::getTableName).collect(Collectors.toSet()); + + // Group regions by server + Map>> serverToRegions = + admin.getClusterMetrics().getLiveServerMetrics().keySet().stream() + .collect(Collectors.toMap(server -> server, server -> { + try { + return listRegionsByTable(connection, server, tableNames); + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + + // Pretty print region locations + StringBuilder regionLocationOutput = new StringBuilder(); + regionLocationOutput.append("Pretty printing region locations...\n"); + serverToRegions.forEach((server, tableRegions) -> { + regionLocationOutput.append("Server: " + server.getServerName() + "\n"); + tableRegions.forEach((table, regions) -> { + if (regions.isEmpty()) { + return; + } + regionLocationOutput.append(" Table: " + table.getNameAsString() + "\n"); + regions.forEach(region -> regionLocationOutput + .append(String.format(" Region: %s, start: %s, end: %s, replica: %s\n", + region.getEncodedName(), Bytes.toString(region.getStartKey()), + Bytes.toString(region.getEndKey()), region.getReplicaId()))); + }); + }); + LOG.info(regionLocationOutput.toString()); + } + + private static Map> listRegionsByTable(Connection connection, + ServerName server, Set tableNames) throws IOException { + Admin admin = connection.getAdmin(); + + // Find regions for each table + return tableNames.stream().collect(Collectors.toMap(tableName -> tableName, tableName -> { + List allRegions = null; + try { + allRegions = admin.getRegions(server); + } catch (IOException e) { + throw new RuntimeException(e); + } + return allRegions.stream().filter(region -> region.getTable().equals(tableName)) + .collect(Collectors.toList()); + })); + } + + static void validateReplicaDistribution(Connection connection, TableName tableName, + boolean shouldBeDistributed) { + Map> serverToRegions = 
null; + try { + serverToRegions = connection.getRegionLocator(tableName).getAllRegionLocations().stream() + .collect(Collectors.groupingBy(location -> location.getServerName(), + Collectors.mapping(location -> location.getRegion(), Collectors.toList()))); + } catch (IOException e) { + throw new RuntimeException(e); + } + + if (shouldBeDistributed) { + // Ensure no server hosts more than one replica of any region + for (Map.Entry> serverAndRegions : serverToRegions.entrySet()) { + List regionInfos = serverAndRegions.getValue(); + Set startKeys = new HashSet<>(); + for (RegionInfo regionInfo : regionInfos) { + // each region should have a distinct start key + assertFalse( + "Each region should have its own start key, " + + "demonstrating it is not a replica of any others on this host", + startKeys.contains(regionInfo.getStartKey())); + startKeys.add(regionInfo.getStartKey()); + } + } + } else { + // Ensure all replicas are on the same server + assertEquals("All regions should share one server", 1, serverToRegions.size()); + } + } + + static void validateRegionLocations(Map> tableToServers, + TableName productTableName, boolean shouldBeBalanced) { + ServerName metaServer = + tableToServers.get(TableName.META_TABLE_NAME).stream().findFirst().get(); + ServerName quotaServer = + tableToServers.get(QuotaUtil.QUOTA_TABLE_NAME).stream().findFirst().get(); + Set productServers = tableToServers.get(productTableName); + + if (shouldBeBalanced) { + for (ServerName server : productServers) { + assertNotEquals("Meta table and product table should not share servers", server, + metaServer); + assertNotEquals("Quota table and product table should not share servers", server, + quotaServer); + } + assertNotEquals("The meta server and quotas server should be different", metaServer, + quotaServer); + } else { + for (ServerName server : productServers) { + assertEquals("Meta table and product table must share servers", server, metaServer); + assertEquals("Quota table and product table must share servers", server, quotaServer); + } + assertEquals("The meta server and quotas server must be the same", metaServer, quotaServer); + } + } + + static Map> getTableToServers(Connection connection, + Set tableNames) { + return tableNames.stream().collect(Collectors.toMap(t -> t, t -> { + try { + return connection.getRegionLocator(t).getAllRegionLocations().stream() + .map(HRegionLocation::getServerName).collect(Collectors.toSet()); + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + } + + @FunctionalInterface + interface AssertionRunnable { + void run() throws AssertionError; + } + + static void validateAssertionsWithRetries(HBaseTestingUtility testUtil, + boolean runBalancerOnFailure, AssertionRunnable assertion) { + validateAssertionsWithRetries(testUtil, runBalancerOnFailure, ImmutableSet.of(assertion)); + } + + static void validateAssertionsWithRetries(HBaseTestingUtility testUtil, + boolean runBalancerOnFailure, Set assertions) { + int maxAttempts = 50; + for (int i = 0; i < maxAttempts; i++) { + try { + for (AssertionRunnable assertion : assertions) { + assertion.run(); + } + } catch (AssertionError e) { + if (i == maxAttempts - 1) { + throw e; + } + try { + LOG.warn("Failed to validate region locations. 
Will retry", e); + Thread.sleep(1000); + BalancerConditionalsTestUtil.printRegionLocations(testUtil.getConnection()); + if (runBalancerOnFailure) { + testUtil.getAdmin().balance(); + } + Thread.sleep(1000); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + } + } + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java new file mode 100644 index 000000000000..d2a9d17cdba0 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java @@ -0,0 +1,312 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MAX_RUNNING_TIME_KEY; +import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MIN_COST_NEED_BALANCE_KEY; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Base64; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class CandidateGeneratorTestUtil { + + private static final Logger LOG = LoggerFactory.getLogger(CandidateGeneratorTestUtil.class); + + private static final MasterServices MOCK_MASTER_SERVICES = mock(MasterServices.class); + + private CandidateGeneratorTestUtil() { + } + + enum ExhaustionType { + COST_GOAL_ACHIEVED, + NO_MORE_MOVES; + } + + static void runBalancerToExhaustion(Configuration conf, + Map> serverToRegions, + Set> expectations, float targetMaxBalancerCost) { + runBalancerToExhaustion(conf, serverToRegions, expectations, targetMaxBalancerCost, 15000, + ExhaustionType.COST_GOAL_ACHIEVED); + } + + static void runBalancerToExhaustion(Configuration conf, + Map> serverToRegions, + Set> expectations, float targetMaxBalancerCost, + long maxRunningTime, ExhaustionType exhaustionType) { + // Do the full plan. 
We're testing with a lot of regions + conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true); + conf.setLong(MAX_RUNNING_TIME_KEY, maxRunningTime); + + conf.setFloat(MIN_COST_NEED_BALANCE_KEY, targetMaxBalancerCost); + + BalancerClusterState cluster = createMockBalancerClusterState(serverToRegions); + StochasticLoadBalancer stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf); + printClusterDistribution(cluster, 0); + int balancerRuns = 0; + int actionsTaken = 0; + long balancingMillis = 0; + boolean isBalanced = false; + while (!isBalanced) { + balancerRuns++; + if (balancerRuns > 10) { + throw new RuntimeException("Balancer failed to find balance & meet expectations"); + } + long start = System.currentTimeMillis(); + List regionPlans = + stochasticLoadBalancer.balanceCluster(partitionRegionsByTable(serverToRegions)); + balancingMillis += System.currentTimeMillis() - start; + actionsTaken++; + if (regionPlans != null) { + // Apply all plans to serverToRegions + for (RegionPlan rp : regionPlans) { + ServerName source = rp.getSource(); + ServerName dest = rp.getDestination(); + RegionInfo region = rp.getRegionInfo(); + + // Update serverToRegions + serverToRegions.get(source).remove(region); + serverToRegions.get(dest).add(region); + actionsTaken++; + } + + // Now rebuild cluster and balancer from updated serverToRegions + cluster = createMockBalancerClusterState(serverToRegions); + stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf); + } + printClusterDistribution(cluster, actionsTaken); + isBalanced = true; + for (Function condition : expectations) { + // Check if we've met all expectations for the candidate generator + if (!condition.apply(cluster)) { + isBalanced = false; + break; + } + } + if (isBalanced) { // Check if the balancer thinks we're done too + if (exhaustionType == ExhaustionType.COST_GOAL_ACHIEVED) { + // If we expect to achieve the cost goal, then needsBalance should be false + if (stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) { + LOG.info("Balancer cost goal is not achieved. needsBalance=true"); + isBalanced = false; + } + } else { + // If we anticipate running out of moves, then our last balance run should have produced + // nothing + if (regionPlans != null && !regionPlans.isEmpty()) { + LOG.info("Balancer is not out of moves. regionPlans.size()={}", regionPlans.size()); + isBalanced = false; + } + } + } + } + LOG.info("Balancer is done. Balancing took {}sec", + Duration.ofMillis(balancingMillis).toMinutes()); + } + + /** + * Prints the current cluster distribution of regions per table per server + */ + static void printClusterDistribution(BalancerClusterState cluster, long actionsTaken) { + LOG.info("=== Cluster Distribution after {} balancer actions taken ===", actionsTaken); + + for (int i = 0; i < cluster.numServers; i++) { + int[] regions = cluster.regionsPerServer[i]; + int regionCount = (regions == null) ? 
0 : regions.length; + + LOG.info("Server {}: {} regions", cluster.servers[i].getServerName(), regionCount); + + if (regionCount > 0) { + Map<TableName, Integer> tableRegionCounts = new HashMap<>(); + + for (int regionIndex : regions) { + RegionInfo regionInfo = cluster.regions[regionIndex]; + TableName tableName = regionInfo.getTable(); + tableRegionCounts.put(tableName, tableRegionCounts.getOrDefault(tableName, 0) + 1); + } + + tableRegionCounts + .forEach((table, count) -> LOG.info(" - Table {}: {} regions", table, count)); + } + } + + LOG.info("==========================================="); + } + + /** + * Partitions the given serverToRegions map by table. The tables are derived from the RegionInfo + * objects found in serverToRegions. + * @param serverToRegions The map of servers to their assigned regions. + * @return A map of tables to their server-to-region assignments. + */ + public static Map<TableName, Map<ServerName, List<RegionInfo>>> + partitionRegionsByTable(Map<ServerName, List<RegionInfo>> serverToRegions) { + + // First, gather all tables from the regions + Set<TableName> allTables = new HashSet<>(); + for (List<RegionInfo> regions : serverToRegions.values()) { + for (RegionInfo region : regions) { + allTables.add(region.getTable()); + } + } + + Map<TableName, Map<ServerName, List<RegionInfo>>> tablesToServersToRegions = new HashMap<>(); + + // Initialize each table with all servers mapped to empty lists + for (TableName table : allTables) { + Map<ServerName, List<RegionInfo>> serverMap = new HashMap<>(); + for (ServerName server : serverToRegions.keySet()) { + serverMap.put(server, new ArrayList<>()); + } + tablesToServersToRegions.put(table, serverMap); + } + + // Distribute regions to their respective tables + for (Map.Entry<ServerName, List<RegionInfo>> serverAndRegions : serverToRegions.entrySet()) { + ServerName server = serverAndRegions.getKey(); + List<RegionInfo> regions = serverAndRegions.getValue(); + + for (RegionInfo region : regions) { + TableName regionTable = region.getTable(); + // Now we know for sure regionTable is in allTables + Map<ServerName, List<RegionInfo>> tableServerMap = + tablesToServersToRegions.get(regionTable); + tableServerMap.get(server).add(region); + } + } + + return tablesToServersToRegions; + } + + static StochasticLoadBalancer buildStochasticLoadBalancer(BalancerClusterState cluster, + Configuration conf) { + StochasticLoadBalancer stochasticLoadBalancer = + new StochasticLoadBalancer(new DummyMetricsStochasticBalancer()); + when(MOCK_MASTER_SERVICES.getConfiguration()).thenReturn(conf); + stochasticLoadBalancer.setMasterServices(MOCK_MASTER_SERVICES); + stochasticLoadBalancer.loadConf(conf); + stochasticLoadBalancer.initCosts(cluster); + return stochasticLoadBalancer; + } + + static BalancerClusterState + createMockBalancerClusterState(Map<ServerName, List<RegionInfo>> serverToRegions) { + return new BalancerClusterState(serverToRegions, null, null, null, null); + } + + /** + * Validates that each replica is isolated from its other replicas. Ensures that no server hosts more than + * one replica of the same region (i.e., regions with identical start and end keys). + * @param cluster The current state of the cluster. + * @return true if all replicas are properly isolated, false otherwise.
+ */ + static boolean areAllReplicasDistributed(BalancerClusterState cluster) { + // Iterate over each server + for (int[] regionsPerServer : cluster.regionsPerServer) { + if (regionsPerServer == null || regionsPerServer.length == 0) { + continue; // Skip empty servers + } + + Set foundKeys = new HashSet<>(); + for (int regionIndex : regionsPerServer) { + RegionInfo regionInfo = cluster.regions[regionIndex]; + ReplicaKey replicaKey = new ReplicaKey(regionInfo); + if (foundKeys.contains(replicaKey)) { + // Violation: Multiple replicas of the same region on the same server + LOG.warn("Replica isolation violated: one server hosts multiple replicas of key [{}].", + generateRegionKey(regionInfo)); + return false; + } + + foundKeys.add(replicaKey); + } + } + + LOG.info( + "Replica isolation validation passed: No server hosts multiple replicas of the same region."); + return true; + } + + /** + * Generic method to validate table isolation. + */ + static boolean isTableIsolated(BalancerClusterState cluster, TableName tableName, + String tableType) { + for (int i = 0; i < cluster.numServers; i++) { + int[] regionsOnServer = cluster.regionsPerServer[i]; + if (regionsOnServer == null || regionsOnServer.length == 0) { + continue; // Skip empty servers + } + + boolean hasTargetTableRegion = false; + boolean hasOtherTableRegion = false; + + for (int regionIndex : regionsOnServer) { + RegionInfo regionInfo = cluster.regions[regionIndex]; + if (regionInfo.getTable().equals(tableName)) { + hasTargetTableRegion = true; + } else { + hasOtherTableRegion = true; + } + + // If the target table and any other table are on the same server, isolation is violated + if (hasTargetTableRegion && hasOtherTableRegion) { + LOG.debug( + "Server {} has both {} table regions and other table regions, violating isolation.", + cluster.servers[i].getServerName(), tableType); + return false; + } + } + } + LOG.debug("{} table isolation validation passed.", tableType); + return true; + } + + /** + * Generates a unique key for a region based on its start and end keys. This method ensures that + * regions with identical start and end keys have the same key. + * @param regionInfo The RegionInfo object. + * @return A string representing the unique key of the region. + */ + private static String generateRegionKey(RegionInfo regionInfo) { + // Using Base64 encoding for byte arrays to ensure uniqueness and readability + String startKey = Base64.getEncoder().encodeToString(regionInfo.getStartKey()); + String endKey = Base64.getEncoder().encodeToString(regionInfo.getEndKey()); + + return regionInfo.getTable().getNameAsString() + ":" + startKey + ":" + endKey; + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasTestConditional.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasTestConditional.java new file mode 100644 index 000000000000..5a8fa2524fe6 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasTestConditional.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import org.apache.hadoop.conf.Configuration; + +public class DistributeReplicasTestConditional extends DistributeReplicasConditional { + + static void enableConditionalReplicaDistributionForTest(Configuration conf) { + conf.set(BalancerConditionals.ADDITIONAL_CONDITIONALS_KEY, + DistributeReplicasTestConditional.class.getCanonicalName()); + } + + public DistributeReplicasTestConditional(BalancerConditionals balancerConditionals, + BalancerClusterState cluster) { + super(balancerConditionals, cluster); + } + + @Override + public ValidationLevel getValidationLevel() { + // Mini-cluster tests can't validate at host/rack levels + return ValidationLevel.SERVER; + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/LoadOnlyFavoredStochasticBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/LoadOnlyFavoredStochasticBalancer.java index d658f7cfa167..dfacad1a747c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/LoadOnlyFavoredStochasticBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/LoadOnlyFavoredStochasticBalancer.java @@ -19,6 +19,7 @@ import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.conf.Configuration; /** * Used for FavoredNode unit tests @@ -27,7 +28,7 @@ public class LoadOnlyFavoredStochasticBalancer extends FavoredStochasticBalancer @Override protected Map, CandidateGenerator> - createCandidateGenerators() { + createCandidateGenerators(Configuration conf) { Map, CandidateGenerator> fnPickers = new HashMap<>(1); fnPickers.put(FavoredNodeLoadPicker.class, new FavoredNodeLoadPicker()); return fnPickers; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBalancerConditionals.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBalancerConditionals.java new file mode 100644 index 000000000000..4dc40cda5481 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBalancerConditionals.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ SmallTests.class, MasterTests.class }) +public class TestBalancerConditionals extends BalancerTestBase { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestBalancerConditionals.class); + + private BalancerConditionals balancerConditionals; + private BalancerClusterState mockCluster; + + @Before + public void setUp() { + balancerConditionals = BalancerConditionals.create(); + mockCluster = mockCluster(new int[] { 0, 1, 2 }); + } + + @Test + public void testDefaultConfiguration() { + Configuration conf = new Configuration(); + balancerConditionals.setConf(conf); + balancerConditionals.loadClusterState(mockCluster); + + assertEquals("No conditionals should be loaded by default", 0, + balancerConditionals.getConditionalClasses().size()); + } + + @Test + public void testCustomConditionalsViaConfiguration() { + Configuration conf = new Configuration(); + conf.set(BalancerConditionals.ADDITIONAL_CONDITIONALS_KEY, + DistributeReplicasConditional.class.getName()); + + balancerConditionals.setConf(conf); + balancerConditionals.loadClusterState(mockCluster); + + assertTrue("Custom conditionals should be loaded", + balancerConditionals.isConditionalBalancingEnabled()); + } + + @Test + public void testInvalidCustomConditionalClass() { + Configuration conf = new Configuration(); + conf.set(BalancerConditionals.ADDITIONAL_CONDITIONALS_KEY, "java.lang.String"); + + balancerConditionals.setConf(conf); + balancerConditionals.loadClusterState(mockCluster); + + assertEquals("Invalid classes should not be loaded as conditionals", 0, + balancerConditionals.getConditionalClasses().size()); + } + + @Test + public void testMetaTableIsolationConditionalEnabled() { + Configuration conf = new Configuration(); + conf.setBoolean(BalancerConditionals.ISOLATE_META_TABLE_KEY, true); + + balancerConditionals.setConf(conf); + balancerConditionals.loadClusterState(mockCluster); + + assertTrue("MetaTableIsolationConditional should be active", + balancerConditionals.isTableIsolationEnabled()); + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingConditionalReplicaDistribution.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingConditionalReplicaDistribution.java new file mode 100644 index 000000000000..2522a13819f1 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingConditionalReplicaDistribution.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.runBalancerToExhaustion; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKeyCache; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; + +@Category({ MediumTests.class, MasterTests.class }) +public class TestLargeClusterBalancingConditionalReplicaDistribution { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestLargeClusterBalancingConditionalReplicaDistribution.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestLargeClusterBalancingConditionalReplicaDistribution.class); + + private static final int NUM_SERVERS = 1000; + private static final int NUM_REGIONS = 20_000; + private static final int NUM_REPLICAS = 3; + private static final int NUM_TABLES = 100; + + private static final ServerName[] servers = new ServerName[NUM_SERVERS]; + private static final Map> serverToRegions = new HashMap<>(); + + @BeforeClass + public static void setup() { + // Initialize servers + for (int i = 0; i < NUM_SERVERS; i++) { + servers[i] = ServerName.valueOf("server" + i, i, System.currentTimeMillis()); + serverToRegions.put(servers[i], new ArrayList<>()); + } + + // Create primary regions and their replicas + List allRegions = new ArrayList<>(); + for (int i = 0; i < NUM_REGIONS; i++) { + TableName tableName = getTableName(i); + // Define startKey and endKey for the region + byte[] startKey = Bytes.toBytes(i); + byte[] endKey = Bytes.toBytes(i + 1); + + // Create 3 replicas for each primary region + for (int replicaId = 0; replicaId < NUM_REPLICAS; replicaId++) { + RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey) + .setEndKey(endKey).setReplicaId(replicaId).build(); + allRegions.add(regionInfo); + } + } + + // Assign all regions to one server + for (RegionInfo regionInfo : allRegions) { + serverToRegions.get(servers[0]).add(regionInfo); + } + } + + private static TableName getTableName(int i) { + return TableName.valueOf("userTable" + i % NUM_TABLES); + } + + @Test + public void testReplicaDistribution() { + Configuration conf = new Configuration(); + DistributeReplicasTestConditional.enableConditionalReplicaDistributionForTest(conf); + 
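+ // Editorial note (hedged; not part of the original patch): per DistributeReplicasTestConditional defined above,
+ // this helper is roughly equivalent to
+ //   conf.set(BalancerConditionals.ADDITIONAL_CONDITIONALS_KEY,
+ //     DistributeReplicasTestConditional.class.getCanonicalName());
+ // and the registered test subclass overrides getValidationLevel() to ValidationLevel.SERVER, so replica
+ // distribution is validated per server rather than per host/rack. +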
conf.setBoolean(ReplicaKeyCache.CACHE_REPLICA_KEYS_KEY, true); + conf.setInt(ReplicaKeyCache.REPLICA_KEY_CACHE_SIZE_KEY, Integer.MAX_VALUE); + conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 30_000); + + // turn off replica cost functions + conf.setLong("hbase.master.balancer.stochastic.regionReplicaRackCostKey", 0); + conf.setLong("hbase.master.balancer.stochastic.regionReplicaHostCostKey", 0); + + runBalancerToExhaustion(conf, serverToRegions, + ImmutableSet.of(CandidateGeneratorTestUtil::areAllReplicasDistributed), 10.0f); + LOG.info("Region replicas are appropriately distributed across RegionServers."); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingMetaTableIsolation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingMetaTableIsolation.java new file mode 100644 index 000000000000..27360f3cd570 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingMetaTableIsolation.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.isTableIsolated; +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.runBalancerToExhaustion; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; + +@Category({ MediumTests.class, MasterTests.class }) +public class TestLargeClusterBalancingMetaTableIsolation { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestLargeClusterBalancingMetaTableIsolation.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestLargeClusterBalancingMetaTableIsolation.class); + + private static final TableName NON_META_TABLE_NAME = TableName.valueOf("userTable"); + + private static final int NUM_SERVERS = 1000; + private static final int NUM_REGIONS = 20_000; + + private static final ServerName[] servers = new ServerName[NUM_SERVERS]; + private static final Map> serverToRegions = new HashMap<>(); + + @BeforeClass + public static void setup() { + // Initialize servers + for (int i = 0; i < NUM_SERVERS; i++) { + servers[i] = ServerName.valueOf("server" + i, i, System.currentTimeMillis()); + } + + // Create regions + List allRegions = new ArrayList<>(); + for (int i = 0; i < NUM_REGIONS; i++) { + TableName tableName = i < 3 ? 
TableName.META_TABLE_NAME : NON_META_TABLE_NAME; + byte[] startKey = new byte[1]; + startKey[0] = (byte) i; + byte[] endKey = new byte[1]; + endKey[0] = (byte) (i + 1); + + RegionInfo regionInfo = + RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).setEndKey(endKey).build(); + allRegions.add(regionInfo); + } + + // Assign all regions to the first server + serverToRegions.put(servers[0], new ArrayList<>(allRegions)); + for (int i = 1; i < NUM_SERVERS; i++) { + serverToRegions.put(servers[i], new ArrayList<>()); + } + } + + @Test + public void testMetaTableIsolation() { + Configuration conf = new Configuration(false); + conf.setBoolean(BalancerConditionals.ISOLATE_META_TABLE_KEY, true); + runBalancerToExhaustion(conf, serverToRegions, ImmutableSet.of(this::isMetaTableIsolated), + 10.0f); + LOG.info("Meta table regions are successfully isolated."); + } + + private boolean isMetaTableIsolated(BalancerClusterState cluster) { + return isTableIsolated(cluster, TableName.META_TABLE_NAME, "Meta"); + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingSystemTableIsolation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingSystemTableIsolation.java new file mode 100644 index 000000000000..ef26c548c209 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingSystemTableIsolation.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.isTableIsolated; +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.runBalancerToExhaustion; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; + +@Category({ MediumTests.class, MasterTests.class }) +public class TestLargeClusterBalancingSystemTableIsolation { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestLargeClusterBalancingSystemTableIsolation.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestLargeClusterBalancingSystemTableIsolation.class); + + private static final TableName SYSTEM_TABLE_NAME = TableName.valueOf("hbase:system"); + private static final TableName NON_SYSTEM_TABLE_NAME = TableName.valueOf("userTable"); + + private static final int NUM_SERVERS = 1000; + private static final int NUM_REGIONS = 20_000; + + private static final ServerName[] servers = new ServerName[NUM_SERVERS]; + private static final Map> serverToRegions = new HashMap<>(); + + @BeforeClass + public static void setup() { + // Initialize servers + for (int i = 0; i < NUM_SERVERS; i++) { + servers[i] = ServerName.valueOf("server" + i, i, System.currentTimeMillis()); + } + + // Create regions + List allRegions = new ArrayList<>(); + for (int i = 0; i < NUM_REGIONS; i++) { + TableName tableName = i < 3 ? 
SYSTEM_TABLE_NAME : NON_SYSTEM_TABLE_NAME; + byte[] startKey = new byte[1]; + startKey[0] = (byte) i; + byte[] endKey = new byte[1]; + endKey[0] = (byte) (i + 1); + + RegionInfo regionInfo = + RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).setEndKey(endKey).build(); + allRegions.add(regionInfo); + } + + // Assign all regions to the first server + serverToRegions.put(servers[0], new ArrayList<>(allRegions)); + for (int i = 1; i < NUM_SERVERS; i++) { + serverToRegions.put(servers[i], new ArrayList<>()); + } + } + + @Test + public void testSystemTableIsolation() { + Configuration conf = new Configuration(false); + conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true); + runBalancerToExhaustion(conf, serverToRegions, ImmutableSet.of(this::isSystemTableIsolated), + 10.0f); + LOG.info("System table regions are successfully isolated."); + } + + private boolean isSystemTableIsolated(BalancerClusterState cluster) { + return isTableIsolated(cluster, SYSTEM_TABLE_NAME, "System"); + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java new file mode 100644 index 000000000000..bc31530f4921 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.isTableIsolated; +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.runBalancerToExhaustion; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; + +@Category({ MediumTests.class, MasterTests.class }) +public class TestLargeClusterBalancingTableIsolationAndReplicaDistribution { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule + .forClass(TestLargeClusterBalancingTableIsolationAndReplicaDistribution.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestLargeClusterBalancingTableIsolationAndReplicaDistribution.class); + private static final TableName SYSTEM_TABLE_NAME = TableName.valueOf("hbase:system"); + private static final TableName NON_ISOLATED_TABLE_NAME = TableName.valueOf("userTable"); + + private static final int NUM_SERVERS = 500; + private static final int NUM_REGIONS = 2_500; + private static final int NUM_REPLICAS = 3; + + private static final ServerName[] servers = new ServerName[NUM_SERVERS]; + private static final Map> serverToRegions = new HashMap<>(); + + @BeforeClass + public static void setup() { + // Initialize servers + for (int i = 0; i < NUM_SERVERS; i++) { + servers[i] = ServerName.valueOf("server" + i, i, System.currentTimeMillis()); + serverToRegions.put(servers[i], new ArrayList<>()); + } + + // Create primary regions and their replicas + for (int i = 0; i < NUM_REGIONS; i++) { + TableName tableName; + if (i < 1) { + tableName = TableName.META_TABLE_NAME; + } else if (i < 10) { + tableName = SYSTEM_TABLE_NAME; + } else { + tableName = NON_ISOLATED_TABLE_NAME; + } + + // Define startKey and endKey for the region + byte[] startKey = new byte[1]; + startKey[0] = (byte) i; + byte[] endKey = new byte[1]; + endKey[0] = (byte) (i + 1); + + Random random = new Random(); + // Create 3 replicas for each primary region + for (int replicaId = 0; replicaId < NUM_REPLICAS; replicaId++) { + RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey) + .setEndKey(endKey).setReplicaId(replicaId).build(); + // Assign region to random server + int randomServer = random.nextInt(servers.length); + serverToRegions.get(servers[randomServer]).add(regionInfo); + } + } + } + + @Test + public void testTableIsolationAndReplicaDistribution() { + Configuration conf = new Configuration(false); + conf.setBoolean(BalancerConditionals.ISOLATE_META_TABLE_KEY, true); + conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true); + DistributeReplicasTestConditional.enableConditionalReplicaDistributionForTest(conf); + runBalancerToExhaustion(conf, serverToRegions, + 
ImmutableSet.of(this::isMetaTableIsolated, this::isSystemTableIsolated, + CandidateGeneratorTestUtil::areAllReplicasDistributed), + 10.0f, 60_000, CandidateGeneratorTestUtil.ExhaustionType.COST_GOAL_ACHIEVED); + LOG.info("Meta and system table regions are successfully isolated, " + + "and region replicas are appropriately distributed."); + } + + /** + * Validates whether all meta table regions are isolated. + */ + private boolean isMetaTableIsolated(BalancerClusterState cluster) { + return isTableIsolated(cluster, TableName.META_TABLE_NAME, "Meta"); + } + + /** + * Validates whether all system table regions are isolated. + */ + private boolean isSystemTableIsolated(BalancerClusterState cluster) { + return isTableIsolated(cluster, SYSTEM_TABLE_NAME, "System"); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestMetaTableIsolationBalancerConditional.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestMetaTableIsolationBalancerConditional.java new file mode 100644 index 000000000000..d2eb7243ec8e --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestMetaTableIsolationBalancerConditional.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.quotas.QuotaUtil; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; + +@Category(LargeTests.class) +public class TestMetaTableIsolationBalancerConditional { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMetaTableIsolationBalancerConditional.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestMetaTableIsolationBalancerConditional.class); + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private static final int NUM_SERVERS = 3; + + @Before + public void setUp() throws Exception { + TEST_UTIL.getConfiguration().setBoolean(BalancerConditionals.ISOLATE_META_TABLE_KEY, true); + TEST_UTIL.getConfiguration().setBoolean(QuotaUtil.QUOTA_CONF_KEY, true); // for another table + TEST_UTIL.getConfiguration().setLong(HConstants.HBASE_BALANCER_PERIOD, 1000L); + TEST_UTIL.getConfiguration().setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true); + + TEST_UTIL.startMiniCluster(NUM_SERVERS); + } + + @After + public void tearDown() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testTableIsolation() throws Exception { + Connection connection = TEST_UTIL.getConnection(); + Admin admin = connection.getAdmin(); + + // Create "product" table with 3 regions + TableName productTableName = TableName.valueOf("product"); + TableDescriptor productTableDescriptor = TableDescriptorBuilder.newBuilder(productTableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("0")).build()) + .build(); + admin.createTable(productTableDescriptor, + BalancerConditionalsTestUtil.generateSplits(2 * NUM_SERVERS)); + + Set tablesToBeSeparated = ImmutableSet. 
builder() + .add(TableName.META_TABLE_NAME).add(QuotaUtil.QUOTA_TABLE_NAME).add(productTableName).build(); + + // Pause the balancer + admin.balancerSwitch(false, true); + + // Move all regions (product, meta, and quotas) to one RegionServer + List<RegionInfo> allRegions = tablesToBeSeparated.stream().map(t -> { + try { + return admin.getRegions(t); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).flatMap(Collection::stream).collect(Collectors.toList()); + String targetServer = + TEST_UTIL.getHBaseCluster().getRegionServer(0).getServerName().getServerName(); + for (RegionInfo region : allRegions) { + admin.move(region.getEncodedNameAsBytes(), Bytes.toBytes(targetServer)); + } + + validateRegionLocationsWithRetry(connection, tablesToBeSeparated, productTableName, false, + false); + + // Unpause the balancer and run it + admin.balancerSwitch(true, true); + admin.balance(); + + validateRegionLocationsWithRetry(connection, tablesToBeSeparated, productTableName, true, true); + } + + private static void validateRegionLocationsWithRetry(Connection connection, + Set<TableName> tableNames, TableName productTableName, boolean areDistributed, + boolean runBalancerOnFailure) throws InterruptedException, IOException { + for (int i = 0; i < 100; i++) { + Map<TableName, Set<ServerName>> tableToServers = getTableToServers(connection, tableNames); + try { + validateRegionLocations(tableToServers, productTableName, areDistributed); + } catch (AssertionError e) { + if (i == 99) { + throw e; + } + LOG.warn("Failed to validate region locations. Will retry", e); + BalancerConditionalsTestUtil.printRegionLocations(TEST_UTIL.getConnection()); + if (runBalancerOnFailure) { + connection.getAdmin().balance(); + } + Thread.sleep(1000); + } + } + } + + private static void validateRegionLocations(Map<TableName, Set<ServerName>> tableToServers, + TableName productTableName, boolean shouldBeBalanced) { + // Validate the region assignments + ServerName metaServer = + tableToServers.get(TableName.META_TABLE_NAME).stream().findFirst().get(); + ServerName quotaServer = + tableToServers.get(QuotaUtil.QUOTA_TABLE_NAME).stream().findFirst().get(); + Set<ServerName> productServers = tableToServers.get(productTableName); + + if (shouldBeBalanced) { + assertNotEquals("Meta table and quota table should not share a server", metaServer, + quotaServer); + for (ServerName productServer : productServers) { + assertNotEquals("Meta table and product table should not share servers", productServer, + metaServer); + } + } else { + assertEquals("Meta table and quota table must share servers", metaServer, quotaServer); + for (ServerName server : productServers) { + assertEquals("Meta table and product table must share servers", server, metaServer); + } + } + } + + private static Map<TableName, Set<ServerName>> getTableToServers(Connection connection, + Set<TableName> tableNames) { + return tableNames.stream().collect(Collectors.toMap(t -> t, t -> { + try { + return connection.getRegionLocator(t).getAllRegionLocations().stream() + .map(HRegionLocation::getServerName).collect(Collectors.toSet()); + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestReplicaDistributionBalancerConditional.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestReplicaDistributionBalancerConditional.java new file mode 100644 index 000000000000..7807b07e74f9 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestReplicaDistributionBalancerConditional.java @@ -0,0 +1,120 @@ +/* + * Licensed to the
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.BalancerConditionalsTestUtil.validateAssertionsWithRetries; + +import java.util.List; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({ LargeTests.class, MasterTests.class }) +public class TestReplicaDistributionBalancerConditional { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestReplicaDistributionBalancerConditional.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestReplicaDistributionBalancerConditional.class); + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final int REPLICAS = 3; + private static final int NUM_SERVERS = REPLICAS; + private static final int REGIONS_PER_SERVER = 5; + + @Before + public void setUp() throws Exception { + DistributeReplicasTestConditional + .enableConditionalReplicaDistributionForTest(TEST_UTIL.getConfiguration()); + TEST_UTIL.getConfiguration() + .setBoolean(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_CONF_KEY, true); + TEST_UTIL.getConfiguration().setLong(HConstants.HBASE_BALANCER_PERIOD, 1000L); + TEST_UTIL.getConfiguration().setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true); + + // turn off replica cost functions + TEST_UTIL.getConfiguration() + .setLong("hbase.master.balancer.stochastic.regionReplicaRackCostKey", 0); + TEST_UTIL.getConfiguration() + .setLong("hbase.master.balancer.stochastic.regionReplicaHostCostKey", 0); + + TEST_UTIL.startMiniCluster(NUM_SERVERS); + } + + @After + public void tearDown() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testReplicaDistribution() throws Exception { + Connection connection = TEST_UTIL.getConnection(); + Admin admin 
= connection.getAdmin(); + + // Create a "replicated_table" with region replicas + TableName replicatedTableName = TableName.valueOf("replicated_table"); + TableDescriptor replicatedTableDescriptor = + TableDescriptorBuilder.newBuilder(replicatedTableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("0")).build()) + .setRegionReplication(REPLICAS).build(); + admin.createTable(replicatedTableDescriptor, + BalancerConditionalsTestUtil.generateSplits(REGIONS_PER_SERVER * NUM_SERVERS)); + + // Pause the balancer + admin.balancerSwitch(false, true); + + // Collect all region replicas and place them on one RegionServer + List allRegions = admin.getRegions(replicatedTableName); + String targetServer = + TEST_UTIL.getHBaseCluster().getRegionServer(0).getServerName().getServerName(); + + for (RegionInfo region : allRegions) { + admin.move(region.getEncodedNameAsBytes(), Bytes.toBytes(targetServer)); + } + + BalancerConditionalsTestUtil.printRegionLocations(TEST_UTIL.getConnection()); + validateAssertionsWithRetries(TEST_UTIL, false, () -> BalancerConditionalsTestUtil + .validateReplicaDistribution(connection, replicatedTableName, false)); + + // Unpause the balancer and trigger balancing + admin.balancerSwitch(true, true); + admin.balance(); + + validateAssertionsWithRetries(TEST_UTIL, true, () -> BalancerConditionalsTestUtil + .validateReplicaDistribution(connection, replicatedTableName, true)); + BalancerConditionalsTestUtil.printRegionLocations(TEST_UTIL.getConnection()); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index 9dc7dab65621..661380814ad9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -531,6 +531,7 @@ public void testDefaultCostFunctionList() { PrimaryRegionCountSkewCostFunction.class.getSimpleName(), MoveCostFunction.class.getSimpleName(), RackLocalityCostFunction.class.getSimpleName(), TableSkewCostFunction.class.getSimpleName(), + StoreFileTableSkewCostFunction.class.getSimpleName(), RegionReplicaHostCostFunction.class.getSimpleName(), RegionReplicaRackCostFunction.class.getSimpleName(), ReadRequestCostFunction.class.getSimpleName(), WriteRequestCostFunction.class.getSimpleName(), diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerHeterogeneousCost.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerHeterogeneousCost.java index 960783a8467e..188efa64dcd5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerHeterogeneousCost.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerHeterogeneousCost.java @@ -254,7 +254,7 @@ static class StochasticLoadTestBalancer extends StochasticLoadBalancer { } @Override - protected CandidateGenerator getRandomGenerator() { + protected CandidateGenerator getRandomGenerator(BalancerClusterState cluster) { return fairRandomCandidateGenerator; } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java new file 
mode 100644 index 000000000000..3977ad96dd9a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.createMockBalancerClusterState; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.RegionMetrics; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.Size; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +@Category({ MasterTests.class, SmallTests.class }) +public class TestStoreFileTableSkewCostFunction { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestStoreFileTableSkewCostFunction.class); + + private static final TableName DEFAULT_TABLE = TableName.valueOf("testTable"); + private static final Map REGION_TO_STORE_FILE_SIZE_MB = new HashMap<>(); + + /** + * Tests that a uniform store file distribution (single table) across servers results in zero + * cost. 
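+ * Four servers each host two regions of the same table whose mocked store file size is 10 MB, + * so every server carries an equal share of the table and the skew cost should be zero.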
+ */ + @Test + public void testUniformDistribution() { + ServerName server1 = ServerName.valueOf("server1.example.org", 1234, 1L); + ServerName server2 = ServerName.valueOf("server2.example.org", 1234, 1L); + ServerName server3 = ServerName.valueOf("server3.example.org", 1234, 1L); + ServerName server4 = ServerName.valueOf("server4.example.org", 1234, 1L); + + Map> serverToRegions = new HashMap<>(); + serverToRegions.put(server1, Arrays.asList(createMockRegionInfo(10), createMockRegionInfo(10))); + serverToRegions.put(server2, Arrays.asList(createMockRegionInfo(10), createMockRegionInfo(10))); + serverToRegions.put(server3, Arrays.asList(createMockRegionInfo(10), createMockRegionInfo(10))); + serverToRegions.put(server4, Arrays.asList(createMockRegionInfo(10), createMockRegionInfo(10))); + + BalancerClusterState clusterState = createMockBalancerClusterState(serverToRegions); + DummyBalancerClusterState state = new DummyBalancerClusterState(clusterState); + + StoreFileTableSkewCostFunction costFunction = + new StoreFileTableSkewCostFunction(new Configuration()); + costFunction.prepare(state); + double cost = costFunction.cost(); + + // Expect zero cost since all regions (from the same table) are balanced. + assertEquals("Uniform distribution should yield zero cost", 0.0, cost, 1e-6); + } + + /** + * Tests that a skewed store file distribution (single table) results in a positive cost. + */ + @Test + public void testSkewedDistribution() { + ServerName server1 = ServerName.valueOf("server1.example.org", 1234, 1L); + ServerName server2 = ServerName.valueOf("server2.example.org", 1234, 1L); + ServerName server3 = ServerName.valueOf("server3.example.org", 1234, 1L); + ServerName server4 = ServerName.valueOf("server4.example.org", 1234, 1L); + + Map> serverToRegions = new HashMap<>(); + // Three servers get regions with 10 store files each, + // while one server gets regions with 30 store files each. + serverToRegions.put(server1, Arrays.asList(createMockRegionInfo(10), createMockRegionInfo(10))); + serverToRegions.put(server2, Arrays.asList(createMockRegionInfo(10), createMockRegionInfo(10))); + serverToRegions.put(server3, Arrays.asList(createMockRegionInfo(10), createMockRegionInfo(10))); + serverToRegions.put(server4, Arrays.asList(createMockRegionInfo(30), createMockRegionInfo(30))); + + BalancerClusterState clusterState = createMockBalancerClusterState(serverToRegions); + DummyBalancerClusterState state = new DummyBalancerClusterState(clusterState); + + StoreFileTableSkewCostFunction costFunction = + new StoreFileTableSkewCostFunction(new Configuration()); + costFunction.prepare(state); + double cost = costFunction.cost(); + + // Expect a positive cost because the distribution is skewed. + assertTrue("Skewed distribution should yield a positive cost", cost > 0.0); + } + + /** + * Tests that an empty cluster (no servers/regions) is handled gracefully. + */ + @Test + public void testEmptyDistribution() { + Map> serverToRegions = new HashMap<>(); + + BalancerClusterState clusterState = createMockBalancerClusterState(serverToRegions); + DummyBalancerClusterState state = new DummyBalancerClusterState(clusterState); + + StoreFileTableSkewCostFunction costFunction = + new StoreFileTableSkewCostFunction(new Configuration()); + costFunction.prepare(state); + double cost = costFunction.cost(); + + // Expect zero cost when there is no load. 
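+ // An empty serverToRegions map yields a cluster with no tables or servers, so the cost + // function should simply report zero rather than fail.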
+ assertEquals("Empty distribution should yield zero cost", 0.0, cost, 1e-6); + } + + /** + * Tests that having multiple tables results in a positive cost when each table's regions are not + * balanced across servers – even if the overall load per server is balanced. + */ + @Test + public void testMultipleTablesDistribution() { + // Two servers. + ServerName server1 = ServerName.valueOf("server1.example.org", 1234, 1L); + ServerName server2 = ServerName.valueOf("server2.example.org", 1234, 1L); + + // Define two tables. + TableName table1 = TableName.valueOf("testTable1"); + TableName table2 = TableName.valueOf("testTable2"); + + // For table1, all regions are on server1. + // For table2, all regions are on server2. + Map> serverToRegions = new HashMap<>(); + serverToRegions.put(server1, + Arrays.asList(createMockRegionInfo(table1, 10), createMockRegionInfo(table1, 10))); + serverToRegions.put(server2, + Arrays.asList(createMockRegionInfo(table2, 10), createMockRegionInfo(table2, 10))); + + // Although each server gets 20 MB overall, table1 and table2 are not balanced across servers. + BalancerClusterState clusterState = createMockBalancerClusterState(serverToRegions); + DummyBalancerClusterState state = new DummyBalancerClusterState(clusterState); + + StoreFileTableSkewCostFunction costFunction = + new StoreFileTableSkewCostFunction(new Configuration()); + costFunction.prepare(state); + double cost = costFunction.cost(); + + // Expect a positive cost because the skew is computed per table. + assertTrue("Multiple table distribution should yield a positive cost", cost > 0.0); + } + + /** + * Helper method to create a RegionInfo for the default table with the given store file size. + */ + private static RegionInfo createMockRegionInfo(int storeFileSizeMb) { + return createMockRegionInfo(DEFAULT_TABLE, storeFileSizeMb); + } + + /** + * Helper method to create a RegionInfo for a specified table with the given store file size. + */ + private static RegionInfo createMockRegionInfo(TableName table, int storeFileSizeMb) { + long regionId = new Random().nextLong(); + REGION_TO_STORE_FILE_SIZE_MB.put(regionId, storeFileSizeMb); + return RegionInfoBuilder.newBuilder(table).setStartKey(generateRandomByteArray(4)) + .setEndKey(generateRandomByteArray(4)).setReplicaId(0).setRegionId(regionId).build(); + } + + private static byte[] generateRandomByteArray(int n) { + byte[] byteArray = new byte[n]; + new Random().nextBytes(byteArray); + return byteArray; + } + + /** + * A simplified BalancerClusterState which ensures we provide the intended test RegionMetrics data + * when balancing this cluster + */ + private static class DummyBalancerClusterState extends BalancerClusterState { + private final RegionInfo[] testRegions; + + DummyBalancerClusterState(BalancerClusterState bcs) { + super(bcs.clusterState, null, null, null, null); + this.testRegions = bcs.regions; + } + + @Override + Deque[] getRegionLoads() { + @SuppressWarnings("unchecked") + Deque[] loads = new Deque[testRegions.length]; + for (int i = 0; i < testRegions.length; i++) { + Deque dq = new ArrayDeque<>(); + dq.add(new BalancerRegionLoad(createMockRegionMetrics(testRegions[i])) { + }); + loads[i] = dq; + } + return loads; + } + } + + /** + * Creates a mocked RegionMetrics for the given region. 
+ */ + private static RegionMetrics createMockRegionMetrics(RegionInfo regionInfo) { + RegionMetrics regionMetrics = Mockito.mock(RegionMetrics.class); + + // Important + int storeFileSizeMb = REGION_TO_STORE_FILE_SIZE_MB.get(regionInfo.getRegionId()); + when(regionMetrics.getRegionSizeMB()).thenReturn(new Size(storeFileSizeMb, Size.Unit.MEGABYTE)); + when(regionMetrics.getStoreFileSize()) + .thenReturn(new Size(storeFileSizeMb, Size.Unit.MEGABYTE)); + + // Not important + when(regionMetrics.getReadRequestCount()).thenReturn(0L); + when(regionMetrics.getWriteRequestCount()).thenReturn(0L); + when(regionMetrics.getMemStoreSize()).thenReturn(new Size(0, Size.Unit.MEGABYTE)); + when(regionMetrics.getCurrentRegionCachedRatio()).thenReturn(0.0f); + return regionMetrics; + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java new file mode 100644 index 000000000000..5e95564b6fee --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.isTableIsolated; +import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.runBalancerToExhaustion; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * If your minCostNeedsBalance is set too low, then the balancer should still eventually stop making + * moves as further cost improvements become impossible, and balancer plan calculation becomes + * wasteful. This test ensures that the balancer will not get stuck in a loop of continuously moving + * regions. 
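+ * All regions start on a single server and the 0.01 cost goal cannot be met, so the run is + * expected to finish via NO_MORE_MOVES rather than COST_GOAL_ACHIEVED.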
+ */ +@Category({ MasterTests.class, MediumTests.class }) +public class TestUnattainableBalancerCostGoal { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestUnattainableBalancerCostGoal.class); + + private static final Logger LOG = LoggerFactory.getLogger(TestUnattainableBalancerCostGoal.class); + + private static final TableName SYSTEM_TABLE_NAME = TableName.valueOf("hbase:system"); + private static final TableName NON_SYSTEM_TABLE_NAME = TableName.valueOf("userTable"); + + private static final int NUM_SERVERS = 10; + private static final int NUM_REGIONS = 1000; + private static final float UNACHIEVABLE_COST_GOAL = 0.01f; + + private static final ServerName[] servers = new ServerName[NUM_SERVERS]; + private static final Map<ServerName, List<RegionInfo>> serverToRegions = new HashMap<>(); + + @BeforeClass + public static void setup() { + // Initialize servers + for (int i = 0; i < NUM_SERVERS; i++) { + servers[i] = ServerName.valueOf("server" + i, i, System.currentTimeMillis()); + } + + // Create regions + List<RegionInfo> allRegions = new ArrayList<>(); + for (int i = 0; i < NUM_REGIONS; i++) { + TableName tableName = i < 3 ? SYSTEM_TABLE_NAME : NON_SYSTEM_TABLE_NAME; + byte[] startKey = new byte[1]; + startKey[0] = (byte) i; + byte[] endKey = new byte[1]; + endKey[0] = (byte) (i + 1); + + RegionInfo regionInfo = + RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).setEndKey(endKey).build(); + allRegions.add(regionInfo); + } + + // Assign all regions to the first server + serverToRegions.put(servers[0], new ArrayList<>(allRegions)); + for (int i = 1; i < NUM_SERVERS; i++) { + serverToRegions.put(servers[i], new ArrayList<>()); + } + } + + @Test + public void testSystemTableIsolation() { + Configuration conf = new Configuration(false); + conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true); + runBalancerToExhaustion(conf, serverToRegions, ImmutableSet.of(this::isSystemTableIsolated), + UNACHIEVABLE_COST_GOAL, 10_000, CandidateGeneratorTestUtil.ExhaustionType.NO_MORE_MOVES); + LOG.info("System table regions are successfully isolated."); + } + + private boolean isSystemTableIsolated(BalancerClusterState cluster) { + return isTableIsolated(cluster, SYSTEM_TABLE_NAME, "System"); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestAtomicReadQuota.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestAtomicReadQuota.java index f2beb8f5d27f..12bbc26d364a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestAtomicReadQuota.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestAtomicReadQuota.java @@ -28,8 +28,10 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.CheckAndMutate; +import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Increment; import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException; import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Table; @@ -81,7 +83,7 @@ public static void setUpBeforeClass() throws Exception { @Test public void testIncrementCountedAgainstReadCapacity() throws Exception { - setupQuota(); + setupGenericQuota(); Increment inc = new Increment(Bytes.toBytes(UUID.randomUUID().toString())); inc.addColumn(FAMILY, QUALIFIER, 1); @@ -90,7 +92,7 @@ public void testIncrementCountedAgainstReadCapacity()
throws Exception { @Test public void testConditionalRowMutationsCountedAgainstReadCapacity() throws Exception { - setupQuota(); + setupGenericQuota(); byte[] row = Bytes.toBytes(UUID.randomUUID().toString()); Increment inc = new Increment(row); @@ -106,7 +108,7 @@ public void testConditionalRowMutationsCountedAgainstReadCapacity() throws Excep @Test public void testNonConditionalRowMutationsOmittedFromReadCapacity() throws Exception { - setupQuota(); + setupGenericQuota(); byte[] row = Bytes.toBytes(UUID.randomUUID().toString()); Put put = new Put(row); @@ -123,44 +125,19 @@ public void testNonConditionalRowMutationsOmittedFromReadCapacity() throws Excep @Test public void testNonAtomicPutOmittedFromReadCapacity() throws Exception { - setupQuota(); - - byte[] row = Bytes.toBytes(UUID.randomUUID().toString()); - Put put = new Put(row); - put.addColumn(FAMILY, Bytes.toBytes("doot"), Bytes.toBytes("v")); - try (Table table = getTable()) { - for (int i = 0; i < 100; i++) { - table.put(put); - } - } + setupGenericQuota(); + runNonAtomicPuts(); } @Test public void testNonAtomicMultiPutOmittedFromReadCapacity() throws Exception { - setupQuota(); - - Put put1 = new Put(Bytes.toBytes(UUID.randomUUID().toString())); - put1.addColumn(FAMILY, Bytes.toBytes("doot"), Bytes.toBytes("v")); - Put put2 = new Put(Bytes.toBytes(UUID.randomUUID().toString())); - put2.addColumn(FAMILY, Bytes.toBytes("doot"), Bytes.toBytes("v")); - - Increment inc = new Increment(Bytes.toBytes(UUID.randomUUID().toString())); - inc.addColumn(FAMILY, Bytes.toBytes("doot"), 1); - - List puts = new ArrayList<>(2); - puts.add(put1); - puts.add(put2); - - try (Table table = getTable()) { - for (int i = 0; i < 100; i++) { - table.put(puts); - } - } + setupGenericQuota(); + runNonAtomicPuts(); } @Test public void testCheckAndMutateCountedAgainstReadCapacity() throws Exception { - setupQuota(); + setupGenericQuota(); byte[] row = Bytes.toBytes(UUID.randomUUID().toString()); byte[] value = Bytes.toBytes("v"); @@ -174,7 +151,49 @@ public void testCheckAndMutateCountedAgainstReadCapacity() throws Exception { @Test public void testAtomicBatchCountedAgainstReadCapacity() throws Exception { - setupQuota(); + setupGenericQuota(); + + byte[] row = Bytes.toBytes(UUID.randomUUID().toString()); + Increment inc = new Increment(row); + inc.addColumn(FAMILY, Bytes.toBytes("doot"), 1); + + List incs = new ArrayList<>(2); + incs.add(inc); + incs.add(inc); + + testThrottle(table -> { + List results = new ArrayList<>(incs.size()); + for (Increment increment : incs) { + results.add(table.increment(increment)); + } + return results; + }); + } + + @Test + public void testAtomicBatchCountedAgainstAtomicOnlyReqNum() throws Exception { + setupAtomicOnlyReqNumQuota(); + + byte[] row = Bytes.toBytes(UUID.randomUUID().toString()); + Increment inc = new Increment(row); + inc.addColumn(FAMILY, Bytes.toBytes("doot"), 1); + + List incs = new ArrayList<>(2); + incs.add(inc); + incs.add(inc); + + testThrottle(table -> { + List results = new ArrayList<>(incs.size()); + for (Increment increment : incs) { + results.add(table.increment(increment)); + } + return results; + }); + } + + @Test + public void testAtomicBatchCountedAgainstAtomicOnlyReadSize() throws Exception { + setupAtomicOnlyReadSizeQuota(); byte[] row = Bytes.toBytes(UUID.randomUUID().toString()); Increment inc = new Increment(row); @@ -185,13 +204,67 @@ public void testAtomicBatchCountedAgainstReadCapacity() throws Exception { incs.add(inc); testThrottle(table -> { - Object[] results = new Object[incs.size()]; 
- table.batch(incs, results); + List results = new ArrayList<>(incs.size()); + for (Increment increment : incs) { + results.add(table.increment(increment)); + } return results; }); } - private void setupQuota() throws Exception { + @Test + public void testNonAtomicWritesIgnoredByAtomicOnlyReqNum() throws Exception { + setupAtomicOnlyReqNumQuota(); + runNonAtomicPuts(); + } + + @Test + public void testNonAtomicWritesIgnoredByAtomicOnlyReadSize() throws Exception { + setupAtomicOnlyReadSizeQuota(); + runNonAtomicPuts(); + } + + @Test + public void testNonAtomicReadsIgnoredByAtomicOnlyReqNum() throws Exception { + setupAtomicOnlyReqNumQuota(); + runNonAtomicReads(); + } + + @Test + public void testNonAtomicReadsIgnoredByAtomicOnlyReadSize() throws Exception { + setupAtomicOnlyReadSizeQuota(); + runNonAtomicReads(); + } + + private void runNonAtomicPuts() throws Exception { + Put put1 = new Put(Bytes.toBytes(UUID.randomUUID().toString())); + put1.addColumn(FAMILY, Bytes.toBytes("doot"), Bytes.toBytes("v")); + Put put2 = new Put(Bytes.toBytes(UUID.randomUUID().toString())); + put2.addColumn(FAMILY, Bytes.toBytes("doot"), Bytes.toBytes("v")); + + Increment inc = new Increment(Bytes.toBytes(UUID.randomUUID().toString())); + inc.addColumn(FAMILY, Bytes.toBytes("doot"), 1); + + List puts = new ArrayList<>(2); + puts.add(put1); + puts.add(put2); + + try (Table table = getTable()) { + for (int i = 0; i < 100; i++) { + table.put(puts); + } + } + } + + private void runNonAtomicReads() throws Exception { + try (Table table = getTable()) { + byte[] row = Bytes.toBytes(UUID.randomUUID().toString()); + Get get = new Get(row); + table.get(get); + } + } + + private void setupGenericQuota() throws Exception { try (Admin admin = TEST_UTIL.getAdmin()) { admin.setQuota(QuotaSettingsFactory.throttleUser(User.getCurrent().getShortName(), ThrottleType.READ_NUMBER, 1, TimeUnit.MINUTES)); @@ -199,6 +272,22 @@ private void setupQuota() throws Exception { ThrottleQuotaTestUtil.triggerUserCacheRefresh(TEST_UTIL, false, TABLE_NAME); } + private void setupAtomicOnlyReqNumQuota() throws Exception { + try (Admin admin = TEST_UTIL.getAdmin()) { + admin.setQuota(QuotaSettingsFactory.throttleUser(User.getCurrent().getShortName(), + ThrottleType.ATOMIC_REQUEST_NUMBER, 1, TimeUnit.MINUTES)); + } + ThrottleQuotaTestUtil.triggerUserCacheRefresh(TEST_UTIL, false, TABLE_NAME); + } + + private void setupAtomicOnlyReadSizeQuota() throws Exception { + try (Admin admin = TEST_UTIL.getAdmin()) { + admin.setQuota(QuotaSettingsFactory.throttleUser(User.getCurrent().getShortName(), + ThrottleType.ATOMIC_READ_SIZE, 1, TimeUnit.MINUTES)); + } + ThrottleQuotaTestUtil.triggerUserCacheRefresh(TEST_UTIL, false, TABLE_NAME); + } + private void cleanupQuota() throws Exception { try (Admin admin = TEST_UTIL.getAdmin()) { admin.setQuota(QuotaSettingsFactory.unthrottleUser(User.getCurrent().getShortName())); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestDefaultAtomicQuota.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestDefaultAtomicQuota.java new file mode 100644 index 000000000000..966bce6bcdb9 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestDefaultAtomicQuota.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.quotas; + +import static org.apache.hadoop.hbase.quotas.ThrottleQuotaTestUtil.triggerUserCacheRefresh; +import static org.apache.hadoop.hbase.quotas.ThrottleQuotaTestUtil.waitMinuteQuota; + +import java.io.IOException; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ RegionServerTests.class, MediumTests.class }) +public class TestDefaultAtomicQuota { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestDefaultAtomicQuota.class); + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final TableName TABLE_NAME = TableName.valueOf(UUID.randomUUID().toString()); + private static final int REFRESH_TIME = 5; + private static final byte[] FAMILY = Bytes.toBytes("cf"); + private static final byte[] QUALIFIER = Bytes.toBytes("q"); + + @AfterClass + public static void tearDown() throws Exception { + ThrottleQuotaTestUtil.clearQuotaCache(TEST_UTIL); + EnvironmentEdgeManager.reset(); + TEST_UTIL.deleteTable(TABLE_NAME); + TEST_UTIL.shutdownMiniCluster(); + } + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + // quotas enabled, using block bytes scanned + TEST_UTIL.getConfiguration().setBoolean(QuotaUtil.QUOTA_CONF_KEY, true); + TEST_UTIL.getConfiguration().setInt(QuotaCache.REFRESH_CONF_KEY, REFRESH_TIME); + TEST_UTIL.getConfiguration().setInt(QuotaUtil.QUOTA_DEFAULT_USER_MACHINE_ATOMIC_READ_SIZE, 1); + TEST_UTIL.getConfiguration().setInt(QuotaUtil.QUOTA_DEFAULT_USER_MACHINE_ATOMIC_REQUEST_NUM, 1); + TEST_UTIL.getConfiguration().setInt(QuotaUtil.QUOTA_DEFAULT_USER_MACHINE_ATOMIC_WRITE_SIZE, 1); + + // don't cache blocks to make IO predictable + TEST_UTIL.getConfiguration().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f); + + TEST_UTIL.startMiniCluster(1); + TEST_UTIL.waitTableAvailable(QuotaTableUtil.QUOTA_TABLE_NAME); + TEST_UTIL.createTable(TABLE_NAME, FAMILY); + TEST_UTIL.waitTableAvailable(TABLE_NAME); + QuotaCache.TEST_FORCE_REFRESH = true; + TEST_UTIL.flush(TABLE_NAME); + } + + @Test + public void testDefaultAtomicReadLimits() throws Exception { + // No write throttling + 
configureLenientThrottle(ThrottleType.ATOMIC_WRITE_SIZE); + refreshQuotas(); + + // Should have a strict throttle by default + TEST_UTIL.waitFor(60_000, () -> runIncTest(100) < 100); + + // Add big quota and should be effectively unlimited + configureLenientThrottle(ThrottleType.ATOMIC_READ_SIZE); + configureLenientThrottle(ThrottleType.ATOMIC_REQUEST_NUMBER); + refreshQuotas(); + // Should run without error + TEST_UTIL.waitFor(60_000, () -> runIncTest(100) == 100); + + // Remove all the limits, and should revert to strict default + unsetQuota(); + TEST_UTIL.waitFor(60_000, () -> runIncTest(100) < 100); + } + + @Test + public void testDefaultAtomicWriteLimits() throws Exception { + // No read throttling + configureLenientThrottle(ThrottleType.ATOMIC_REQUEST_NUMBER); + configureLenientThrottle(ThrottleType.ATOMIC_READ_SIZE); + refreshQuotas(); + + // Should have a strict throttle by default + TEST_UTIL.waitFor(60_000, () -> runIncTest(100) < 100); + + // Add big quota and should be effectively unlimited + configureLenientThrottle(ThrottleType.ATOMIC_WRITE_SIZE); + refreshQuotas(); + // Should run without error + TEST_UTIL.waitFor(60_000, () -> runIncTest(100) == 100); + + // Remove all the limits, and should revert to strict default + unsetQuota(); + TEST_UTIL.waitFor(60_000, () -> runIncTest(100) < 100); + } + + private void configureLenientThrottle(ThrottleType throttleType) throws IOException { + try (Admin admin = TEST_UTIL.getAdmin()) { + admin.setQuota( + QuotaSettingsFactory.throttleUser(getUserName(), throttleType, 100_000, TimeUnit.SECONDS)); + } + } + + private static String getUserName() throws IOException { + return User.getCurrent().getShortName(); + } + + private void refreshQuotas() throws Exception { + triggerUserCacheRefresh(TEST_UTIL, false, TABLE_NAME); + waitMinuteQuota(); + } + + private void unsetQuota() throws Exception { + try (Admin admin = TEST_UTIL.getAdmin()) { + admin.setQuota(QuotaSettingsFactory.unthrottleUser(getUserName())); + } + refreshQuotas(); + } + + private long runIncTest(int attempts) throws Exception { + refreshQuotas(); + try (Table table = getTable()) { + return ThrottleQuotaTestUtil.doIncrements(attempts, FAMILY, QUALIFIER, table); + } + } + + private Table getTable() throws IOException { + TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 100); + TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); + return TEST_UTIL.getConnection().getTableBuilder(TABLE_NAME, null).setOperationTimeout(250) + .build(); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestDefaultOperationQuota.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestDefaultOperationQuota.java index a6b7ba6fee59..beeab8aef5c4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestDefaultOperationQuota.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestDefaultOperationQuota.java @@ -153,14 +153,14 @@ public void testLargeBatchSaturatesReadNumLimit() DefaultOperationQuota quota = new DefaultOperationQuota(new Configuration(), 65536, limiter); // use the whole limit - quota.checkBatchQuota(0, limit); + quota.checkBatchQuota(0, limit, false); // the next request should be rejected - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, 1)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, 1, false)); envEdge.incValue(1000); // after the TimeUnit, the limit should be refilled - quota.checkBatchQuota(0, limit); + 
quota.checkBatchQuota(0, limit, false); } @Test @@ -174,14 +174,14 @@ public void testLargeBatchSaturatesReadWriteLimit() DefaultOperationQuota quota = new DefaultOperationQuota(new Configuration(), 65536, limiter); // use the whole limit - quota.checkBatchQuota(limit, 0); + quota.checkBatchQuota(limit, 0, false); // the next request should be rejected - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(1, 0)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(1, 0, false)); envEdge.incValue(1000); // after the TimeUnit, the limit should be refilled - quota.checkBatchQuota(limit, 0); + quota.checkBatchQuota(limit, 0, false); } @Test @@ -195,14 +195,14 @@ public void testTooLargeReadBatchIsNotBlocked() DefaultOperationQuota quota = new DefaultOperationQuota(new Configuration(), 65536, limiter); // use more than the limit, which should succeed rather than being indefinitely blocked - quota.checkBatchQuota(0, 10 + limit); + quota.checkBatchQuota(0, 10 + limit, false); // the next request should be blocked - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, 1)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, 1, false)); envEdge.incValue(1000); // even after the TimeUnit, the limit should not be refilled because we oversubscribed - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, limit)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, limit, false)); } @Test @@ -216,14 +216,14 @@ public void testTooLargeWriteBatchIsNotBlocked() DefaultOperationQuota quota = new DefaultOperationQuota(new Configuration(), 65536, limiter); // use more than the limit, which should succeed rather than being indefinitely blocked - quota.checkBatchQuota(10 + limit, 0); + quota.checkBatchQuota(10 + limit, 0, false); // the next request should be blocked - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(1, 0)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(1, 0, false)); envEdge.incValue(1000); // even after the TimeUnit, the limit should not be refilled because we oversubscribed - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(limit, 0)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(limit, 0, false)); } @Test @@ -237,14 +237,14 @@ public void testTooLargeWriteSizeIsNotBlocked() DefaultOperationQuota quota = new DefaultOperationQuota(new Configuration(), 65536, limiter); // writes are estimated a 100 bytes, so this will use 2x the limit but should not be blocked - quota.checkBatchQuota(1, 0); + quota.checkBatchQuota(1, 0, false); // the next request should be blocked - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(1, 0)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(1, 0, false)); envEdge.incValue(1000); // even after the TimeUnit, the limit should not be refilled because we oversubscribed - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(limit, 0)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(limit, 0, false)); } @Test @@ -260,14 +260,14 @@ public void testTooLargeReadSizeIsNotBlocked() new DefaultOperationQuota(new Configuration(), (int) blockSize, limiter); // reads are estimated at 1 block each, so this will use ~2x the limit but should not be blocked - quota.checkBatchQuota(0, 1); + quota.checkBatchQuota(0, 1, false); // the next request should be blocked - 
assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, 1)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, 1, false)); envEdge.incValue(1000); // even after the TimeUnit, the limit should not be refilled because we oversubscribed - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota((int) limit, 1)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota((int) limit, 1, false)); } @Test @@ -283,13 +283,13 @@ public void testTooLargeRequestSizeIsNotBlocked() new DefaultOperationQuota(new Configuration(), (int) blockSize, limiter); // reads are estimated at 1 block each, so this will use ~2x the limit but should not be blocked - quota.checkBatchQuota(0, 1); + quota.checkBatchQuota(0, 1, false); // the next request should be blocked - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, 1)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota(0, 1, false)); envEdge.incValue(1000); // even after the TimeUnit, the limit should not be refilled because we oversubscribed - assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota((int) limit, 1)); + assertThrows(RpcThrottlingException.class, () -> quota.checkBatchQuota((int) limit, 1, false)); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestNoopOperationQuota.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestNoopOperationQuota.java index ad2b79075a31..7fd686de94b8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestNoopOperationQuota.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestNoopOperationQuota.java @@ -28,7 +28,8 @@ public class TestNoopOperationQuota implements OperationQuota { public static final TestNoopOperationQuota INSTANCE = new TestNoopOperationQuota(); @Override - public void checkBatchQuota(int numWrites, int numReads) throws RpcThrottlingException { + public void checkBatchQuota(int numWrites, int numReads, boolean isAtomic) + throws RpcThrottlingException { } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaAdmin.java index 5b560129ecea..ac037909bbc8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaAdmin.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaAdmin.java @@ -773,6 +773,14 @@ private void assertRPCQuota(ThrottleType type, long limit, TimeUnit tu, QuotaSco assertTrue(rpcQuota.hasWriteCapacityUnit()); t = rpcQuota.getWriteCapacityUnit(); break; + case ATOMIC_READ_SIZE: + assertTrue(rpcQuota.hasAtomicReadSize()); + t = rpcQuota.getAtomicReadSize(); + break; + case ATOMIC_REQUEST_NUMBER: + assertTrue(rpcQuota.hasAtomicReqNum()); + t = rpcQuota.getAtomicReqNum(); + break; default: } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaState.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaState.java index cbd40f7bd81c..d64b1002b1e5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaState.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/TestQuotaState.java @@ -224,7 +224,7 @@ public void testTableThrottleWithBatch() { assertFalse(quotaInfo.isBypass()); QuotaLimiter limiter = quotaInfo.getTableLimiter(TABLE_A); try { - limiter.checkQuota(TABLE_A_THROTTLE_1 + 1, TABLE_A_THROTTLE_1 + 1, 0, 0, 1, 0); + 
limiter.checkQuota(TABLE_A_THROTTLE_1 + 1, TABLE_A_THROTTLE_1 + 1, 0, 0, 1, 0, false); fail("Should have thrown RpcThrottlingException"); } catch (RpcThrottlingException e) { // expected @@ -241,7 +241,7 @@ private Quotas buildReqNumThrottle(final long limit) { private void assertThrottleException(final QuotaLimiter limiter, final int availReqs) { assertNoThrottleException(limiter, availReqs); try { - limiter.checkQuota(1, 1, 0, 0, 1, 0); + limiter.checkQuota(1, 1, 0, 0, 1, 0, false); fail("Should have thrown RpcThrottlingException"); } catch (RpcThrottlingException e) { // expected @@ -251,11 +251,11 @@ private void assertThrottleException(final QuotaLimiter limiter, final int avail private void assertNoThrottleException(final QuotaLimiter limiter, final int availReqs) { for (int i = 0; i < availReqs; ++i) { try { - limiter.checkQuota(1, 1, 0, 0, 1, 0); + limiter.checkQuota(1, 1, 0, 0, 1, 0, false); } catch (RpcThrottlingException e) { fail("Unexpected RpcThrottlingException after " + i + " requests. limit=" + availReqs); } - limiter.grabQuota(1, 1, 0, 0, 1, 0); + limiter.grabQuota(1, 1, 0, 0, 1, 0, false); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/ThrottleQuotaTestUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/ThrottleQuotaTestUtil.java index adfc46bb4a57..b343799b89db 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/ThrottleQuotaTestUtil.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/quotas/ThrottleQuotaTestUtil.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.Waiter.ExplainingPredicate; import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Increment; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; @@ -129,6 +130,23 @@ static long doGets(int maxOps, byte[] family, byte[] qualifier, final Table... t return count; } + static long doIncrements(int maxOps, byte[] family, byte[] qualifier, final Table... tables) { + int count = 0; + try { + while (count < maxOps) { + Increment inc = new Increment(Bytes.toBytes("row-" + count)); + inc.addColumn(family, qualifier, 1L); + for (final Table table : tables) { + table.increment(inc); + } + count += tables.length; + } + } catch (IOException e) { + LOG.error("increment failed after nRetries=" + count, e); + } + return count; + } + static long doMultiGets(int maxOps, int batchSize, int rowCount, byte[] family, byte[] qualifier, final Table... 
tables) { int opCount = 0; @@ -202,7 +220,7 @@ private static void triggerCacheRefresh(HBaseTestingUtility testUtil, boolean by RegionServerRpcQuotaManager quotaManager = rst.getRegionServer().getRegionServerRpcQuotaManager(); QuotaCache quotaCache = quotaManager.getQuotaCache(); - quotaCache.triggerCacheRefresh(); + quotaCache.forceSynchronousCacheRefresh(); Thread.sleep(250); testUtil.waitFor(60000, 250, new ExplainingPredicate() { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java index 0e77ae89fef2..84654784c58d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java @@ -537,6 +537,16 @@ public long getLocalBytesRead() { return 0; } + @Override + public long getLocalRackBytesRead() { + return 0; + } + + @Override + public long getRemoteRackBytesRead() { + return 0; + } + @Override public long getShortCircuitBytesRead() { return 0; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java index 332ecd8a95a0..86fe54c98151 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java @@ -23,6 +23,12 @@ import static org.apache.hadoop.hbase.regionserver.Store.PRIORITY_USER; import static org.apache.hadoop.hbase.regionserver.compactions.CloseChecker.SIZE_LIMIT_KEY; import static org.apache.hadoop.hbase.regionserver.compactions.CloseChecker.TIME_LIMIT_KEY; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.hasProperty; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.notNullValue; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThrows; @@ -35,13 +41,17 @@ import static org.mockito.Mockito.when; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.zip.GZIPInputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -60,6 +70,7 @@ import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; @@ -75,6 +86,7 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.wal.WAL; +import org.apache.hadoop.io.IOUtils; import org.junit.After; import org.junit.Assume; import org.junit.Before; 
@@ -86,6 +98,7 @@ import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import org.slf4j.LoggerFactory; /** * Test compaction framework and common functions @@ -114,8 +127,6 @@ public class TestCompaction { /** constructor */ public TestCompaction() { - super(); - // Set cache flush size to 1MB conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024); conf.setInt(HConstants.HREGION_MEMSTORE_BLOCK_MULTIPLIER, 100); @@ -143,6 +154,12 @@ public void setUp() throws Exception { hcd.setMaxVersions(65536); this.htd.addFamily(hcd); } + if (name.getMethodName().equals("testCompactionWithCorruptBlock")) { + UTIL.getConfiguration().setBoolean("hbase.hstore.validate.read_fully", true); + HColumnDescriptor hcd = new HColumnDescriptor(FAMILY); + hcd.setCompressionType(Compression.Algorithm.GZ); + this.htd.addFamily(hcd); + } this.r = UTIL.createLocalHRegion(htd, null, null); } @@ -354,6 +371,7 @@ public void testCompactionWithCorruptResult() throws Exception { try (FSDataOutputStream stream = fs.create(tmpPath, null, true, 512, (short) 3, 1024L, null)) { stream.writeChars("CORRUPT FILE!!!!"); } + // The complete compaction should fail and the corrupt file should remain // in the 'tmp' directory; assertThrows(IOException.class, () -> store.doCompaction(null, null, null, @@ -361,6 +379,59 @@ public void testCompactionWithCorruptResult() throws Exception { assertTrue(fs.exists(tmpPath)); } + /** + * This test uses a hand-modified HFile, which is loaded in from the test resources path. That file + * was generated from the test support code in this class and then edited to corrupt the + * GZ-encoded block by zeroing out the first two bytes of the GZip header, the "standard + * declaration" of {@code 1f 8b}, found at offset 33 in the file. I'm not sure why, but it seems + * that in this test context we do not enforce CRC checksums. Thus, the corruption manifests in + * the Decompressor rather than in the reader when it loads the block bytes and compares them + * against the expected header. + */ + @Test + public void testCompactionWithCorruptBlock() throws Exception { + createStoreFile(r, Bytes.toString(FAMILY)); + createStoreFile(r, Bytes.toString(FAMILY)); + HStore store = r.getStore(FAMILY); + + Collection<HStoreFile> storeFiles = store.getStorefiles(); + DefaultCompactor tool = (DefaultCompactor) store.storeEngine.getCompactor(); + CompactionRequestImpl request = new CompactionRequestImpl(storeFiles); + tool.compact(request, NoLimitThroughputController.INSTANCE, null); + + // insert the hfile with a corrupted data block into the region's tmp directory, where + // compaction output is collected.
+ FileSystem fs = store.getFileSystem(); + Path tmpPath = store.getRegionFileSystem().createTempName(); + try ( + InputStream inputStream = + getClass().getResourceAsStream("TestCompaction_HFileWithCorruptBlock.gz"); + GZIPInputStream gzipInputStream = new GZIPInputStream(Objects.requireNonNull(inputStream)); + OutputStream outputStream = fs.create(tmpPath, null, true, 512, (short) 3, 1024L, null)) { + assertThat(gzipInputStream, notNullValue()); + assertThat(outputStream, notNullValue()); + IOUtils.copyBytes(gzipInputStream, outputStream, 512); + } + LoggerFactory.getLogger(TestCompaction.class).info("Wrote corrupted HFile to {}", tmpPath); + + // The complete compaction should fail and the corrupt file should remain + // in the 'tmp' directory; + try { + store.doCompaction(request, storeFiles, null, EnvironmentEdgeManager.currentTime(), + Collections.singletonList(tmpPath)); + } catch (IOException e) { + Throwable rootCause = e; + while (rootCause.getCause() != null) { + rootCause = rootCause.getCause(); + } + assertThat(rootCause, allOf(instanceOf(IOException.class), + hasProperty("message", containsString("not a gzip file")))); + assertTrue(fs.exists(tmpPath)); + return; + } + fail("Compaction should have failed due to corrupt block"); + } + /** * Create a custom compaction request and be sure that we can track it through the queue, knowing * when the compaction is completed. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerLeaseCount.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerLeaseCount.java index cf99c53e1d9f..fc7387b48069 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerLeaseCount.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerLeaseCount.java @@ -199,8 +199,8 @@ public OperationQuota checkBatchQuota(Region region, List a } @Override - public OperationQuota checkBatchQuota(Region region, int numWrites, int numReads) - throws IOException, RpcThrottlingException { + public OperationQuota checkBatchQuota(Region region, int numWrites, int numReads, + boolean isAtomic) throws IOException, RpcThrottlingException { if (SHOULD_THROW) { throw EX; } diff --git a/hbase-server/src/test/resources/org/apache/hadoop/hbase/regionserver/TestCompaction_HFileWithCorruptBlock.gz b/hbase-server/src/test/resources/org/apache/hadoop/hbase/regionserver/TestCompaction_HFileWithCorruptBlock.gz new file mode 100644 index 000000000000..c93407b455c8 Binary files /dev/null and b/hbase-server/src/test/resources/org/apache/hadoop/hbase/regionserver/TestCompaction_HFileWithCorruptBlock.gz differ diff --git a/hubspot-client-bundles/.blazar.yaml b/hubspot-client-bundles/.blazar.yaml new file mode 100644 index 000000000000..a57d5eeb071b --- /dev/null +++ b/hubspot-client-bundles/.blazar.yaml @@ -0,0 +1,24 @@ +buildpack: + name: Blazar-Buildpack-Java + +env: + # Below variables are generated in prepare_environment.sh. 
+ # The build environment requires environment variables to be explicitly defined before they may + be modified by the `write-build-env-var` utility script to persist changes to an environment variable + throughout a build + REPO_NAME: "" + SET_VERSION: "" + HBASE_VERSION: "" + PKG_RELEASE: "" + FULL_BUILD_VERSION: "" + MAVEN_BUILD_ARGS: "" + +before: + - description: "Prepare build environment" + commands: + - $WORKSPACE/build-scripts/prepare_environment.sh + +depends: + - hbase +provides: + - hubspot-client-bundles diff --git a/hubspot-client-bundles/.build-jdk17 b/hubspot-client-bundles/.build-jdk17 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/hubspot-client-bundles/README.md b/hubspot-client-bundles/README.md new file mode 100644 index 000000000000..01462b6e0953 --- /dev/null +++ b/hubspot-client-bundles/README.md @@ -0,0 +1,59 @@ +# hubspot-client-bundles + +Bundles up the hbase client in a way that is most friendly to the hubspot dependency trees. + +## Why? + +HBase provides some shaded artifacts, but they don't really work for us for two reasons: + +1. We have little control over what's included in them, so the jars end up being unnecessarily fat and/or leaking dependencies we don't want. +2. The shaded artifacts have significant class overlaps because one is a superset of the other. This bloats our classpath and also makes `mvn dependency:analyze` complain. This can cause the classic flappy "Unused declared"/"Used undeclared" dependency issue. + +One option would be to fix those existing artifacts to work how we'd like. I tried that in hbase2, but it was very complicated without fully redoing how the shading works. Rather than maintain a large rewrite of those poms, I'd rather start fresh with our own artifacts. This will also give us greater flexibility in the future for changing the includes/excludes as we see fit. + +## Why here? + +The other design choice here was to include these artifacts in this repo as opposed to a separate repo. One pain point with developing on hbase has been the number of repos necessary to develop and/or test any change -- the client fork has historically had 2 branches (staging and master) and similar for hbase-shading. In order to get a branch out there for testing you need to modify two repos. Iterating on those branches is annoying because builds are not automatically in-sync. + +Putting the bundling here makes it part of the build, so we automatically have client artifacts created for every branch. + +One new guiding principle of our forking strategy is to minimize the number of customizations in our forks, instead aiming to get things upstreamed. The goal is to eliminate the tech debt inherent in having to re-analyze, copy patches, handle merge conflicts, etc., every time we upgrade. This module is an exception to that rule -- regardless of where it lives, we will want to be cognizant of dependency changes in new releases. Putting it here gives us the option to bake that process directly into our build and introduces no potential for merge conflicts because it's entirely isolated in a new module. + +## How it works + +These artifacts are produced with the usual maven-shade-plugin. Some understanding of that plugin is helpful, but I wanted to give a little clarity on a few techniques used. + +In general our goal with shading is to control two things: + +- Which classes end up in the jar, and the fully qualified class names (i.e. including package) of those classes. +- Which dependencies are exposed in the resulting pom.
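To make that second lever concrete, the "Exclusions" strategy discussed below mostly comes down to `<exclusions>` blocks on the bundle's direct dependencies. A minimal sketch is shown here; the artifacts named are illustrative examples rather than the exact exclusion lists used by these bundles:

```xml
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-client</artifactId>
  <exclusions>
    <!-- keep this transitive dependency out of the bundle entirely -->
    <exclusion>
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
    </exclusion>
  </exclusions>
</dependency>
```

Excluding a transitive dependency here means the shade plugin never sees it at all, so it ends up neither in the bundle jar nor in the dependency-reduced pom.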
+ +At a very high level, the shade plugin does the following: + +1. Collect all the dependencies in your pom.xml, including transitive dependencies. It's worth noting that this flattens your dependency tree, so if your project A previously depended on project B which depended on project C, your project A now directly depends on B and C. +2. Include any selected dependencies (via artifactSet) directly into your jar by copying the class files in. +3. Rewrite those class packages and imports, if configured via relocations. +4. Write a new dependency-reduced-pom.xml, which only includes the dependencies that weren't included in the jar. This pom becomes the new pom for your artifact. + +In terms of our two goals, choosing which classes end up in the jar is easy via artifactSet. Controlling which dependencies end up in your final pom is a lot trickier: + +- **Exclusions** - Since the shade plugin starts with your initial dependencies, you can eliminate transitive dependencies by excluding them from your direct dependencies. This is effective but typically involves needing to apply those same exclusions to all direct dependencies, because the ones you're trying to exclude will often come in through multiple of them. +- **Marking a dependency as scope provided** - The shade plugin seems to ignore scope provided dependencies, as well as all of their transitive dependencies (as long as they aren't converted to compile scope by some other dependency). This sometimes doesn't work and seems kind of magic, so it might make sense to only use it for cases where your jar actually provides that dependency. +- **Inclusion in the jar** - Any dependencies included in the jar will be removed from the resulting pom. In general, if you include something in the jar, it should be relocated or filtered. Otherwise, you run the risk of duplicate class conflicts. You can include something in the jar and then filter out all of its classes, which sort of wipes it out, but that requires configuring it in multiple places and is again sort of magic, so it's another last resort. + +My strategy has evolved here over time since none of these are perfect and there's no easy answer as far as I can tell. But I've listed the above in approximately the order +I chose to solve each dependency. So I mostly preferred exclusions here, then marked some stuff as scope provided, and mostly didn't use the last strategy. A minimal configuration sketch showing these moving parts appears at the end of this README. + +## How to make changes + +In general the best way I've found to iterate here is: + +1. Create a simple downstream project which depends on one or both of these bundles +2. Run `mvn dependency:list -DoutputFile=dependencies.out` to see a full list of dependencies +3. You can pass that through something like `cat dependencies.out | sed -E -e 's/^ +//' | sed -E -e 's/:(compile|runtime|provided|test).*/:\1/' | sed -E -e 's/:(compile|runtime)$/:compile/' | sort | uniq > dependencies.sorted` to get a file that can be compared with another such-processed file +4. Make the change you want in the bundle, then run `mvn clean install` +5. Re-run steps 2 and 3, outputting to a new file +6. Run `comm -13 first second` to see what might be newly added after your change, or `comm -23` to see what might have been removed +7. If trying to track down a specific dependency from the list, go back here and run `mvn dependency:tree -Dincludes=<groupId>:<artifactId>`. This might show you which dependency you need to add an exclusion to. + +This ends up being pretty iterative trial and error, but it can eventually get you to a jar which has what you want (and leaves out what you don't).
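## Reference: shade plugin sketch

For orientation, here is a minimal sketch of the shade-plugin wiring these bundle poms use. The include, relocation, and filter values below are illustrative placeholders; the real per-module lists live in the pom.xml files later in this change.

```xml
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-shade-plugin</artifactId>
  <version>3.6.0</version>
  <executions>
    <execution>
      <id>create-bundle-with-relocations</id>
      <goals>
        <goal>shade</goal>
      </goals>
      <phase>package</phase>
      <configuration>
        <!-- the dependency-reduced pom drops everything that was bundled into the jar -->
        <createDependencyReducedPom>true</createDependencyReducedPom>
        <artifactSet>
          <includes>
            <!-- classes from these artifacts are copied into the bundle jar -->
            <include>org.apache.hbase:hbase-client</include>
            <include>com.google.protobuf:protobuf-java</include>
          </includes>
        </artifactSet>
        <relocations>
          <!-- rewrite packages (and references to them) so bundled copies can't clash with app copies -->
          <relocation>
            <pattern>com.google.protobuf</pattern>
            <shadedPattern>org.apache.hadoop.hbase.shaded.com.google.protobuf</shadedPattern>
          </relocation>
        </relocations>
        <filters>
          <!-- drop signature files that would otherwise invalidate the repackaged jar -->
          <filter>
            <artifact>*:*</artifact>
            <excludes>
              <exclude>META-INF/*.SF</exclude>
              <exclude>META-INF/*.DSA</exclude>
              <exclude>META-INF/*.RSA</exclude>
            </excludes>
          </filter>
        </filters>
      </configuration>
    </execution>
  </executions>
</plugin>
```

The three blocks map onto the goals above: `artifactSet` picks which artifacts get copied into the jar, `relocations` rewrites their packages, and `filters` strips individual files (such as signature metadata) from what gets copied; whatever ends up bundled in the jar is then omitted from the dependency-reduced pom.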
diff --git a/hubspot-client-bundles/hbase-backup-restore-bundle/.blazar.yaml b/hubspot-client-bundles/hbase-backup-restore-bundle/.blazar.yaml new file mode 100644 index 000000000000..9399e5dc0aa4 --- /dev/null +++ b/hubspot-client-bundles/hbase-backup-restore-bundle/.blazar.yaml @@ -0,0 +1,26 @@ +buildpack: + name: Blazar-Buildpack-Java + +env: + # Below variables are generated in prepare_environment.sh. + # The build environment requires environment variables to be explicitly defined before they may + # be modified by the `write-build-env-var` utilty script to persist changes to an environment variable + # throughout a build + REPO_NAME: "" + SET_VERSION: "" + HBASE_VERSION: "" + PKG_RELEASE: "" + FULL_BUILD_VERSION: "" + MAVEN_BUILD_ARGS: "" + +before: + - description: "Prepare build environment" + commands: + - $WORKSPACE/build-scripts/prepare_environment.sh + +depends: + - hubspot-client-bundles + - hbase-client-bundle + - hbase-mapreduce-bundle +provides: + - hbase-backup-restore-bundle diff --git a/hubspot-client-bundles/hbase-backup-restore-bundle/.build-jdk17 b/hubspot-client-bundles/hbase-backup-restore-bundle/.build-jdk17 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/hubspot-client-bundles/hbase-backup-restore-bundle/pom.xml b/hubspot-client-bundles/hbase-backup-restore-bundle/pom.xml new file mode 100644 index 000000000000..9707d9d8118d --- /dev/null +++ b/hubspot-client-bundles/hbase-backup-restore-bundle/pom.xml @@ -0,0 +1,119 @@ + + + 4.0.0 + + + com.hubspot.hbase + hubspot-client-bundles + ${revision} + + + hbase-backup-restore-bundle + + + + com.hubspot.hbase + hbase-client-bundle + + + + commons-io + commons-io + + + + + com.hubspot.hbase + hbase-mapreduce-bundle + + + + commons-io + commons-io + + + + + org.apache.hbase + hbase-backup + + + + commons-io + commons-io + + + + org.apache.hbase + * + + + + commons-logging + commons-logging + + + javax.servlet.jsp + * + + + javax.servlet + * + + + org.glassfish.web + * + + + org.jamon + jamon-runtime + + + io.netty + * + + + org.slf4j + slf4j-log4j12 + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + create-bundle-with-relocations + + + + org.apache.hbase:* + + io.opentelemetry:opentelemetry-api + io.opentelemetry:opentelemetry-context + com.google.protobuf:protobuf-java + io.dropwizard.metrics:metrics-core + + + + + org.apache.kerby:* + + krb5-template.conf + krb5_udp-template.conf + ccache.txt + keytab.txt + + + + + + + + + + diff --git a/hubspot-client-bundles/hbase-client-bundle/.blazar.yaml b/hubspot-client-bundles/hbase-client-bundle/.blazar.yaml new file mode 100644 index 000000000000..300be28892e8 --- /dev/null +++ b/hubspot-client-bundles/hbase-client-bundle/.blazar.yaml @@ -0,0 +1,25 @@ +buildpack: + name: Blazar-Buildpack-Java + +env: + # Below variables are generated in prepare_environment.sh. 
+ # The build environment requires environment variables to be explicitly defined before they may + # be modified by the `write-build-env-var` utilty script to persist changes to an environment variable + # throughout a build + REPO_NAME: "" + SET_VERSION: "" + HBASE_VERSION: "" + PKG_RELEASE: "" + FULL_BUILD_VERSION: "" + MAVEN_BUILD_ARGS: "" + +before: + - description: "Prepare build environment" + commands: + - $WORKSPACE/build-scripts/prepare_environment.sh + +depends: + - hubspot-client-bundles +provides: + - hbase-client-bundle + diff --git a/hubspot-client-bundles/hbase-client-bundle/.build-jdk17 b/hubspot-client-bundles/hbase-client-bundle/.build-jdk17 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/hubspot-client-bundles/hbase-client-bundle/pom.xml b/hubspot-client-bundles/hbase-client-bundle/pom.xml new file mode 100644 index 000000000000..24ce44daf93a --- /dev/null +++ b/hubspot-client-bundles/hbase-client-bundle/pom.xml @@ -0,0 +1,127 @@ + + + 4.0.0 + + + com.hubspot.hbase + hubspot-client-bundles + ${revision} + + + hbase-client-bundle + + + + org.apache.hbase + hbase-openssl + + + + org.apache.hbase + hbase-client + + + + org.apache.hbase + hbase-hadoop-compat + + + org.apache.hbase + hbase-hadoop2-compat + + + + commons-logging + commons-logging + + + org.jruby.joni + joni + + + org.jruby.jcodings + jcodings + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hbase + hbase-endpoint + + + * + * + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + create-bundle-with-relocations + + + + + org.apache.hbase:hbase-client + org.apache.hbase:hbase-common + org.apache.hbase:hbase-logging + org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-protocol-shaded + org.apache.hbase:hbase-openssl + + org.apache.hbase.thirdparty:* + + org.apache.hbase:hbase-endpoint + + + + com.google.protobuf:protobuf-java + + io.dropwizard.metrics:metrics-core + + commons-io:commons-io + + + + + org.apache.hbase:hbase-endpoint + + org/apache/hadoop/hbase/client/coprocessor/** + org/apache/hadoop/hbase/protobuf/generated/** + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + diff --git a/hubspot-client-bundles/hbase-mapreduce-bundle/.blazar.yaml b/hubspot-client-bundles/hbase-mapreduce-bundle/.blazar.yaml new file mode 100644 index 000000000000..5c020e374927 --- /dev/null +++ b/hubspot-client-bundles/hbase-mapreduce-bundle/.blazar.yaml @@ -0,0 +1,25 @@ +buildpack: + name: Blazar-Buildpack-Java + +env: + # Below variables are generated in prepare_environment.sh. 
+ # The build environment requires environment variables to be explicitly defined before they may + # be modified by the `write-build-env-var` utilty script to persist changes to an environment variable + # throughout a build + REPO_NAME: "" + SET_VERSION: "" + HBASE_VERSION: "" + PKG_RELEASE: "" + FULL_BUILD_VERSION: "" + MAVEN_BUILD_ARGS: "" + +before: + - description: "Prepare build environment" + commands: + - $WORKSPACE/build-scripts/prepare_environment.sh + +depends: + - hubspot-client-bundles + - hbase-client-bundle +provides: + - hbase-mapreduce-bundle diff --git a/hubspot-client-bundles/hbase-mapreduce-bundle/.build-jdk17 b/hubspot-client-bundles/hbase-mapreduce-bundle/.build-jdk17 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/hubspot-client-bundles/hbase-mapreduce-bundle/pom.xml b/hubspot-client-bundles/hbase-mapreduce-bundle/pom.xml new file mode 100644 index 000000000000..233e33750fe1 --- /dev/null +++ b/hubspot-client-bundles/hbase-mapreduce-bundle/pom.xml @@ -0,0 +1,251 @@ + + + 4.0.0 + + + com.hubspot.hbase + hubspot-client-bundles + ${revision} + + + hbase-mapreduce-bundle + + + + + com.hubspot.hbase + hbase-client-bundle + + + + commons-io + commons-io + + + + + + org.apache.hbase + hbase-mapreduce + + + org.apache.hbase + hbase-client + + + org.apache.hbase + hbase-common + + + org.apache.hbase + hbase-annotations + + + org.apache.hbase + hbase-protocol + + + org.apache.hbase + hbase-protocol-shaded + + + org.apache.hbase + hbase-logging + + + com.google.protobuf + protobuf-java + + + org.apache.hbase.thirdparty + hbase-shaded-gson + + + org.apache.hbase.thirdparty + hbase-shaded-protobuf + + + org.apache.hbase.thirdparty + hbase-unsafe + + + org.apache.hbase.thirdparty + hbase-shaded-miscellaneous + + + org.apache.hbase.thirdparty + hbase-shaded-netty + + + + commons-logging + commons-logging + + + + commons-io + commons-io + + + com.sun.jersey + * + + + tomcat + jasper-runtime + + + org.mortbay.jetty + * + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hbase + hbase-server + + + org.apache.hbase + hbase-client + + + org.apache.hbase + hbase-common + + + org.apache.hbase + hbase-annotations + + + org.apache.hbase + hbase-protocol + + + org.apache.hbase + hbase-protocol-shaded + + + org.apache.hbase + hbase-logging + + + com.google.protobuf + protobuf-java + + + org.apache.hbase.thirdparty + hbase-shaded-gson + + + org.apache.hbase.thirdparty + hbase-shaded-protobuf + + + org.apache.hbase.thirdparty + hbase-unsafe + + + org.apache.hbase.thirdparty + hbase-shaded-miscellaneous + + + org.apache.hbase.thirdparty + hbase-shaded-netty + + + + + commons-logging + commons-logging + + + + commons-io + commons-io + + + javax.servlet.jsp + * + + + javax.servlet + * + + + org.glassfish.web + * + + + org.jamon + jamon-runtime + + + io.netty + * + + + org.slf4j + slf4j-log4j12 + + + org.glassfish.hk2.external + jakarta.inject + + + jakarta.ws.rs + jakarta.ws.rs-api + + + + + org.apache.hbase + hbase-compression-zstd + + + org.apache.hbase + * + + + + commons-io + commons-io + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + create-bundle-with-relocations + + + + + org.apache.hbase:* + + org.apache.hbase.thirdparty:* + + + + + + + + + diff --git a/hubspot-client-bundles/hbase-mapreduce-bundle/src/main/resources/hbase-site.xml b/hubspot-client-bundles/hbase-mapreduce-bundle/src/main/resources/hbase-site.xml new file mode 100644 index 000000000000..629c6f84f30e --- /dev/null +++ 
b/hubspot-client-bundles/hbase-mapreduce-bundle/src/main/resources/hbase-site.xml @@ -0,0 +1,12 @@ + + + + + + hbase.io.compress.zstd.codec + org.apache.hadoop.hbase.io.compress.zstd.ZstdCodec + + diff --git a/hubspot-client-bundles/hbase-server-it-bundle/.blazar.yaml b/hubspot-client-bundles/hbase-server-it-bundle/.blazar.yaml new file mode 100644 index 000000000000..26db8c8066b3 --- /dev/null +++ b/hubspot-client-bundles/hbase-server-it-bundle/.blazar.yaml @@ -0,0 +1,26 @@ +buildpack: + name: Blazar-Buildpack-Java + +env: + # Below variables are generated in prepare_environment.sh. + # The build environment requires environment variables to be explicitly defined before they may + # be modified by the `write-build-env-var` utilty script to persist changes to an environment variable + # throughout a build + YUM_REPO_UPLOAD_OVERRIDE_CENTOS_8: "" + SET_VERSION: "" + HBASE_VERSION: "" + PKG_RELEASE: "" + FULL_BUILD_VERSION: "" + MAVEN_BUILD_ARGS: "" + REPO_NAME: "" + +before: + - description: "Prepare build environment" + commands: + - $WORKSPACE/build-scripts/prepare_environment.sh + +depends: + - hbase +provides: + - hbase-server-it-bundle + diff --git a/hubspot-client-bundles/hbase-server-it-bundle/.build-jdk17 b/hubspot-client-bundles/hbase-server-it-bundle/.build-jdk17 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/hubspot-client-bundles/hbase-server-it-bundle/pom.xml b/hubspot-client-bundles/hbase-server-it-bundle/pom.xml new file mode 100644 index 000000000000..fa617258f82d --- /dev/null +++ b/hubspot-client-bundles/hbase-server-it-bundle/pom.xml @@ -0,0 +1,168 @@ + + + 4.0.0 + + + com.hubspot.hbase + hubspot-client-bundles + ${revision} + + + hbase-server-it-bundle + + + + org.apache.hbase + hbase-it + test-jar + + + + commons-logging + commons-logging + + + javax.servlet.jsp + * + + + javax.servlet + * + + + org.glassfish.web + * + + + org.jamon + jamon-runtime + + + io.netty + * + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hbase + hbase-server + test-jar + ${project.version} + + + + commons-logging + commons-logging + + + javax.servlet.jsp + * + + + javax.servlet + * + + + org.glassfish.web + * + + + org.jamon + jamon-runtime + + + io.netty + * + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hbase + hbase-testing-util + ${project.version} + + + + commons-logging + commons-logging + + + javax.servlet.jsp + * + + + javax.servlet + * + + + org.glassfish.web + * + + + org.jamon + jamon-runtime + + + io.netty + * + + + org.slf4j + slf4j-log4j12 + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + create-bundle-with-relocations + + + + org.apache.hbase:* + + junit:junit + + commons-io:commons-io + + org.apache.hbase.thirdparty:* + com.google.protobuf:protobuf-java + + io.opentelemetry:opentelemetry-api + io.opentelemetry:opentelemetry-context + com.google.protobuf:protobuf-java + io.dropwizard.metrics:metrics-core + + + + + org.apache.kerby:* + + krb5-template.conf + krb5_udp-template.conf + ccache.txt + keytab.txt + + + + + + + + + + diff --git a/hubspot-client-bundles/pom.xml b/hubspot-client-bundles/pom.xml new file mode 100644 index 000000000000..0105ffd28c43 --- /dev/null +++ b/hubspot-client-bundles/pom.xml @@ -0,0 +1,458 @@ + + + 4.0.0 + + com.hubspot.hbase + hubspot-client-bundles + ${revision} + pom + Bundled versions of the hbase client + + + hbase-client-bundle + hbase-mapreduce-bundle + hbase-backup-restore-bundle + hbase-server-it-bundle + + + + org.apache.hadoop.hbase.shaded + + 3.6.3-shaded-SNAPSHOT + + 2.6-hubspot-SNAPSHOT 
+ + + + + + + com.hubspot.hbase + hbase-client-bundle + ${project.version} + + + org.apache.hbase + hbase-client + + + + + com.hubspot.hbase + hbase-mapreduce-bundle + ${project.version} + + + com.hubspot.hbase + hbase-backup-restore-bundle + ${project.version} + + + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + org.apache.hbase + hbase-openssl + ${project.version} + + + org.apache.hbase + hbase-compression-zstd + ${project.version} + + + org.apache.hbase + hbase-client + ${project.version} + + + + javax.activation + javax.activation-api + + + javax.annotation + javax.annotation-api + + + org.slf4j + slf4j-reload4j + + + com.google.code.findbugs + jsr305 + + + com.sun.jersey + jersey-servlet + + + com.sun.jersey.contribs + jersey-guice + + + com.github.pjfanning + jersey-json + + + org.apache.avro + avro + + + org.eclipse.jetty + jetty-client + + + com.google.j2objc + j2objc-annotations + + + + + org.apache.hbase + hbase-server + ${project.version} + + + javax.activation + javax.activation-api + + + javax.annotation + javax.annotation-api + + + org.slf4j + slf4j-reload4j + + + com.google.code.findbugs + jsr305 + + + com.sun.jersey + jersey-servlet + + + com.sun.jersey.contribs + jersey-guice + + + com.github.pjfanning + jersey-json + + + org.apache.avro + avro + + + org.eclipse.jetty + jetty-client + + + com.google.j2objc + j2objc-annotations + + + + + org.apache.hbase + hbase-mapreduce + ${project.version} + + + javax.activation + javax.activation-api + + + javax.annotation + javax.annotation-api + + + org.slf4j + slf4j-reload4j + + + com.google.code.findbugs + jsr305 + + + com.sun.jersey + jersey-servlet + + + com.sun.jersey.contribs + jersey-guice + + + com.github.pjfanning + jersey-json + + + org.apache.avro + avro + + + org.eclipse.jetty + jetty-client + + + com.google.j2objc + j2objc-annotations + + + + + org.apache.hbase + hbase-endpoint + ${project.version} + + + javax.activation + javax.activation-api + + + javax.annotation + javax.annotation-api + + + org.slf4j + slf4j-reload4j + + + com.google.code.findbugs + jsr305 + + + com.sun.jersey + jersey-servlet + + + com.sun.jersey.contribs + jersey-guice + + + com.github.pjfanning + jersey-json + + + org.apache.avro + avro + + + org.eclipse.jetty + jetty-client + + + com.google.j2objc + j2objc-annotations + + + + + org.apache.hbase + hbase-backup + ${project.version} + + + javax.activation + javax.activation-api + + + javax.annotation + javax.annotation-api + + + org.slf4j + slf4j-reload4j + + + com.google.code.findbugs + jsr305 + + + com.sun.jersey + jersey-servlet + + + com.sun.jersey.contribs + jersey-guice + + + com.github.pjfanning + jersey-json + + + org.apache.avro + avro + + + org.eclipse.jetty + jetty-client + + + com.google.j2objc + j2objc-annotations + + + + + org.apache.hbase + hbase-hadoop2-compat + ${project.version} + + + javax.activation + javax.activation-api + + + javax.annotation + javax.annotation-api + + + org.slf4j + slf4j-reload4j + + + com.google.code.findbugs + jsr305 + + + com.sun.jersey + jersey-servlet + + + com.sun.jersey.contribs + jersey-guice + + + com.github.pjfanning + jersey-json + + + org.apache.avro + avro + + + org.eclipse.jetty + jetty-client + + + com.google.j2objc + j2objc-annotations + + + + + org.apache.hbase + hbase-it + test-jar + ${project.version} + + + javax.activation + javax.activation-api + + + javax.annotation + javax.annotation-api + + + org.slf4j + slf4j-reload4j + + + com.google.code.findbugs + jsr305 + + + com.sun.jersey + jersey-servlet + + + 
com.sun.jersey.contribs + jersey-guice + + + com.github.pjfanning + jersey-json + + + org.apache.avro + avro + + + org.eclipse.jetty + jetty-client + + + com.google.j2objc + j2objc-annotations + + + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.6.0 + + + create-bundle-with-relocations + + shade + + package + + + true + true + true + true + true + + + com.google.protobuf + ${shade.prefix}.com.google.protobuf + + + com.codahale.metrics + ${shade.prefix}.com.codahale.metrics + + + org.apache.commons.io + ${shade.prefix}.org.apache.commons.io + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + + + + + diff --git a/pom.xml b/pom.xml index e4be54a0adf9..0cff942e8dad 100644 --- a/pom.xml +++ b/pom.xml @@ -662,7 +662,7 @@ 1.11.0 1.8.0 1.1.10.4 - 1.5.5-2 + 1.5.7-2