Skip to content

Commit 5f9f2cf

Browse files
goldvitaly and copybara-github
authored and committed
Optimize InitControlBytesAfterSoo to perform fewer writes, each with a size known at compile time.
X86:
```
name old CYCLES/op new CYCLES/op delta
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1 14.8 ± 0% 14.8 ± 0% -0.05% (p=0.001 n=49+50)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2 49.2 ± 0% 44.7 ± 0% -9.19% (p=0.000 n=54+50)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4 59.3 ± 0% 57.2 ± 0% -3.61% (p=0.000 n=54+53)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8 56.1 ± 1% 55.1 ± 1% -1.86% (p=0.000 n=54+54)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16 61.8 ± 1% 61.2 ± 1% -0.98% (p=0.000 n=56+55)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32 60.5 ± 1% 60.2 ± 1% -0.41% (p=0.000 n=42+55)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64 57.7 ± 2% 57.5 ± 2% -0.32% (p=0.031 n=55+56)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128 55.2 ± 1% 55.2 ± 1% ~ (p=0.493 n=52+51)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:256 52.9 ± 1% 53.0 ± 1% ~ (p=0.877 n=45+49)
```
ARM (altra,arch=aarch64)
```
name old CYCLES/op new CYCLES/op delta
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1 16.9 ± 3% 16.8 ± 3% ~ (p=0.107 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2 55.9 ± 1% 51.6 ±11% -7.62% (p=0.000 n=46+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4 61.6 ± 0% 59.9 ± 7% -2.77% (p=0.000 n=33+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8 61.9 ± 0% 61.0 ± 4% -1.47% (p=0.000 n=33+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16 58.0 ± 2% 57.7 ± 3% ~ (p=0.073 n=40+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32 54.4 ± 3% 54.2 ± 3% ~ (p=0.138 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64 51.3 ± 2% 51.1 ± 2% ~ (p=0.110 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128 48.8 ± 2% 48.7 ± 2% ~ (p=0.163 n=57+57)
```
PiperOrigin-RevId: 720984174
Change-Id: I3297a89e678421e4e784af6b5f66b862fd1aae05
1 parent da2b12d commit 5f9f2cf

File tree

2 files changed

+37
-6
lines changed

2 files changed

+37
-6
lines changed

absl/container/internal/raw_hash_set.cc

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -481,13 +481,44 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
481481
void HashSetResizeHelper::InitControlBytesAfterSoo(ctrl_t* new_ctrl, ctrl_t h2,
482482
size_t new_capacity) {
483483
assert(is_single_group(new_capacity));
484-
std::memset(new_ctrl, static_cast<int8_t>(ctrl_t::kEmpty),
485-
NumControlBytes(new_capacity));
486-
assert(HashSetResizeHelper::SooSlotIndex() == 1);
484+
static_assert(HashSetResizeHelper::SooSlotIndex() == 1, "");
487485
// This allows us to avoid branching on had_soo_slot_.
488486
assert(had_soo_slot_ || h2 == ctrl_t::kEmpty);
489-
new_ctrl[1] = new_ctrl[new_capacity + 2] = h2;
490-
new_ctrl[new_capacity] = ctrl_t::kSentinel;
487+
488+
if (Group::kWidth == 16) {
489+
// Initialize the second 8 bytes in the original and mirrored control bytes.
490+
// The ranges can overlap.
491+
absl::little_endian::Store64(new_ctrl + 8, kMsbs8Bytes);
492+
absl::little_endian::Store64(new_ctrl + new_capacity + 8, kMsbs8Bytes);
493+
}
494+
static constexpr uint64_t kAllEmptyExceptSoo =
495+
kMsbs8Bytes ^ (static_cast<uint64_t>(static_cast<uint8_t>(ctrl_t::kEmpty))
496+
<< (8 * HashSetResizeHelper::SooSlotIndex()));
497+
// Initialize the first 8 bytes in the original control bytes.
498+
// The first 8 bytes are all empty except the SOO slot.
499+
// The range may overlap with the mirrored control bytes. These bytes will be
500+
// overwritten later.
501+
uint64_t first_ctrl_bytes =
502+
kAllEmptyExceptSoo ^ (static_cast<uint64_t>(static_cast<uint8_t>(h2))
503+
<< (8 * HashSetResizeHelper::SooSlotIndex()));
504+
absl::little_endian::Store64(new_ctrl, first_ctrl_bytes);
505+
// Initialize Sentinel byte and the first 7 bytes in the mirrored control
506+
// bytes.
507+
// We are adding kSentinel as the first byte of the mirrored control bytes.
508+
uint64_t mirrored_ctrl_bytes =
509+
(first_ctrl_bytes << 8) ^
510+
static_cast<uint64_t>(static_cast<uint8_t>(ctrl_t::kSentinel));
511+
absl::little_endian::Store64(new_ctrl + new_capacity, mirrored_ctrl_bytes);
512+
513+
// Example for capacity 3:
514+
// new_ctrl after 2 stores = ????????EEEEEEEEEEE
515+
// new_ctrl after 3rd store = E0EEEEEEEEEEEEEEEEE
516+
// new_ctrl after 4th store = E0ESE0EEEEEEEEEEEEE
517+
518+
// Example for capacity 15:
519+
// new_ctrl after 2 stores = ????????EEEEEEEE???????EEEEEEEE
520+
// new_ctrl after 3rd store = E0EEEEEEEEEEEEEE???????EEEEEEEE
521+
// new_ctrl after 4th store = E0EEEEEEEEEEEEESE0EEEEEEEEEEEEE
491522
}
492523

493524
void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots(

absl/container/internal/raw_hash_set.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2033,7 +2033,7 @@ class HashSetResizeHelper {
20332033
// index 1 so that when resizing from capacity 1 to 3, we can still have
20342034
// random iteration order between the first two inserted elements.
20352035
// I.e. it allows inserting the second element at either index 0 or 2.
2036-
static size_t SooSlotIndex() { return 1; }
2036+
static constexpr size_t SooSlotIndex() { return 1; }
20372037

20382038
// Allocates a backing array for the hashtable.
20392039
// Reads `capacity` and updates all other fields based on the result of

0 commit comments

Comments
 (0)