Skip to content

Commit fbc0df2

Browse files
goldvitaly authored and copybara-github committed
Limit slot_size to 2^16-1 and maximum table size to 2^43-1.
These two changes are bundled together in order to simplify and improve table size overflow verification. 1. We verify user input directly, without any transformation that may cause overflow. 2. Verification on 64-bit platforms doesn't require division anymore. 3. We also leave room for using extra bits in size for metadata (for an ongoing experiment with storing the seed in the size). PiperOrigin-RevId: 731394588 Change-Id: I0c4336d945f86d74dc64ad67171e176efd1cdd80
1 parent 1af129f commit fbc0df2

File tree

3 files changed

+136
-48
lines changed

3 files changed

+136
-48
lines changed

absl/container/internal/raw_hash_set.cc

Lines changed: 36 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,16 @@ static_assert(NumControlBytes(SooCapacity()) <= 17,
6969

7070
namespace {
7171

72+
[[noreturn]] ABSL_ATTRIBUTE_NOINLINE void HashTableSizeOverflow() {
73+
ABSL_RAW_LOG(FATAL, "Hash table size overflow");
74+
}
75+
76+
void ValidateMaxSize(size_t size, size_t slot_size) {
77+
if (IsAboveMaxValidSize(size, slot_size)) {
78+
HashTableSizeOverflow();
79+
}
80+
}
81+
7282
// Returns "random" seed.
7383
inline size_t RandomSeed() {
7484
#ifdef ABSL_HAVE_THREAD_LOCAL
@@ -515,9 +525,6 @@ void ResizeEmptyNonAllocatedTableImpl(CommonFields& common, size_t new_capacity,
515525
assert(common.capacity() <= policy.soo_capacity);
516526
assert(common.empty());
517527
const size_t slot_size = policy.slot_size;
518-
if (ABSL_PREDICT_FALSE(new_capacity > MaxValidCapacity(slot_size))) {
519-
HashTableSizeOverflow();
520-
}
521528
HashtablezInfoHandle infoz;
522529
const bool should_sample =
523530
policy.is_hashtablez_eligible && (force_infoz || ShouldSampleNextTable());
@@ -923,10 +930,25 @@ void ResizeAllocatedTable(CommonFields& common, size_t new_capacity,
923930
common, new_capacity, common.infoz(), policy);
924931
}
925932

926-
void ResizeEmptyNonAllocatedTable(CommonFields& common, size_t new_capacity,
927-
const PolicyFunctions& policy) {
928-
ResizeEmptyNonAllocatedTableImpl(common, new_capacity, /*force_infoz=*/false,
929-
policy);
933+
void ReserveEmptyNonAllocatedTableToFitNewSize(CommonFields& common,
934+
size_t new_size,
935+
const PolicyFunctions& policy) {
936+
ValidateMaxSize(new_size, policy.slot_size);
937+
ResizeEmptyNonAllocatedTableImpl(
938+
common, NormalizeCapacity(GrowthToLowerboundCapacity(new_size)),
939+
/*force_infoz=*/false, policy);
940+
// This is after resize, to ensure that we have completed the allocation
941+
// and have potentially sampled the hashtable.
942+
common.infoz().RecordReservation(new_size);
943+
common.reset_reserved_growth(new_size);
944+
common.set_reservation_size(new_size);
945+
}
946+
947+
void ReserveEmptyNonAllocatedTableToFitBucketCount(
948+
CommonFields& common, size_t bucket_count, const PolicyFunctions& policy) {
949+
ValidateMaxSize(bucket_count, policy.slot_size);
950+
ResizeEmptyNonAllocatedTableImpl(common, NormalizeCapacity(bucket_count),
951+
/*force_infoz=*/false, policy);
930952
}
931953

932954
void GrowEmptySooTableToNextCapacityForceSampling(
@@ -993,12 +1015,11 @@ void Rehash(CommonFields& common, size_t n, const PolicyFunctions& policy) {
9931015
NormalizeCapacity(n | GrowthToLowerboundCapacity(common.size()));
9941016
// n == 0 unconditionally rehashes as per the standard.
9951017
if (n == 0 || new_capacity > cap) {
996-
if (ABSL_PREDICT_FALSE(new_capacity > MaxValidCapacity(slot_size))) {
997-
HashTableSizeOverflow();
998-
}
1018+
ValidateMaxSize(n, policy.slot_size);
9991019
if (cap == policy.soo_capacity) {
10001020
if (common.empty()) {
1001-
ResizeEmptyNonAllocatedTable(common, new_capacity, policy);
1021+
ResizeEmptyNonAllocatedTableImpl(common, new_capacity,
1022+
/*force_infoz=*/false, policy);
10021023
} else {
10031024
ResizeFullSooTable(common, new_capacity,
10041025
ResizeFullSooTableSamplingMode::kNoSampling, policy);
@@ -1014,6 +1035,9 @@ void Rehash(CommonFields& common, size_t n, const PolicyFunctions& policy) {
10141035

10151036
void ReserveAllocatedTable(CommonFields& common, size_t n,
10161037
const PolicyFunctions& policy) {
1038+
common.reset_reserved_growth(n);
1039+
common.set_reservation_size(n);
1040+
10171041
const size_t cap = common.capacity();
10181042
assert(!common.empty() || cap > policy.soo_capacity);
10191043
assert(cap > 0);
@@ -1023,10 +1047,8 @@ void ReserveAllocatedTable(CommonFields& common, size_t n,
10231047
if (n <= max_size_before_growth) {
10241048
return;
10251049
}
1050+
ValidateMaxSize(n, policy.slot_size);
10261051
const size_t new_capacity = NormalizeCapacity(GrowthToLowerboundCapacity(n));
1027-
if (ABSL_PREDICT_FALSE(new_capacity > MaxValidCapacity(policy.slot_size))) {
1028-
HashTableSizeOverflow();
1029-
}
10301052
if (cap == policy.soo_capacity) {
10311053
assert(!common.empty());
10321054
ResizeFullSooTable(common, new_capacity,
@@ -1066,10 +1088,6 @@ size_t PrepareInsertNonSoo(CommonFields& common, size_t hash,
10661088
return target.offset;
10671089
}
10681090

1069-
void HashTableSizeOverflow() {
1070-
ABSL_RAW_LOG(FATAL, "Hash table size overflow");
1071-
}
1072-
10731091
} // namespace container_internal
10741092
ABSL_NAMESPACE_END
10751093
} // namespace absl

absl/container/internal/raw_hash_set.h

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,14 +1097,6 @@ constexpr size_t NormalizeCapacity(size_t n) {
10971097
return n ? ~size_t{} >> countl_zero(n) : 1;
10981098
}
10991099

1100-
constexpr size_t MaxValidCapacity(size_t slot_size) {
1101-
return NormalizeCapacity((std::numeric_limits<size_t>::max)() / 4 /
1102-
slot_size);
1103-
}
1104-
1105-
// Use a non-inlined function to avoid code bloat.
1106-
[[noreturn]] void HashTableSizeOverflow();
1107-
11081100
// General notes on capacity/growth methods below:
11091101
// - We use 7/8th as maximum load factor. For 16-wide groups, that gives an
11101102
// average of two empty slots per group.
@@ -1537,7 +1529,7 @@ ABSL_ATTRIBUTE_NOINLINE void DeallocateBackingArray(
15371529
struct PolicyFunctions {
15381530
uint32_t key_size;
15391531
uint32_t value_size;
1540-
uint32_t slot_size;
1532+
uint16_t slot_size;
15411533
uint16_t slot_align;
15421534
uint8_t soo_capacity;
15431535
bool is_hashtablez_eligible;
@@ -1584,14 +1576,20 @@ constexpr size_t SooSlotIndex() { return 1; }
15841576
// Allowing till 16 would require additional store that can be avoided.
15851577
constexpr size_t MaxSmallAfterSooCapacity() { return 7; }
15861578

1587-
// Resizes empty non-allocated table to the new capacity.
1579+
// Resizes empty non-allocated table to the capacity to fit new_size elements.
15881580
// Requires:
15891581
// 1. `c.capacity() == policy.soo_capacity`.
15901582
// 2. `c.empty()`.
1591-
// 3. `new_capacity > policy.soo_capacity`.
1583+
// 3. `new_size > policy.soo_capacity`.
15921584
// The table will be attempted to be sampled.
1593-
void ResizeEmptyNonAllocatedTable(CommonFields& common, size_t new_capacity,
1594-
const PolicyFunctions& policy);
1585+
void ReserveEmptyNonAllocatedTableToFitNewSize(CommonFields& common,
1586+
size_t new_size,
1587+
const PolicyFunctions& policy);
1588+
1589+
// The same as ReserveEmptyNonAllocatedTableToFitNewSize, but resizes to the
1590+
// next valid capacity after `bucket_count`.
1591+
void ReserveEmptyNonAllocatedTableToFitBucketCount(
1592+
CommonFields& common, size_t bucket_count, const PolicyFunctions& policy);
15951593

15961594
// Resizes empty non-allocated SOO table to NextCapacity(SooCapacity()) and
15971595
// forces the table to be sampled.
@@ -1659,6 +1657,33 @@ InitializeThreeElementsControlBytesAfterSoo(size_t hash, ctrl_t* new_ctrl) {
16591657
// new_ctrl after 2nd store = EHESEHEEEEE
16601658
}
16611659

1660+
// Template parameter is only used to enable testing.
1661+
template <size_t kSizeOfSizeT = sizeof(size_t)>
1662+
constexpr size_t MaxValidSize(size_t slot_size) {
1663+
if constexpr (kSizeOfSizeT == 4) {
1664+
return (size_t{1} << (kSizeOfSizeT * 8 - 2)) / slot_size - 1;
1665+
} else {
1666+
static_assert(kSizeOfSizeT == 8);
1667+
constexpr size_t kSizeBits = 43;
1668+
static_assert(
1669+
kSizeBits + sizeof(PolicyFunctions::slot_size) * 8 < 64,
1670+
"we expect that slot size is small enough that allocation size "
1671+
"will not overflow");
1672+
return CapacityToGrowth(static_cast<size_t>(uint64_t{1} << kSizeBits) - 1);
1673+
}
1674+
}
1675+
1676+
// Template parameter is only used to enable testing.
1677+
template <size_t kSizeOfSizeT = sizeof(size_t)>
1678+
constexpr size_t IsAboveMaxValidSize(size_t size, size_t slot_size) {
1679+
if constexpr (kSizeOfSizeT == 4) {
1680+
return uint64_t{size} * slot_size >
1681+
MaxValidSize<kSizeOfSizeT>(/*slot_size=*/1);
1682+
} else {
1683+
return size > MaxValidSize(slot_size);
1684+
}
1685+
}
1686+
16621687
// Returns the optimal size for memcpy when transferring SOO slot.
16631688
// Otherwise, returns the optimal size for memcpy SOO slot transfer
16641689
// to SooSlotIndex().
@@ -2125,8 +2150,8 @@ class raw_hash_set {
21252150
: settings_(CommonFields::CreateDefault<SooEnabled()>(), hash, eq,
21262151
alloc) {
21272152
if (bucket_count > DefaultCapacity()) {
2128-
ResizeEmptyNonAllocatedTable(common(), NormalizeCapacity(bucket_count),
2129-
GetPolicyFunctions());
2153+
ReserveEmptyNonAllocatedTableToFitBucketCount(common(), bucket_count,
2154+
GetPolicyFunctions());
21302155
}
21312156
}
21322157

@@ -2402,9 +2427,7 @@ class raw_hash_set {
24022427
ABSL_ASSUME(cap >= kDefaultCapacity);
24032428
return cap;
24042429
}
2405-
size_t max_size() const {
2406-
return CapacityToGrowth(MaxValidCapacity(sizeof(slot_type)));
2407-
}
2430+
size_t max_size() const { return MaxValidSize(sizeof(slot_type)); }
24082431

24092432
ABSL_ATTRIBUTE_REINITIALIZES void clear() {
24102433
if (SwisstableGenerationsEnabled() &&
@@ -2813,16 +2836,10 @@ class raw_hash_set {
28132836
ReserveAllocatedTable(common(), n, GetPolicyFunctions());
28142837
} else {
28152838
if (ABSL_PREDICT_TRUE(n > DefaultCapacity())) {
2816-
ResizeEmptyNonAllocatedTable(
2817-
common(), NormalizeCapacity(GrowthToLowerboundCapacity(n)),
2818-
GetPolicyFunctions());
2819-
// This is after resize, to ensure that we have completed the allocation
2820-
// and have potentially sampled the hashtable.
2821-
infoz().RecordReservation(n);
2839+
ReserveEmptyNonAllocatedTableToFitNewSize(common(), n,
2840+
GetPolicyFunctions());
28222841
}
28232842
}
2824-
common().reset_reserved_growth(n);
2825-
common().set_reservation_size(n);
28262843
}
28272844

28282845
// Extension API: support for heterogeneous keys.
@@ -3558,10 +3575,15 @@ class raw_hash_set {
35583575
}
35593576

35603577
static const PolicyFunctions& GetPolicyFunctions() {
3561-
static_assert(sizeof(slot_type) <= (std::numeric_limits<uint32_t>::max)());
3562-
static_assert(alignof(slot_type) <= (std::numeric_limits<uint16_t>::max)());
3563-
static_assert(sizeof(key_type) <= (std::numeric_limits<uint32_t>::max)());
3564-
static_assert(sizeof(value_type) <= (std::numeric_limits<uint32_t>::max)());
3578+
static_assert(sizeof(slot_type) <= (std::numeric_limits<uint16_t>::max)(),
3579+
"Slot size is too large. Use std::unique_ptr for value type "
3580+
"or use absl::node_hash_{map,set}.");
3581+
static_assert(alignof(slot_type) <=
3582+
size_t{(std::numeric_limits<uint16_t>::max)()});
3583+
static_assert(sizeof(key_type) <=
3584+
size_t{(std::numeric_limits<uint32_t>::max)()});
3585+
static_assert(sizeof(value_type) <=
3586+
size_t{(std::numeric_limits<uint32_t>::max)()});
35653587
static constexpr size_t kBackingArrayAlignment =
35663588
BackingArrayAlignment(alignof(slot_type));
35673589
static constexpr PolicyFunctions value = {

absl/container/internal/raw_hash_set_test.cc

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4039,12 +4039,60 @@ TEST(Table, MovedFromCallsFail) {
40394039
}
40404040
}
40414041

4042+
TEST(Table, MaxValidSize) {
4043+
IntTable t;
4044+
EXPECT_EQ(MaxValidSize(sizeof(IntTable::value_type)), t.max_size());
4045+
if constexpr (sizeof(size_t) == 8) {
4046+
for (size_t i = 0; i < 16; ++i) {
4047+
size_t slot_size = size_t{1} << i;
4048+
size_t max_size = MaxValidSize(slot_size);
4049+
ASSERT_LT(max_size, uint64_t{1} << 60);
4050+
ASSERT_TRUE(IsAboveMaxValidSize(max_size + 1, slot_size));
4051+
ASSERT_TRUE(IsAboveMaxValidSize(uint64_t{1} << 63, slot_size));
4052+
ASSERT_TRUE(IsAboveMaxValidSize(~size_t{}, slot_size));
4053+
ASSERT_TRUE(IsAboveMaxValidSize(~size_t{} / 8 * 7, slot_size));
4054+
// Given that key size has to be at least 6 bytes to reach so many
4055+
// different values, total memory usage of the table will be at least
4056+
// 2^42*7 bytes (28 TB).
4057+
// So that value should be enough for all practical purposes.
4058+
ASSERT_GE(max_size, uint64_t{1} << 42);
4059+
// We leave some headroom for the table metadata.
4060+
ASSERT_LT(NormalizeCapacity(GrowthToLowerboundCapacity(max_size)),
4061+
uint64_t{1} << 44);
4062+
}
4063+
}
4064+
EXPECT_LT(MaxValidSize</*kSizeOfSizeT=*/4>(1), 1 << 30);
4065+
EXPECT_LT(MaxValidSize</*kSizeOfSizeT=*/4>(2), 1 << 29);
4066+
EXPECT_TRUE(IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(1 << 30, 1));
4067+
EXPECT_TRUE(IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(1 << 29, 2));
4068+
EXPECT_TRUE(IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(~uint32_t{}, 1));
4069+
EXPECT_TRUE(IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(~uint32_t{} / 8 * 7, 1));
4070+
for (size_t i = 0; i < 16; ++i) {
4071+
size_t slot_size = size_t{1} << i;
4072+
size_t max_size = MaxValidSize</*kSizeOfSizeT=*/4>(slot_size);
4073+
ASSERT_LT(max_size, 1 << 30);
4074+
ASSERT_TRUE(
4075+
IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(max_size + 1, slot_size));
4076+
size_t max_capacity =
4077+
NormalizeCapacity(GrowthToLowerboundCapacity(max_size));
4078+
ASSERT_LT(max_capacity, (size_t{1} << 31) / slot_size);
4079+
ASSERT_GT(max_capacity, (1 << 29) / slot_size);
4080+
}
4081+
}
4082+
40424083
TEST(Table, MaxSizeOverflow) {
40434084
size_t overflow = (std::numeric_limits<size_t>::max)();
40444085
EXPECT_DEATH_IF_SUPPORTED(IntTable t(overflow), "Hash table size overflow");
40454086
IntTable t;
40464087
EXPECT_DEATH_IF_SUPPORTED(t.reserve(overflow), "Hash table size overflow");
40474088
EXPECT_DEATH_IF_SUPPORTED(t.rehash(overflow), "Hash table size overflow");
4089+
size_t slightly_overflow = MaxValidSize(sizeof(IntTable::value_type)) + 1;
4090+
EXPECT_DEATH_IF_SUPPORTED(IntTable t2(slightly_overflow),
4091+
"Hash table size overflow");
4092+
EXPECT_DEATH_IF_SUPPORTED(t.reserve(slightly_overflow),
4093+
"Hash table size overflow");
4094+
EXPECT_DEATH_IF_SUPPORTED(t.rehash(slightly_overflow),
4095+
"Hash table size overflow");
40484096
}
40494097

40504098
// TODO(b/397453582): Remove support for const hasher and remove this test.

0 commit comments

Comments
 (0)