
Commit 12fa65e

dwblaikie and zygoloid authored
Check for use of InstIds from the wrong SemIR::File (#5997)
Use the `CheckIRId` as a unique identifier for the scope of an `InstId`: if an `InstId` is created within the scope of one `CheckIRId`, it must not be used in the scope of a different `CheckIRId`. This is achieved without extra storage, but with false negatives for large inputs.

When an `InstId` is created, the original index of the `Inst` is XORed with a tag derived from the `CheckIRId` to produce the final `InstId`. When the `InstId` is used, the expected tag is XORed with the `InstId` to recover the original index; if the tags don't match, the resulting index is corrupted (likely far too large), producing an out-of-bounds index CHECK failure.

The tag value is derived as follows:

* take the `CheckIRId`
* left shift one bit (padding with a zero)
* left shift another bit (padding with a one, used to signify that the resulting `InstId` has a tag combined into it)
* reverse the bits

In this way the tag is unlikely to overlap with the index for small test cases, making it possible to separate the `CheckIRId` from the index in those cases to provide more meaningful debugging/CHECK messages, and more informative `SemIR` textual dumping that can now include the `CheckIRId` along with the `Inst`'s index in the name of an inst.

The test churn here is improved printing: tagged `InstId`s can now, on a best-effort basis (more likely for small test cases, where the `CheckIRId` and the `Inst` index aren't at risk of overlapping from the high and low bits), render the `CheckIRId` as part of the inst's name, going from `instNN` to `irMM.instNN`.

---------

Co-authored-by: Richard Smith <[email protected]>
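To make the derivation concrete, here is a minimal standalone sketch of the tagging scheme described above. This is an illustration only: `ReverseBits` and `MakeTag` are hand-rolled stand-ins for `llvm::reverseBits` and the `IdTag` constructor in the diff, and the CheckIRId values are made up.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Reverse the 32 bits of `v` (stand-in for llvm::reverseBits).
static auto ReverseBits(uint32_t v) -> uint32_t {
  uint32_t result = 0;
  for (int i = 0; i < 32; ++i) {
    result = (result << 1) | ((v >> i) & 1);
  }
  return result;
}

// Derive a tag from a CheckIRId: add one, shift in the marker bit, shift in a
// zero (keeping the reversed value non-negative), then bit-reverse so the tag
// occupies the high bits of the index space.
static auto MakeTag(int32_t check_ir_id) -> int32_t {
  uint32_t shifted = (((static_cast<uint32_t>(check_ir_id) + 1) << 1) | 1) << 1;
  return static_cast<int32_t>(ReverseBits(shifted));
}

int main() {
  int32_t tag = MakeTag(/*check_ir_id=*/3);
  int32_t index = 42;

  // Tagging on InstId creation, untagging on use: XOR with the same tag
  // round-trips back to the original index.
  int32_t tagged = index ^ tag;
  assert((tagged ^ tag) == index);

  // Untagging with a different IR's tag corrupts the index, producing a huge
  // value that the store's bounds CHECK will reject.
  int32_t corrupted = tagged ^ MakeTag(/*check_ir_id=*/7);
  std::printf("tagged=%#x corrupted=%#x\n", static_cast<unsigned>(tagged),
              static_cast<unsigned>(corrupted));
  return 0;
}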
1 parent ce6bf91 commit 12fa65e

31 files changed: +2193 -2032 lines

toolchain/base/BUILD

Lines changed: 1 addition & 0 deletions
@@ -66,6 +66,7 @@ cc_library(
     hdrs = ["fixed_size_value_store.h"],
     deps = [
         ":mem_usage",
+        ":value_store",
         ":value_store_types",
         "//common:check",
         "@llvm-project//llvm:Support",

toolchain/base/fixed_size_value_store.h

Lines changed: 19 additions & 9 deletions
@@ -13,6 +13,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
 #include "toolchain/base/mem_usage.h"
+#include "toolchain/base/value_store.h"
 #include "toolchain/base/value_store_types.h"
 
 namespace Carbon {
@@ -47,9 +48,11 @@ class FixedSizeValueStore {
   }
 
   // Makes a ValueStore of the specified size, initialized to a default.
-  static auto MakeWithExplicitSize(size_t size, ConstRefType default_value)
+  static auto MakeWithExplicitSize(IdTag tag, size_t size,
+                                   ConstRefType default_value)
       -> FixedSizeValueStore {
     FixedSizeValueStore store;
+    store.tag_ = tag;
     store.values_.resize(size, default_value);
     return store;
   }
@@ -73,7 +76,8 @@ class FixedSizeValueStore {
   template <typename ValueStoreT>
     requires std::same_as<IdT, typename ValueStoreT::IdType>
   explicit FixedSizeValueStore(const ValueStoreT& size_source,
-                               ConstRefType default_value) {
+                               ConstRefType default_value)
+      : tag_(size_source.GetIdTag()) {
     values_.resize(size_source.size(), default_value);
   }
 
@@ -86,7 +90,8 @@ class FixedSizeValueStore {
       llvm::function_ref<
           auto(IdT, typename ValueStoreT::ConstRefType)->ValueType>
           factory_fn)
-      : values_(llvm::map_range(source.enumerate(), factory_fn)) {}
+      : values_(llvm::map_range(source.enumerate(), factory_fn)),
+        tag_(GetIdTag(source)) {}
 
   // Move-only.
   FixedSizeValueStore(FixedSizeValueStore&&) noexcept = default;
@@ -95,20 +100,23 @@ class FixedSizeValueStore {
 
   // Sets the value for an ID.
   auto Set(IdT id, ValueType value) -> void {
-    CARBON_DCHECK(id.index >= 0, "{0}", id);
-    values_[id.index] = value;
+    auto index = tag_.Remove(id.index);
+    CARBON_DCHECK(index >= 0, "{0}", id);
+    values_[index] = value;
   }
 
   // Returns a mutable value for an ID.
   auto Get(IdT id) -> RefType {
-    CARBON_DCHECK(id.index >= 0, "{0}", id);
-    return values_[id.index];
+    auto index = tag_.Remove(id.index);
+    CARBON_DCHECK(index >= 0, "{0}", id);
+    return values_[index];
  }
 
   // Returns the value for an ID.
   auto Get(IdT id) const -> ConstRefType {
-    CARBON_DCHECK(id.index >= 0, "{0}", id);
-    return values_[id.index];
+    auto index = tag_.Remove(id.index);
+    CARBON_DCHECK(index >= 0, "{0}", id);
+    return values_[index];
   }
 
   // Collects memory usage of the values.
@@ -135,6 +143,8 @@ class FixedSizeValueStore {
 
   // Storage for the `ValueT` objects, indexed by the id.
   llvm::SmallVector<ValueT, 0> values_;
+
+  IdTag tag_;
 };
 
 }  // namespace Carbon
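As a rough illustration of the new `Get`/`Set` contract above (a sketch only: `Tag` is a simplified stand-in for `IdTag` that always XORs, omitting the reserved-id passthrough, and the tag value and store size are made up):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified stand-in for IdTag: Remove() just XORs the tag back out.
struct Tag {
  int32_t bits;
  auto Remove(int32_t tagged) const -> int32_t { return tagged ^ bits; }
};

// Fixed-size side table that accepts tagged ids and strips the tag before
// indexing, as the Get/Set changes above do.
template <typename T>
struct FixedSizeStoreSketch {
  Tag tag;
  std::vector<T> values;

  auto Set(int32_t tagged_id, T value) -> void {
    auto index = tag.Remove(tagged_id);
    assert(index >= 0 && static_cast<std::size_t>(index) < values.size());
    values[index] = value;
  }
  auto Get(int32_t tagged_id) const -> const T& {
    auto index = tag.Remove(tagged_id);
    assert(index >= 0 && static_cast<std::size_t>(index) < values.size());
    return values[index];
  }
};

int main() {
  constexpr int32_t kTag = 0x48000000;  // made-up tag with the marker bit set
  FixedSizeStoreSketch<int> store{{kTag}, std::vector<int>(8, -1)};

  // An id carrying the matching tag indexes correctly.
  int32_t tagged_id = 5 ^ kTag;
  store.Set(tagged_id, 42);
  assert(store.Get(tagged_id) == 42);

  // An id tagged for a *different* IR untags to a huge index and would trip
  // the bounds assert (the analogue of the out-of-bounds CHECK):
  // store.Get(5 ^ 0x44000000);
  return 0;
}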

toolchain/base/value_store.h

Lines changed: 116 additions & 15 deletions
@@ -32,6 +32,81 @@ class ValueStoreNotPrintable {};
 
 }  // namespace Internal
 
+struct IdTag {
+  IdTag()
+      : id_tag_(0),
+        initial_reserved_ids_(std::numeric_limits<int32_t>::max()) {}
+  explicit IdTag(int32_t id_index, int32_t initial_reserved_ids)
+      : initial_reserved_ids_(initial_reserved_ids) {
+    // Shift down by 1 to get out of the high bit to avoid using any negative
+    // ids, since they have special uses.
+    // Shift down by another 1 to free up the second highest bit for a marker to
+    // indicate whether the index is tagged (& needs to be untagged) or not.
+    // Add one to the index so it's not zero-based, to make it a bit less likely
+    // this doesn't collide with anything else (though with the
+    // second-highest-bit-tagging this might not be needed).
+    id_tag_ = llvm::reverseBits((((id_index + 1) << 1) | 1) << 1);
+  }
+  auto GetCheckIRId() const -> int32_t {
+    return (llvm::reverseBits(id_tag_) >> 2) - 1;
+  }
+  auto Apply(int32_t index) const -> int32_t {
+    if (index < initial_reserved_ids_) {
+      return index;
+    }
+    // assert that id_tag_ doesn't have the second highest bit set
+    return index ^ id_tag_;
+  }
+  static auto DecomposeWithBestEffort(int32_t tagged_index)
+      -> std::pair<int32_t, int32_t> {
+    if (tagged_index < 0) {
+      return {-1, tagged_index};
+    }
+    if ((llvm::reverseBits(2) & tagged_index) == 0) {
+      return {-1, tagged_index};
+    }
+    int length = 0;
+    int location = 0;
+    for (int i = 0; i != 32; ++i) {
+      int current_run = 0;
+      int location_of_current_run = i;
+      while (i != 32 && (tagged_index & (1 << i)) == 0) {
+        ++current_run;
+        ++i;
+      }
+      if (current_run != 0) {
+        --i;
+      }
+      if (current_run > length) {
+        length = current_run;
+        location = location_of_current_run;
+      }
+    }
+    if (length < 8) {
+      return {-1, tagged_index};
+    }
+    auto index_mask = llvm::maskTrailingOnes<uint32_t>(location);
+    auto ir_id = (llvm::reverseBits(tagged_index & ~index_mask) >> 2) - 1;
+    auto index = tagged_index & index_mask;
+    return {ir_id, index};
+  }
+  auto Remove(int32_t tagged_index) const -> int32_t {
+    if ((llvm::reverseBits(2) & tagged_index) == 0) {
+      CARBON_DCHECK(tagged_index < initial_reserved_ids_,
+                    "This untagged index is outside the initial reserved ids "
+                    "and should have been tagged.");
+      return tagged_index;
+    }
+    auto index = tagged_index ^ id_tag_;
+    CARBON_DCHECK(index >= initial_reserved_ids_,
+                  "When removing tagging bits, found an index that "
+                  "shouldn't've been tagged in the first place.");
+    return index;
+  }
+  int32_t id_tag_;
+  int32_t initial_reserved_ids_;
+};
+
 // A simple wrapper for accumulating values, providing IDs to later retrieve the
 // value. This does not do deduplication.
 template <typename IdT, typename ValueT>
@@ -74,6 +149,7 @@ class ValueStore
   };
 
   ValueStore() = default;
+  explicit ValueStore(IdTag tag) : tag_(tag) {}
 
   // Stores the value and returns an ID to reference it.
   auto Add(ValueType value) -> IdType {
@@ -82,8 +158,8 @@ class ValueStore
     // tracking down issues easier.
     CARBON_DCHECK(size_ < std::numeric_limits<int32_t>::max(), "Id overflow");
 
-    IdType id(size_);
-    auto [chunk_index, pos] = IdToChunkIndices(id);
+    IdType id(tag_.Apply(size_));
+    auto [chunk_index, pos] = RawIndexToChunkIndices(size_);
     ++size_;
 
     CARBON_DCHECK(static_cast<size_t>(chunk_index) <= chunks_.size(),
@@ -99,16 +175,12 @@ class ValueStore
 
   // Returns a mutable value for an ID.
   auto Get(IdType id) -> RefType {
-    CARBON_DCHECK(id.index >= 0, "{0}", id);
-    CARBON_DCHECK(id.index < size_, "{0}", id);
     auto [chunk_index, pos] = IdToChunkIndices(id);
     return chunks_[chunk_index].Get(pos);
   }
 
   // Returns the value for an ID.
   auto Get(IdType id) const -> ConstRefType {
-    CARBON_DCHECK(id.index >= 0, "{0}", id);
-    CARBON_DCHECK(id.index < size_, "{0}", id);
     auto [chunk_index, pos] = IdToChunkIndices(id);
     return chunks_[chunk_index].Get(pos);
   }
@@ -118,7 +190,7 @@ class ValueStore
     if (size <= size_) {
       return;
     }
-    auto [final_chunk_index, _] = IdToChunkIndices(IdType(size - 1));
+    auto [final_chunk_index, _] = RawIndexToChunkIndices(size - 1);
     chunks_.resize(final_chunk_index + 1);
   }
 
@@ -128,10 +200,10 @@ class ValueStore
       return;
     }
 
-    auto [begin_chunk_index, begin_pos] = IdToChunkIndices(IdType(size_));
+    auto [begin_chunk_index, begin_pos] = RawIndexToChunkIndices(size_);
     // Use an inclusive range so that if `size` would be the next chunk, we
     // don't try doing something with it.
-    auto [end_chunk_index, end_pos] = IdToChunkIndices(IdType(size - 1));
+    auto [end_chunk_index, end_pos] = RawIndexToChunkIndices(size - 1);
     chunks_.resize(end_chunk_index + 1);
 
     // If the begin and end chunks are the same, we only fill from begin to end.
@@ -192,13 +264,33 @@ class ValueStore
     // `mapped_iterator` incorrectly infers the pointer type for `PointerProxy`.
     // NOLINTNEXTLINE(readability-const-return-type)
     auto index_to_id = [&](int32_t i) -> const std::pair<IdType, ConstRefType> {
-      return std::pair<IdType, ConstRefType>(IdType(i), Get(IdType(i)));
+      IdType id(tag_.Apply(i));
+      return std::pair<IdType, ConstRefType>(id, Get(id));
     };
     // Because indices into `ValueStore` are all sequential values from 0, we
     // can use llvm::seq to walk all indices in the store.
     return llvm::map_range(llvm::seq(size_), index_to_id);
   }
 
+  auto GetIdTag() const -> IdTag { return tag_; }
+  auto GetRawIndex(IdT id) const -> int32_t {
+    auto index = tag_.Remove(id.index);
+    CARBON_DCHECK(index >= 0, "{0}", index);
+    // Attempt to decompose id.index to include extra detail in the check here
+#ifndef NDEBUG
+    if (index >= size_) {
+      auto [ir_id, decomposed_index] = IdTag::DecomposeWithBestEffort(id.index);
+      CARBON_DCHECK(
+          index < size_,
+          "Untagged index was outside of container range. Possibly tagged "
+          "index {0}. Best-effort decomposition: CheckIRId: {1}, index: {2}. "
+          "The IdTag for this container is: {3}",
+          id.index, ir_id, decomposed_index, tag_.GetCheckIRId());
+    }
+#endif
+    return index;
+  }
+
  private:
   // A chunk of `ValueType`s which has a fixed capacity, but variable size.
   // Tracks the size internally for verifying bounds.
@@ -312,9 +404,10 @@ class ValueStore
     int32_t num_ = 0;
   };
 
-  // Converts an id into an index into the set of chunks, and an offset into
-  // that specific chunk. Looks for index overflow in non-optimized builds.
-  static auto IdToChunkIndices(IdType id) -> std::pair<int32_t, int32_t> {
+  // Converts a raw index into an index into the set of chunks, and an offset
+  // into that specific chunk. Looks for index overflow in non-optimized builds.
+  static auto RawIndexToChunkIndices(int32_t index)
+      -> std::pair<int32_t, int32_t> {
     constexpr auto LowBits = Chunk::IndexBits();
 
     // Verify there are no unused bits when indexing up to the `Capacity`. This
@@ -328,16 +421,24 @@ class ValueStore
     static_assert(LowBits < 30);
 
     // The index of the chunk is the high bits.
-    auto chunk = id.index >> LowBits;
+    auto chunk = index >> LowBits;
     // The index into the chunk is the low bits.
-    auto pos = id.index & ((1 << LowBits) - 1);
+    auto pos = index & ((1 << LowBits) - 1);
     return {chunk, pos};
   }
 
+  // Converts an id into an index into the set of chunks, and an offset into
+  // that specific chunk.
+  auto IdToChunkIndices(IdType id) const -> std::pair<int32_t, int32_t> {
+    return RawIndexToChunkIndices(GetRawIndex(id));
+  }
+
   // Number of elements added to the store. The number should never exceed what
   // fits in an `int32_t`, which is checked in non-optimized builds in Add().
   int32_t size_ = 0;
 
+  IdTag tag_;
+
   // Storage for the `ValueType` objects, indexed by the id. We use a vector of
   // chunks of `ValueType` instead of just a vector of `ValueType` so that
   // addresses of `ValueType` objects are stable. This allows the rest of the
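For reference, here is the intended `Apply`/`Remove` behaviour above, reduced to a standalone sketch. The reserved-id threshold, the CheckIRId, and the `ReverseBits` helper are illustrative stand-ins, not the toolchain's values or APIs.

#include <cassert>
#include <cstdint>

// Stand-in for llvm::reverseBits.
static auto ReverseBits(uint32_t v) -> uint32_t {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) {
    r = (r << 1) | ((v >> i) & 1);
  }
  return r;
}

// Sketch of the IdTag contract: indices below the reserved threshold pass
// through untagged; everything else is XOR-tagged, with bit 30 acting as the
// "this index is tagged" marker that Remove() checks.
struct IdTagSketch {
  int32_t id_tag_;
  int32_t initial_reserved_ids_;

  auto Apply(int32_t index) const -> int32_t {
    return index < initial_reserved_ids_ ? index : index ^ id_tag_;
  }
  auto Remove(int32_t tagged_index) const -> int32_t {
    if ((tagged_index & (1 << 30)) == 0) {
      // Untagged: must be one of the reserved ids.
      assert(tagged_index < initial_reserved_ids_);
      return tagged_index;
    }
    return tagged_index ^ id_tag_;
  }
};

int main() {
  // Tag for a hypothetical CheckIRId of 2, with a made-up 64 reserved ids.
  int32_t tag = static_cast<int32_t>(ReverseBits((((2 + 1) << 1) | 1) << 1));
  IdTagSketch id_tag{tag, /*initial_reserved_ids_=*/64};

  assert(id_tag.Apply(10) == 10);                   // reserved id: untouched
  assert(id_tag.Remove(id_tag.Apply(100)) == 100);  // tagged id: round-trips
  return 0;
}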

toolchain/check/check_unit.cpp

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ auto CheckUnit::CollectTransitiveImports(SemIR::InstId import_decl_id,
   // exported to this IR.
   auto ir_to_result_index =
       FixedSizeValueStore<SemIR::CheckIRId, int>::MakeWithExplicitSize(
-          unit_and_imports_->unit->total_ir_count, -1);
+          IdTag(), unit_and_imports_->unit->total_ir_count, -1);
 
   // First add direct imports. This means that if an entity is imported both
   // directly and indirectly, the import path will reflect the direct import.

toolchain/check/context.cpp

Lines changed: 3 additions & 3 deletions
@@ -31,9 +31,9 @@ Context::Context(DiagnosticEmitterBase* emitter,
       scope_stack_(sem_ir_),
       deferred_definition_worklist_(vlog_stream),
       vtable_stack_("vtable_stack_", *sem_ir, vlog_stream),
-      check_ir_map_(
-          FixedSizeValueStore<SemIR::CheckIRId, SemIR::ImportIRId>::
-              MakeWithExplicitSize(total_ir_count_, SemIR::ImportIRId::None)),
+      check_ir_map_(FixedSizeValueStore<SemIR::CheckIRId, SemIR::ImportIRId>::
+                        MakeWithExplicitSize(IdTag(), total_ir_count_,
+                                             SemIR::ImportIRId::None)),
       global_init_(this),
       region_stack_([this](SemIR::LocId loc_id, std::string label) {
         TODO(loc_id, label);

toolchain/check/import_ref.cpp

Lines changed: 6 additions & 3 deletions
@@ -36,8 +36,9 @@ namespace Carbon::Check {
 // Adds the ImportIR, excluding the update to the check_ir_map.
 static auto InternalAddImportIR(Context& context, SemIR::ImportIR import_ir)
     -> SemIR::ImportIRId {
-  context.import_ir_constant_values().push_back(
-      SemIR::ConstantValueStore(SemIR::ConstantId::None));
+  context.import_ir_constant_values().push_back(SemIR::ConstantValueStore(
+      SemIR::ConstantId::None,
+      import_ir.sem_ir ? &import_ir.sem_ir->insts() : nullptr));
   return context.import_irs().Add(import_ir);
 }
 
@@ -50,7 +51,9 @@ static auto SetSpecialImportIR(Context& context, SemIR::ImportIR import_ir,
     ir_id = AddImportIR(context, import_ir);
   } else {
     // We don't have a check_ir_id, so add without touching check_ir_map.
-    ir_id = InternalAddImportIR(context, import_ir);
+    context.import_ir_constant_values().push_back(
+        SemIR::ConstantValueStore(SemIR::ConstantValueStore::Unusable));
+    ir_id = context.import_irs().Add(import_ir);
   }
   CARBON_CHECK(ir_id == expected_import_ir_id,
                "Actual ImportIRId ($0) != Expected ImportIRId ({1})", ir_id,
