Skip to content

Commit a29c235

Browse files
authored
Global type descriptor split (#180)
1 parent 81eaffc commit a29c235

File tree

10 files changed

+302
-115
lines changed

10 files changed

+302
-115
lines changed

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,13 +203,14 @@ Type serialization for each user-defined type (mode `hybrid`) or *all* types (mo
203203
struct GlobalTypeInfo {
204204
std::int32_t type_id;
205205
const std::uint32_t extent;
206-
const std::uint16_t num_members;
207-
const std::uint16_t flag;
206+
const GlobalTypeInfoData* data; // nullptr for built-ins
207+
};
208+
struct GlobalTypeInfoData {
208209
const char* type_name;
209-
const std::uint16_t* offsets;
210-
const std::uint16_t* array_sizes;
210+
// data: [ num_members, flag, offsets[num_members], array_sizes[num_members] ]
211+
const std::uint16_t* data;
211212
const GlobalTypeInfo** member_types;
212-
};
213+
};
213214
```
214215

215216
Each type is registered at startup with the TypeART runtime using the callback `void __typeart_register_type(const void* type_ptr);`. This adds the type information to the type database (for user queries) and assigns a unique `type-id`.

lib/passes/analysis/MemOpVisitor.cpp

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -167,33 +167,75 @@ std::optional<InstTy*> getSingleUserAs(llvm::Instruction* value) {
167167
using MallocGeps = SmallPtrSet<GetElementPtrInst*, 2>;
168168
using MallocBcasts = SmallPtrSet<BitCastInst*, 4>;
169169

170-
std::pair<MallocGeps, MallocBcasts> collectRelevantMallocUsers(llvm::CallBase& ci) {
171-
auto geps = MallocGeps{};
172-
auto bcasts = MallocBcasts{};
173-
for (auto user : ci.users()) {
174-
// Simple case: Pointer is immediately casted
175-
if (auto inst = dyn_cast<BitCastInst>(user)) {
176-
bcasts.insert(inst);
177-
}
178-
// Pointer is first stored, then loaded and subsequently casted
179-
if (auto storeInst = dyn_cast<StoreInst>(user)) {
180-
auto storeAddr = storeInst->getPointerOperand();
181-
for (auto storeUser : storeAddr->users()) { // TODO: Ensure that load occurs after store?
182-
if (auto loadInst = dyn_cast<LoadInst>(storeUser)) {
183-
for (auto loadUser : loadInst->users()) {
184-
if (auto bcastInst = dyn_cast<BitCastInst>(loadUser)) {
185-
// LOG_MSG(*bcastInst)
186-
bcasts.insert(bcastInst);
187-
}
188-
}
170+
// std::pair<MallocGeps, MallocBcasts> collectRelevantMallocUsers(llvm::CallBase& ci) {
171+
// auto geps = MallocGeps{};
172+
// auto bcasts = MallocBcasts{};
173+
// for (auto user : ci.users()) {
174+
// // Simple case: Pointer is immediately casted
175+
// if (auto inst = dyn_cast<BitCastInst>(user)) {
176+
// bcasts.insert(inst);
177+
// }
178+
// // Pointer is first stored, then loaded and subsequently casted
179+
// if (auto storeInst = dyn_cast<StoreInst>(user)) {
180+
// auto storeAddr = storeInst->getPointerOperand();
181+
// if (!(storeAddr == nullptr || llvm::isa<llvm::ConstantPointerNull>(storeAddr))) {
182+
// for (auto storeUser : storeAddr->users()) { // TODO: Ensure that load occurs after store?
183+
// if (auto loadInst = dyn_cast<LoadInst>(storeUser)) {
184+
// for (auto loadUser : loadInst->users()) {
185+
// if (auto bcastInst = dyn_cast<BitCastInst>(loadUser)) {
186+
// // LOG_MSG(*bcastInst)
187+
// bcasts.insert(bcastInst);
188+
// }
189+
// }
190+
// }
191+
// }
192+
// } else {
193+
// LOG_DEBUG("Null, must skip")
194+
// }
195+
// }
196+
// // GEP indicates that an array cookie is added to the allocation. (Fixes #13)
197+
// if (auto gep = dyn_cast<GetElementPtrInst>(user)) {
198+
// geps.insert(gep);
199+
// }
200+
// }
201+
// return {geps, bcasts};
202+
// }
203+
204+
void collect_casts_from_stack(llvm::StoreInst* store_inst, MallocBcasts& out_bcasts) {
205+
auto* slot = store_inst->getPointerOperand();
206+
207+
// Guard: Skip invalid or null storage locations
208+
if (llvm::isa<llvm::ConstantPointerNull>(slot)) {
209+
LOG_DEBUG("Skipping null storage");
210+
return;
211+
}
212+
213+
for (auto* slot_user : slot->users()) {
214+
// TODO: Ensure that load occurs after store?
215+
if (auto* load_inst = llvm::dyn_cast<llvm::LoadInst>(slot_user)) {
216+
for (auto* load_user : load_inst->users()) {
217+
if (auto* bit_cast = llvm::dyn_cast<llvm::BitCastInst>(load_user)) {
218+
out_bcasts.insert(bit_cast);
189219
}
190220
}
191221
}
192-
// GEP indicates that an array cookie is added to the allocation. (Fixes #13)
193-
if (auto gep = dyn_cast<GetElementPtrInst>(user)) {
194-
geps.insert(gep);
222+
}
223+
}
224+
225+
// Gathers the users of an allocation call that matter for type deduction.
//
// Returns {GEP users, bitcast users} of `call_inst`:
//  - bitcasts applied directly to the call result,
//  - GEPs on the call result (a GEP indicates that an array cookie is added to
//    the allocation, see #13),
//  - bitcasts reached after the result was stored and loaded again; these are
//    gathered by collect_casts_from_stack.
// Any other kind of user is ignored.
std::pair<MallocGeps, MallocBcasts> collectRelevantMallocUsers(llvm::CallBase& call_inst) {
  MallocGeps gep_users;
  MallocBcasts bitcast_users;

  for (auto* user : call_inst.users()) {
    auto* const direct_cast = llvm::dyn_cast<llvm::BitCastInst>(user);
    if (direct_cast != nullptr) {
      bitcast_users.insert(direct_cast);
      continue;
    }

    auto* const cookie_gep = llvm::dyn_cast<llvm::GetElementPtrInst>(user);
    if (cookie_gep != nullptr) {
      gep_users.insert(cookie_gep);
      continue;
    }

    auto* const spill = llvm::dyn_cast<llvm::StoreInst>(user);
    if (spill != nullptr) {
      collect_casts_from_stack(spill, bitcast_users);
    }
  }

  return {gep_users, bitcast_users};
}
199241

lib/passes/instrumentation/TypeIDProvider.cpp

Lines changed: 88 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "support/ConfigurationBase.h"
99
#include "support/Logger.h"
1010

11+
#include <cstdint>
1112
#include <llvm/ADT/ArrayRef.h>
1213
#include <llvm/ADT/StringMap.h>
1314
#include <llvm/ADT/StringRef.h>
@@ -29,6 +30,7 @@
2930
#include <optional>
3031
#include <string>
3132
#include <utility>
33+
#include <vector>
3234

3335
namespace typeart {
3436

@@ -72,6 +74,40 @@ inline std::string create_prefixed_name(Args&&... args) {
7274
return name;
7375
}
7476

77+
namespace detail {

// Sign test selected by tag dispatch: the unsigned overload below avoids an
// always-false `val < 0` comparison on unsigned source types.
template <typename SourceT>
constexpr bool is_negative(SourceT val, std::true_type /*is_signed*/) {
  return val < 0;
}

template <typename SourceT>
constexpr bool is_negative(SourceT /*val*/, std::false_type /*is_signed*/) {
  return false;
}

// Converts the integral value `val` to the integral type T.
//
// In builds with assertions enabled, the conversion is checked first: negative
// values and values above std::numeric_limits<T>::max() trigger an assertion
// failure. With NDEBUG defined the checks compile away and this is a plain
// static_cast.
//
// The comparison is carried out in std::uintmax_t so that a source type wider
// than size_t is not truncated before it is compared against the limit.
template <typename T, typename SourceT>
T safe_cast(SourceT val) {
  static_assert(std::is_integral<T>::value && std::is_integral<SourceT>::value,
                "safe_cast only supports integral types");
  assert(!is_negative(val, std::is_signed<SourceT>{}) &&
         "Data loss detected: Negative value cannot be serialized!");
  assert(static_cast<std::uintmax_t>(val) <= static_cast<std::uintmax_t>(std::numeric_limits<T>::max()) &&
         "Data loss detected: Value exceeds target type limits!");
  return static_cast<T>(val);
}

}  // namespace detail
87+
88+
template <typename T>
89+
std::vector<T> get_serialized_members_for(const StructTypeInfo& info) {
90+
using namespace detail;
91+
std::vector<T> dest;
92+
const size_t required_space = info.offsets.size() + info.array_sizes.size() + 2;
93+
dest.reserve(required_space);
94+
95+
// Layout : [ num_member, flag, offsets...[num_member], array_sizes...[num_member] ]
96+
97+
dest.push_back(safe_cast<T>(info.num_members));
98+
dest.push_back(safe_cast<T>(static_cast<std::underlying_type_t<StructTypeFlag>>(info.flag)));
99+
100+
for (size_t offset : info.offsets) {
101+
dest.push_back(safe_cast<T>(offset));
102+
}
103+
104+
for (size_t size : info.array_sizes) {
105+
dest.push_back(safe_cast<T>(size));
106+
}
107+
108+
return dest;
109+
}
110+
75111
} // namespace helper
76112

77113
namespace typedb {
@@ -151,7 +187,8 @@ enum class IGlobalType : short {
151187
member_types,
152188
member_count,
153189
type_flag,
154-
ptr
190+
ptr,
191+
info_holder
155192
};
156193

157194
struct TypeHelper {
@@ -181,13 +218,14 @@ struct TypeHelper {
181218
}
182219
case IGlobalType::name:
183220
case IGlobalType::ptr:
221+
case IGlobalType::info_holder:
184222
#if LLVM_VERSION_MAJOR < 15
185223
return ir_build_.getInt8PtrTy();
186224
#else
187225
return ir_build_.getPtrTy();
188226
#endif
189227
}
190-
llvm_unreachable("Should not be reached disk");
228+
llvm_unreachable("Should not be reached");
191229
}
192230

193231
llvm::Constant* get_constant_for(IGlobalType type, size_t value) {
@@ -218,22 +256,23 @@ struct GlobalTypeRegistrar {
218256
llvm::IRBuilder<> ir_build;
219257
GlobalTypeCallback type_callback;
220258
llvm::StructType* struct_layout_type_;
259+
llvm::StructType* struct_layout_type_cold_;
221260
TypeHelper types_helper;
222261
const bool builtin_emit_name{false};
223262

224263
// Declares the two LLVM struct types used for the emitted global type descriptors:
//  - "struct._typeart_struct_layout_t": type_id, extent and the info_holder pointer field;
//  - "struct._typeart_struct_layout_info_t": name, the serialized uint16 data array and member_types.
// The results are stored in struct_layout_type_ and struct_layout_type_cold_.
void declare_layout() {
225264
auto& context = module_->getContext();
226265
struct_layout_type_ = llvm::StructType::create(context, "struct._typeart_struct_layout_t");
227266
struct_layout_type_->setBody({
228-
types_helper.get_type_for(IGlobalType::type_id), // int type_id
229-
types_helper.get_type_for(IGlobalType::extent), // uint32 extent
230-
types_helper.get_type_for(IGlobalType::num_members), // uint16 num_members
231-
types_helper.get_type_for(IGlobalType::type_flag), // uint16 type_flag
267+
types_helper.get_type_for(IGlobalType::type_id), // uint32 type_id -- NOTE(review): README declares std::int32_t; confirm signedness
268+
types_helper.get_type_for(IGlobalType::extent), // uint32 extent
269+
types_helper.get_type_for(IGlobalType::info_holder), // ptr to struct._typeart_struct_layout_info_t, or nullptr if no info object is emitted
270+
});
271+
struct_layout_type_cold_ = llvm::StructType::create(context, "struct._typeart_struct_layout_info_t");
272+
struct_layout_type_cold_->setBody({
232273
types_helper.get_type_for(IGlobalType::name), // const char* name
233274
types_helper.get_type_for(IGlobalType::member_offsets), // const uint16* data: [num_members, flag, offsets..., array_sizes...]
234-
types_helper.get_type_for(IGlobalType::member_count), // const uint16* count
235275
types_helper.get_type_for(IGlobalType::member_types), // const typeart_struct_layout_t** member_types
236-
237276
});
238277
}
239278

@@ -274,10 +313,11 @@ struct GlobalTypeRegistrar {
274313
return create_global(global_name, array_ty, constant_array);
275314
}
276315

277-
llvm::Constant* create_global_array_ptr(const llvm::StringRef name, llvm::ArrayRef<uint64_t> values,
316+
template <typename T>
317+
llvm::Constant* create_global_array_ptr(const llvm::StringRef name, llvm::ArrayRef<T> values,
278318
IGlobalType type = IGlobalType::member_offsets) {
279319
return create_global_array_from_range(name, values, types_helper.get_type_for(IGlobalType::member_offsets, true),
280-
[&](uint64_t val) { return types_helper.get_constant_for(type, val); });
320+
[&](const T& val) { return types_helper.get_constant_for(type, val); });
281321
}
282322

283323
llvm::Constant* create_global_member_array_ptr(const llvm::StringRef name, llvm::ArrayRef<int> member_types) {
@@ -294,51 +334,55 @@ struct GlobalTypeRegistrar {
294334
LOG_DEBUG("Type is forward decl " << base_name)
295335
}
296336

297-
llvm::Constant* offsets_ptr = create_global_array_ptr(helper::concat("offsets_", link_name), type_struct->offsets);
298-
llvm::Constant* counts_ptr = create_global_array_ptr(helper::concat("counts_", link_name), type_struct->array_sizes,
299-
IGlobalType::member_count);
300-
llvm::Constant* members_ptr =
301-
create_global_member_array_ptr(helper::concat("member_types_", link_name), type_struct->member_types);
302-
303337
const bool is_builtin = type_struct->flag == StructTypeFlag::BUILTIN;
304338
const bool emit_name = !is_builtin || builtin_emit_name;
305339

306-
llvm::Constant* name_str_ptr =
307-
emit_name ? create_global_constant_string(link_name, base_name) : types_helper.get_constant_nullptr();
308-
309340
llvm::GlobalVariable* global_struct =
310341
create_global(link_name, struct_layout_type_, nullptr, llvm::GlobalValue::LinkOnceODRLinkage);
311342
global_struct->setConstant(false);
312343

313-
std::vector<llvm::Constant*> init_fields = {
314-
types_helper.get_constant_for(IGlobalType::type_id, type_struct->type_id),
315-
types_helper.get_constant_for(IGlobalType::extent, type_struct->extent),
316-
types_helper.get_constant_for(IGlobalType::member_count, type_struct->member_types.size()),
317-
types_helper.get_constant_for(IGlobalType::type_flag, static_cast<int>(type_struct->flag)),
318-
name_str_ptr,
319-
offsets_ptr,
320-
counts_ptr,
321-
members_ptr};
322-
323-
llvm::Constant* init = llvm::ConstantStruct::get(struct_layout_type_, init_fields);
324-
global_struct->setInitializer(init);
344+
llvm::Comdat* comdat = this->module_->getOrInsertComdat(helper::create_prefixed_name(link_name));
345+
comdat->setSelectionKind(llvm::Comdat::Any);
346+
global_struct->setComdat(comdat);
325347

326-
{
327-
llvm::Comdat* comdat = this->module_->getOrInsertComdat(helper::create_prefixed_name(link_name));
328-
comdat->setSelectionKind(llvm::Comdat::Any);
329-
global_struct->setComdat(comdat);
348+
auto add_to_comdat = [&](llvm::Constant* ptr) {
349+
if (auto* global = llvm::dyn_cast_or_null<llvm::GlobalObject>(ptr)) {
350+
global->setComdat(comdat);
351+
}
352+
};
330353

331-
auto add_to_comdat = [&](llvm::Constant* ptr) {
332-
if (auto* global = llvm::dyn_cast_or_null<llvm::GlobalObject>(ptr)) {
333-
global->setComdat(comdat);
354+
const auto get_info_object = [&]() -> llvm::Constant* {
355+
if (emit_name) {
356+
llvm::Constant* name_str_ptr = create_global_constant_string(link_name, base_name);
357+
const auto info_data = helper::get_serialized_members_for<uint16_t>(*type_struct);
358+
llvm::Constant* data_ptr =
359+
create_global_array_ptr<uint16_t>(helper::concat("info_data_", link_name), info_data);
360+
llvm::Constant* members_ptr =
361+
create_global_member_array_ptr(helper::concat("member_types_", link_name), type_struct->member_types);
362+
363+
llvm::GlobalVariable* global_struct_info =
364+
create_global(helper::concat(link_name, "_info"), struct_layout_type_cold_, nullptr);
365+
366+
std::vector<llvm::Constant*> init_fields_cold = {name_str_ptr, data_ptr, members_ptr};
367+
global_struct_info->setInitializer(llvm::ConstantStruct::get(struct_layout_type_cold_, init_fields_cold));
368+
369+
{
370+
add_to_comdat(global_struct_info);
371+
add_to_comdat(data_ptr);
372+
add_to_comdat(members_ptr);
373+
add_to_comdat(name_str_ptr);
334374
}
335-
};
336375

337-
add_to_comdat(offsets_ptr);
338-
add_to_comdat(counts_ptr);
339-
add_to_comdat(members_ptr);
340-
add_to_comdat(name_str_ptr);
341-
}
376+
return global_struct_info;
377+
}
378+
return types_helper.get_constant_nullptr();
379+
};
380+
381+
std::vector<llvm::Constant*> init_fields = {
382+
types_helper.get_constant_for(IGlobalType::type_id, type_struct->type_id),
383+
types_helper.get_constant_for(IGlobalType::extent, type_struct->extent), get_info_object()};
384+
llvm::Constant* init = llvm::ConstantStruct::get(struct_layout_type_, init_fields);
385+
global_struct->setInitializer(init);
342386

343387
return global_struct;
344388
}

lib/runtime/AllocationTracking.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ void __typeart_leave_scope_omp(int alloca_count) {
292292
void __typeart_alloc_mty(const void* addr, const void* info, size_t count) {
293293
TYPEART_RUNTIME_GUARD;
294294
const void* retAddr = __builtin_return_address(0);
295-
const auto type_id = reinterpret_cast<const typeart::GlobalTypeInfo*>(info)->type_id;
295+
const auto type_id = reinterpret_cast<const typeart::global_types::GlobalTypeInfo*>(info)->type_id;
296296
auto& rt = typeart::RuntimeSystem::get();
297297
assert(type_id == rt.type_translator().get_type_id_for(info) && "Type ID of global and lookup must match");
298298
rt.allocation_tracker().onAlloc(addr, type_id, count, retAddr);
@@ -301,7 +301,7 @@ void __typeart_alloc_mty(const void* addr, const void* info, size_t count) {
301301
void __typeart_alloc_stack_mty(const void* addr, const void* info, size_t count) {
302302
TYPEART_RUNTIME_GUARD;
303303
const void* retAddr = __builtin_return_address(0);
304-
const auto type_id = reinterpret_cast<const typeart::GlobalTypeInfo*>(info)->type_id;
304+
const auto type_id = reinterpret_cast<const typeart::global_types::GlobalTypeInfo*>(info)->type_id;
305305
auto& rt = typeart::RuntimeSystem::get();
306306
assert(type_id == rt.type_translator().get_type_id_for(info) && "Type ID of global and lookup must match");
307307
rt.allocation_tracker().onAllocStack(addr, type_id, count, retAddr);
@@ -310,7 +310,7 @@ void __typeart_alloc_stack_mty(const void* addr, const void* info, size_t count)
310310
void __typeart_alloc_global_mty(const void* addr, const void* info, size_t count) {
311311
TYPEART_RUNTIME_GUARD;
312312
const void* retAddr = __builtin_return_address(0);
313-
const auto type_id = reinterpret_cast<const typeart::GlobalTypeInfo*>(info)->type_id;
313+
const auto type_id = reinterpret_cast<const typeart::global_types::GlobalTypeInfo*>(info)->type_id;
314314
auto& rt = typeart::RuntimeSystem::get();
315315
assert(type_id == rt.type_translator().get_type_id_for(info) && "Type ID of global and lookup must match");
316316
rt.allocation_tracker().onAllocGlobal(addr, type_id, count, retAddr);
@@ -319,7 +319,7 @@ void __typeart_alloc_global_mty(const void* addr, const void* info, size_t count
319319
void __typeart_alloc_omp_mty(const void* addr, const void* info, size_t count) {
320320
TYPEART_RUNTIME_GUARD;
321321
const void* retAddr = __builtin_return_address(0);
322-
const auto type_id = reinterpret_cast<const typeart::GlobalTypeInfo*>(info)->type_id;
322+
const auto type_id = reinterpret_cast<const typeart::global_types::GlobalTypeInfo*>(info)->type_id;
323323
auto& rt = typeart::RuntimeSystem::get();
324324
assert(type_id == rt.type_translator().get_type_id_for(info) && "Type ID of global and lookup must match");
325325
rt.allocation_tracker().onAlloc(addr, type_id, count, retAddr);
@@ -328,7 +328,7 @@ void __typeart_alloc_omp_mty(const void* addr, const void* info, size_t count) {
328328
void __typeart_alloc_stack_omp_mty(const void* addr, const void* info, size_t count) {
329329
TYPEART_RUNTIME_GUARD;
330330
const void* retAddr = __builtin_return_address(0);
331-
const auto type_id = reinterpret_cast<const typeart::GlobalTypeInfo*>(info)->type_id;
331+
const auto type_id = reinterpret_cast<const typeart::global_types::GlobalTypeInfo*>(info)->type_id;
332332
auto& rt = typeart::RuntimeSystem::get();
333333
assert(type_id == rt.type_translator().get_type_id_for(info) && "Type ID of global and lookup must match");
334334
rt.allocation_tracker().onAllocStack(addr, type_id, count, retAddr);

0 commit comments

Comments
 (0)