Skip to content

Commit 9088cef

Browse files
author
Dwight Guth
authored
merge allocations in same basic block up to a size bound (#1125)
Previously, when allocating a large term, we issued a separate call to kore_alloc for each piece of the term. This is inefficient because every call has to check whether there is enough space and execute a branch instruction. It is more efficient to make one larger call to kore_alloc that covers all the memory we need. To implement this efficiently, we change the way the code generator emits calls to kore_alloc: the call to the allocation function is placed at the beginning of the basic block, and if further allocations are required in the same basic block, that existing call is modified to request a larger amount of memory instead of a new call being emitted. Merging only happens up to a certain size bound, in order to prevent memory from being wasted by requesting particularly large chunks. In practice, this leads to most apply_rule functions calling kore_alloc only once.
1 parent 5002ecd commit 9088cef

File tree

4 files changed

+68
-14
lines changed

4 files changed

+68
-14
lines changed

include/kllvm/codegen/CreateTerm.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
#ifndef CREATE_TERM_H
22
#define CREATE_TERM_H
33

4+
#include <concepts>
5+
46
#include "kllvm/ast/AST.h"
57
#include "kllvm/codegen/DecisionParser.h"
68

79
#include "llvm/ADT/StringMap.h"
810
#include "llvm/IR/Module.h"
911
#include "llvm/IR/Value.h"
1012

13+
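// Upper bound, in bytes, on the size requested by a merged allocation call:
// allocations in the same basic block are folded into one call to the same
// allocation function only while the combined size stays strictly below this.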
14+
static constexpr int max_block_merge_size = 4096;
15+
1116
namespace kllvm {
1217

1318
class create_term {
@@ -143,12 +148,15 @@ bool is_injection_symbol(kore_pattern *p, kore_symbol *sym);
143148

144149
void add_abort(llvm::BasicBlock *block, llvm::Module *module);
145150

151+
template <typename T>
152+
requires std::same_as<T, llvm::BasicBlock>
153+
|| std::same_as<T, llvm::Instruction>
146154
llvm::Value *allocate_term(
147-
llvm::Type *alloc_type, llvm::BasicBlock *block,
155+
llvm::Type *alloc_type, llvm::Value *len, T *insert_point,
148156
char const *alloc_fn = "kore_alloc");
149157
llvm::Value *allocate_term(
150-
llvm::Type *alloc_type, llvm::Value *len, llvm::BasicBlock *block,
151-
char const *alloc_fn = "kore_alloc");
158+
llvm::Type *alloc_type, llvm::BasicBlock *block,
159+
char const *alloc_fn = "kore_alloc", bool mergeable = false);
152160
} // namespace kllvm
153161

154162
#endif // CREATE_TERM_H

include/kllvm/codegen/Util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ llvm::Function *kore_heap_alloc(std::string const &name, llvm::Module *module);
2323

2424
llvm::Instruction *create_malloc(
2525
llvm::BasicBlock *block, llvm::Value *alloc_size, llvm::Function *malloc_f);
26+
llvm::Instruction *create_malloc(
27+
llvm::Instruction *inst, llvm::Value *alloc_size, llvm::Function *malloc_f);
2628

2729
// getOrInsertFunction on module, aborting on failure
2830
template <class... Ts>

lib/codegen/CreateTerm.cpp

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -168,22 +168,58 @@ llvm::Value *get_block_header(
168168
llvm::Type::getInt64Ty(module->getContext()), header_val));
169169
}
170170

171+
template <typename T>
172+
requires std::same_as<T, llvm::BasicBlock>
173+
|| std::same_as<T, llvm::Instruction>
171174
llvm::Value *allocate_term(
172-
llvm::Type *alloc_type, llvm::BasicBlock *block, char const *alloc_fn) {
173-
return allocate_term(
174-
alloc_type, llvm::ConstantExpr::getSizeOf(alloc_type), block, alloc_fn);
175-
}
176-
177-
llvm::Value *allocate_term(
178-
llvm::Type *alloc_type, llvm::Value *len, llvm::BasicBlock *block,
175+
llvm::Type *alloc_type, llvm::Value *len, T *insert_point,
179176
char const *alloc_fn) {
180177
auto *malloc = create_malloc(
181-
block, len, kore_heap_alloc(alloc_fn, block->getModule()));
178+
insert_point, len, kore_heap_alloc(alloc_fn, insert_point->getModule()));
182179

183180
set_debug_loc(malloc);
184181
return malloc;
185182
}
186183

184+
// Returns true when `alloc_fn` names one of the plain allocation functions
// ("kore_alloc", "kore_alloc_old" or "kore_alloc_always_gc"); any other name
// yields false.
static bool is_basic_alloc(std::string const &alloc_fn) {
  static constexpr char const *basic_alloc_fns[]
      = {"kore_alloc", "kore_alloc_old", "kore_alloc_always_gc"};
  for (char const *candidate : basic_alloc_fns) {
    if (alloc_fn == candidate) {
      return true;
    }
  }
  return false;
}
188+
189+
// Emits code in `block` that obtains storage for one value of `alloc_type`
// from the allocation function named `alloc_fn`, and returns the pointer to
// that storage. The requested size is the alloc size of `alloc_type` under the
// module's data layout.
//
// When `mergeable` is false, a new call to `alloc_fn` is appended to `block`.
//
// When `mergeable` is true, the first non-PHI instruction of `block` is
// inspected. If it is a direct call to the same function, that function is
// one of the basic allocators (see is_basic_alloc), its size argument is a
// constant, and the combined size stays strictly below max_block_merge_size,
// then no new call is emitted: the existing call's size is enlarged and a
// pointer into the enlarged region is returned. Otherwise a new call is
// inserted at the beginning of the block (before the first non-PHI
// instruction), so that later mergeable allocations in the same block can
// find it and extend it.
llvm::Value *allocate_term(
    llvm::Type *alloc_type, llvm::BasicBlock *block, char const *alloc_fn,
    bool mergeable) {
  llvm::DataLayout layout(block->getModule());
  auto type_size = layout.getTypeAllocSize(alloc_type).getFixedValue();
  auto *ty = llvm::Type::getInt64Ty(block->getContext());
  llvm::Constant *alloc_len = llvm::ConstantInt::get(ty, type_size);

  if (!mergeable) {
    return allocate_term(alloc_type, alloc_len, block, alloc_fn);
  }

  auto *first = block->getFirstNonPHI();
  if (!first) {
    // Nothing but (possibly) PHI nodes so far: appending to the block already
    // puts the call in the position the merge check looks at.
    return allocate_term(alloc_type, alloc_len, block, alloc_fn);
  }

  // Try to extend an allocation call that already heads the block.
  if (auto *call = llvm::dyn_cast<llvm::CallInst>(first)) {
    if (auto *func = call->getCalledFunction()) {
      if (auto *size
          = llvm::dyn_cast<llvm::ConstantInt>(call->getOperand(0))) {
        if (func->getName() == alloc_fn && is_basic_alloc(alloc_fn)
            && size->getLimitedValue() + type_size < max_block_merge_size) {
          // Grow the existing request by the size of the new value.
          call->setOperand(0, llvm::ConstantExpr::getAdd(size, alloc_len));
          // The new value starts where the previous request ended, i.e. at
          // byte offset `size` (the old size) from the start of the region.
          // NOTE(review): the offset is not re-aligned for `alloc_type`;
          // presumably all merged types have compatible alignment -- confirm.
          auto *ret = llvm::GetElementPtrInst::Create(
              llvm::Type::getInt8Ty(block->getContext()), call, {size},
              "alloc_chunk", block);
          set_debug_loc(ret);
          return ret;
        }
      }
    }
  }

  // No call could be extended. Insert the new call before the first non-PHI
  // instruction rather than at the end of the block: the merge check above
  // only ever looks at that position, so a call appended at the end could
  // never be merged with later allocations in this block.
  return allocate_term(alloc_type, alloc_len, first, alloc_fn);
}
222+
187223
value_type term_type(
188224
kore_pattern *pattern, llvm::StringMap<value_type> &substitution,
189225
kore_definition *definition) {
@@ -686,7 +722,8 @@ llvm::Value *create_term::create_function_call(
686722
// we don't use alloca here because the tail call optimization pass for llvm
687723
// doesn't handle correctly functions with alloca
688724
alloc_sret = allocate_term(
689-
return_type, current_block_, get_collection_alloc_fn(return_cat.cat));
725+
return_type, current_block_, get_collection_alloc_fn(return_cat.cat),
726+
true);
690727
sret_type = return_type;
691728
real_args.insert(real_args.begin(), alloc_sret);
692729
types.insert(types.begin(), alloc_sret->getType());
@@ -759,7 +796,8 @@ llvm::Value *create_term::not_injection_case(
759796
children.push_back(child_value);
760797
idx++;
761798
}
762-
llvm::Value *block = allocate_term(block_type, current_block_);
799+
llvm::Value *block
800+
= allocate_term(block_type, current_block_, "kore_alloc", true);
763801
llvm::Value *block_header_ptr = llvm::GetElementPtrInst::CreateInBounds(
764802
block_type, block,
765803
{llvm::ConstantInt::get(llvm::Type::getInt64Ty(ctx_), 0),
@@ -1162,7 +1200,7 @@ std::string make_apply_rule_function(
11621200
if (!arg->getType()->isPointerTy()) {
11631201
auto *ptr = allocate_term(
11641202
arg->getType(), creator.get_current_block(),
1165-
get_collection_alloc_fn(cat.cat));
1203+
get_collection_alloc_fn(cat.cat), true);
11661204
new llvm::StoreInst(arg, ptr, creator.get_current_block());
11671205
arg = ptr;
11681206
}

lib/codegen/Util.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ llvm::Instruction *create_malloc(
2929
return llvm::CallInst::Create(malloc_f, {alloc_size}, "", block);
3030
}
3131

32+
// Builds a call to `malloc_f` with `alloc_size` as its only argument and
// returns the new call instruction. Unlike the BasicBlock overload, which
// appends to the end of a block, this overload takes an instruction as the
// insertion point; with LLVM's CallInst::Create that places the call
// immediately before `inst`.
llvm::Instruction *create_malloc(
    llvm::Instruction *inst, llvm::Value *alloc_size,
    llvm::Function *malloc_f) {
  llvm::CallInst *alloc_call
      = llvm::CallInst::Create(malloc_f, {alloc_size}, "", inst);
  return alloc_call;
}
37+
3238
llvm::Constant *get_offset_of_member(
3339
[[maybe_unused]] llvm::Module *mod, llvm::StructType *struct_ty,
3440
int nth_member) {

0 commit comments

Comments
 (0)