Skip to content

Commit a00c53b

Browse files
Improve suballocator efficiency, add efficiency metrics
1 parent 59de8f2 commit a00c53b

File tree

4 files changed

+53
-5
lines changed

4 files changed

+53
-5
lines changed

src/memory.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ memory_requirements get_trackedimage_memory_requirements(VkDevice device, const
8181
wrap_vkGetDeviceImageMemoryRequirements(device, &info, &req);
8282
reqs.requirements = req.memoryRequirements;
8383
reqs.memory_flags = data.memory_flags;
84+
assert(reqs.requirements.alignment != 0);
8485
return reqs;
8586
}
8687

src/read.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,16 @@ void lava_reader::finalize(bool terminate)
132132
assert(stop_process_cpu_usage.tv_sec >= process_cpu_usage.tv_sec);
133133
const uint64_t process_time = diff_timespec(&stop_process_cpu_usage, &process_cpu_usage);
134134
ILOG("CPU time spent in ms - readhead workers %lu, API runners %lu, full process %lu", (long unsigned)worker, (long unsigned)runner, (long unsigned)process_time);
135-
out["readahead_workers_time"] = (Json::Value::UInt64)worker;
136-
out["api_runners_time"] = (Json::Value::UInt64)runner;
137-
out["process_time"] = (Json::Value::UInt64)process_time;
135+
out["readahead_workers_time"] = worker;
136+
out["api_runners_time"] = runner;
137+
out["process_time"] = process_time;
138+
suballoc_metrics sm = allocator.performance();
139+
out["suballocator_used"] = sm.used;
140+
out["suballocator_allocated"] = sm.allocated;
141+
out["suballocator_heaps"] = sm.heaps;
142+
out["suballocator_objects"] = sm.objects;
143+
out["suballocator_efficiency"] = sm.efficiency;
144+
ILOG("Suballocator used=%lu allocated=%lu heaps=%u objects=%u efficiency=%g", (unsigned long)sm.used, (unsigned long)sm.allocated, (unsigned)sm.heaps, (unsigned)sm.objects, sm.efficiency);
138145
if (terminate)
139146
{
140147
for (auto& v : *thread_call_numbers) v = 0; // stop waiting threads from progressing

src/suballocator.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ struct heap
4040
VkDeviceMemory mem;
4141
VkDeviceSize free;
4242
VkDeviceSize total;
43+
VkMemoryPropertyFlags flags;
4344
/// This one does not need to be concurrent safe, since each thread owns its own heap
4445
/// and only it may iterate over and modify the allocations list.
4546
std::list<suballocation> subs;
@@ -76,6 +77,10 @@ struct suballocator_private
7677
std::vector<lookup> tensor_lookup;
7778
/// Does this device have an annoying optimal-to-linear padding requirement? If so, put optimal and linear objects in different memory heaps
7879
bool allow_mixed_tiling = true;
80+
std::atomic_uint64_t used_bytes { 0 };
81+
std::atomic_uint_least32_t used_count { 0 };
82+
std::atomic_uint64_t allocated_bytes { 0 };
83+
std::atomic_uint_least32_t allocated_heaps { 0 };
7984

8085
void print_memory_usage();
8186
uint32_t get_device_memory_type(uint32_t type_filter, VkMemoryPropertyFlags properties);
@@ -87,6 +92,7 @@ struct suballocator_private
8792
lava_tiling tiling, bool dedicated, VkMemoryAllocateFlags allocflags);
8893
void self_test();
8994
void bind(heap& h, const suballocation& s);
95+
suballoc_metrics performance() const;
9096

9197
inline bool needs_flush(unsigned memoryTypeIndex) { return !(memory_properties.memoryTypes[memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); }
9298
};
@@ -102,6 +108,22 @@ static VkMemoryPropertyFlags prune_memory_flags(VkMemoryPropertyFlags flags)
102108
return flags;
103109
}
104110

111+
/// Report the suballocator's current usage metrics.
/// Simply forwards to the private implementation object.
suballoc_metrics suballocator::performance() const
{
	const suballocator_private& impl = *priv;
	return impl.performance();
}
115+
116+
/// Take a snapshot of the suballocator's bookkeeping counters.
///
/// Every counter is read exactly once, so the efficiency ratio is derived from the very same
/// values that are reported in `used` and `allocated`. The counters are independent atomics,
/// so the snapshot as a whole is not taken atomically with respect to concurrent updates.
///
/// @return Metrics where `efficiency` is used bytes divided by allocated bytes, or 0.0 when
///         no memory has been allocated yet.
suballoc_metrics suballocator_private::performance() const
{
	suballoc_metrics m;
	m.used = used_bytes.load();
	m.objects = used_count.load();
	m.heaps = allocated_heaps.load();
	m.allocated = allocated_bytes.load();
	// Guard the division: 0/0 would yield NaN, which cannot be represented in JSON output.
	m.efficiency = (m.allocated != 0) ? static_cast<double>(m.used) / static_cast<double>(m.allocated) : 0.0;
	return m;
}
126+
105127
void suballocator_private::print_memory_usage()
106128
{
107129
printf("Suballocator memory usage:\n");
@@ -244,11 +266,14 @@ suballoc_location suballocator_private::add_object_new(VkDevice device, uint16_t
244266
{
245267
h.mem = (VkDeviceMemory)malloc(info.allocationSize);
246268
}
269+
allocated_bytes += info.allocationSize;
270+
allocated_heaps++;
247271
h.free = info.allocationSize - s.size;
248272
h.total = info.allocationSize;
249273
h.memoryTypeIndex = memoryTypeIndex;
250274
h.tiling = tiling;
251275
h.subs.push_back(s);
276+
h.flags = flags;
252277
DLOG2("allocating new memory pool with size = %lu, free = %lu (memoryTypeIndex=%u, tiling=%u)", (unsigned long)info.allocationSize,
253278
(unsigned long)h.free, (unsigned)memoryTypeIndex, (unsigned)tiling);
254279
auto it = heaps.push_back(h);
@@ -260,13 +285,15 @@ suballoc_location suballocator_private::add_object_new(VkDevice device, uint16_t
260285
suballoc_location suballocator_private::add_object(VkDevice device, uint16_t tid, uint32_t memoryTypeIndex, suballocation &s, VkMemoryPropertyFlags flags,
261286
lava_tiling tiling, bool dedicated, VkMemoryAllocateFlags allocflags)
262287
{
288+
used_count++;
289+
used_bytes += s.size;
290+
assert(s.alignment != 0);
263291
if (dedicated)
264292
{
265293
return add_object_new(device, tid, memoryTypeIndex, s, flags, tiling, dedicated, allocflags);
266294
}
267295
for (heap& h : heaps)
268296
{
269-
VkMemoryPropertyFlags f = memory_properties.memoryTypes[h.memoryTypeIndex].propertyFlags;
270297
// this is a safe time to actually delete things
271298
if (!h.deletes.empty())
272299
{
@@ -287,7 +314,7 @@ suballoc_location suballocator_private::add_object(VkDevice device, uint16_t tid
287314
h.deletes.clear();
288315
}
289316
// find suballocation
290-
if (h.tid == tid && (flags & f) == flags && h.free >= s.size && h.memoryTypeIndex == memoryTypeIndex && (h.tiling == tiling || allow_mixed_tiling))
317+
if (h.tid == tid && (flags & h.flags) == flags && h.free >= s.size && h.memoryTypeIndex == memoryTypeIndex && (h.tiling == tiling || allow_mixed_tiling))
291318
{
292319
// First case: nothing allocated in heap. In this case, we do not care about alignment, because according to the spec:
293320
// "Allocations returned by vkAllocateMemory are guaranteed to meet any alignment requirement of the implementation."
@@ -398,6 +425,7 @@ suballoc_location suballocator::add_image(uint16_t tid, VkDevice device, VkImage
398425

399426
suballoc_location suballocator::add_trackedobject(uint16_t tid, VkDevice device, const memory_requirements& reqs, uint64_t native, const trackedobject& data)
400427
{
428+
assert(reqs.requirements.alignment != 0); // not properly initialized!
401429
const VkMemoryPropertyFlags memory_flags = prune_memory_flags(data.memory_flags);
402430
const uint32_t memoryTypeIndex = priv->get_device_memory_type(reqs.requirements.memoryTypeBits, memory_flags);
403431
suballocation s;

src/suballocator.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,23 @@ struct suballoc_location
1414

1515
struct suballocator_private;
1616

17+
/// Usage statistics reported by suballocator::performance(). All fields start out as zero.
struct suballoc_metrics
{
	/// Total size in bytes of the objects handed to the suballocator
	uint64_t used{ 0 };
	/// Total size in bytes of the memory heaps the suballocator has allocated
	uint64_t allocated{ 0 };
	/// Number of memory heaps allocated
	uint32_t heaps{ 0 };
	/// Number of objects handed to the suballocator
	uint32_t objects{ 0 };
	/// Ratio of used bytes to allocated bytes
	double efficiency{ 0.0 };
};
25+
1726
struct suballocator
1827
{
1928
/// Call as early as possible to set up internal data structures. Must be called before any other suballoc function.
2029
void init(int num_images, int num_buffers, int tensors, int heap_size = -1, bool fake = false);
2130

31+
/// Get performance metrics for the suballocator. This is not thread safe.
32+
suballoc_metrics performance() const;
33+
2234
suballocator();
2335
~suballocator();
2436

0 commit comments

Comments
 (0)