Skip to content

Commit 4aa8d36

Browse files
Clean up suballocator and meta data storage for Vulkan objects
Fixing one of lavatube's earliest design mistakes that tried to avoid relying on information stored outside of the command stream. Improve suballocator interface and make it mix linear and optimal allocations if bufferImageGranularity is one.
1 parent 5ae6f08 commit 4aa8d36

File tree

6 files changed

+56
-50
lines changed

6 files changed

+56
-50
lines changed

scripts/util.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -716,15 +716,17 @@ def print_load(self, name, owner): # called for each parameter
716716
z.do('%s = reader.read_%s();' % (varname, self.type))
717717

718718
if self.funcname in ['vkBindImageMemory', 'VkBindImageMemoryInfoKHR', 'VkBindImageMemoryInfo'] and self.name == 'image':
719-
z.do('const VkMemoryPropertyFlags special_flags = static_cast<VkMemoryPropertyFlags>(reader.read_uint32_t()); // fetch memory flags especially added')
720-
z.do('const VkImageTiling tiling = static_cast<VkImageTiling>(reader.read_uint32_t()); // fetch tiling property especially added')
721-
z.do('const VkDeviceSize min_size = static_cast<VkDeviceSize>(reader.read_uint64_t()); // fetch padded memory size')
722719
z.do('trackedimage& image_data = VkImage_index.at(image_index);')
723-
z.do('suballoc_location loc = reader.parent->allocator.add_image(reader.thread_index(), reader.device, %s, image_index, special_flags, tiling, min_size);' % varname)
720+
z.do('image_data.memory_flags = static_cast<VkMemoryPropertyFlags>(reader.read_uint32_t()); // fetch memory flags especially added') # TBD remove me
721+
z.do('const VkImageTiling tiling = static_cast<VkImageTiling>(reader.read_uint32_t()); // fetch tiling property especially added') # TBD remove me
722+
z.do('assert(tiling == image_data.tiling);')
723+
z.do('const VkDeviceSize min_size = static_cast<VkDeviceSize>(reader.read_uint64_t()); // fetch padded memory size') # TBD remove me
724+
z.do('assert(min_size == image_data.size);')
725+
z.do('suballoc_location loc = reader.parent->allocator.add_image(reader.thread_index(), reader.device, %s, image_data);' % varname)
724726
elif self.funcname in ['vkBindBufferMemory', 'VkBindBufferMemoryInfo', 'VkBindBufferMemoryInfoKHR'] and self.name == 'buffer':
725-
z.do('const VkMemoryPropertyFlags special_flags = static_cast<VkMemoryPropertyFlags>(reader.read_uint32_t()); // fetch memory flags especially added')
726727
z.do('trackedbuffer& buffer_data = VkBuffer_index.at(buffer_index);')
727-
z.do('suballoc_location loc = reader.parent->allocator.add_buffer(reader.thread_index(), reader.device, %s, special_flags, buffer_data);' % varname)
728+
z.do('buffer_data.memory_flags = static_cast<VkMemoryPropertyFlags>(reader.read_uint32_t()); // fetch memory flags especially added') # TBD remove me
729+
z.do('suballoc_location loc = reader.parent->allocator.add_buffer(reader.thread_index(), reader.device, %s, buffer_data);' % varname)
728730

729731
if self.funcname in ['vkBindImageMemory', 'vkBindBufferMemory', 'VkBindBufferMemoryInfo', 'VkBindBufferMemoryInfoKHR', 'VkBindImageMemoryInfoKHR', 'VkBindImageMemoryInfo']:
730732
if self.name == 'memory':
@@ -1007,12 +1009,13 @@ def print_save(self, name, owner, postprocess = False): # called for each parame
10071009
z.do('writer.commandBuffer = %s;' % varname) # always earlier in the parameter list than images and buffers, fortunately
10081010
z.do('writer.device = commandbuffer_data->device;')
10091011
z.do('writer.physicalDevice = commandbuffer_data->physicalDevice;')
1010-
if self.funcname in ['vkBindImageMemory', 'vkBindBufferMemory', 'VkBindImageMemoryInfo', 'VkBindImageMemoryInfoKHR', 'VkBindBufferMemoryInfo', 'VkBindBufferMemoryInfoKHR'] and self.name in ['image', 'buffer']:
1012+
if self.funcname in ['vkBindImageMemory', 'vkBindBufferMemory', 'VkBindImageMemoryInfo', 'VkBindImageMemoryInfoKHR', 'VkBindBufferMemoryInfo', 'VkBindBufferMemoryInfoKHR', 'VkBindTensorMemoryInfoARM'] and self.name in ['image', 'buffer', 'tensor']:
10111013
z.do('const auto* meminfo = writer.parent->records.VkDeviceMemory_index.at(%s);' % (owner + 'memory'))
1012-
z.do('writer.write_uint32_t(static_cast<uint32_t>(meminfo->propertyFlags)); // save memory flags')
1013-
if self.funcname in ['vkBindImageMemory', 'VkBindImageMemoryInfo', 'VkBindImageMemoryInfoKHR'] and self.name == 'image':
1014-
z.do('writer.write_uint32_t(static_cast<uint32_t>(image_data->tiling)); // save tiling info')
1015-
z.do('writer.write_uint64_t(static_cast<uint64_t>(image_data->size)); // save padded image size')
1014+
z.do('%s->memory_flags = meminfo->propertyFlags;' % totrackable(self.type))
1015+
z.do('writer.write_uint32_t(static_cast<uint32_t>(meminfo->propertyFlags)); // save memory flags') # TBD remove
1016+
if self.funcname in ['vkBindImageMemory', 'VkBindImageMemoryInfo', 'VkBindImageMemoryInfoKHR'] and self.name == 'image': # TBD remove
1017+
z.do('writer.write_uint32_t(static_cast<uint32_t>(image_data->tiling)); // save tiling info') # TBD remove
1018+
z.do('writer.write_uint64_t(static_cast<uint64_t>(image_data->size)); // save padded image size') # TBD remove
10161019
if self.funcname == 'vkAllocateMemory' and self.name == 'pAllocateInfo' and not postprocess:
10171020
z.do('frame_mutex.lock();')
10181021
z.do('assert(real_memory_properties.memoryTypeCount > 0);')

src/hardcode_write.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,8 @@ static void trace_post_vkBindBufferMemory(lava_file_writer& writer, VkResult res
400400
{
401401
wrap_vkGetBufferMemoryRequirements(device, buffer, &buffer_data->req);
402402
}
403-
// we pass in size recorded from vkCreateBuffer, which is the actually used size, rather than required allocation size
403+
// we store size recorded from vkCreateBuffer, which is the actually used size, rather than required allocation size
404+
assert(buffer_data->size >= buffer_data->req.size);
404405
buffer_data->accessible = (memory_data->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
405406
buffer_data->enter_bound();
406407
writer.parent->memory_mutex.unlock();

src/json_helpers.cpp

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ static Json::Value trackedobject_json(const trackedobject *t)
3636
v["alias_index"] = t->alias_index;
3737
v["alias_type"] = (unsigned)t->alias_type;
3838
}
39+
v["req_size"] = (Json::Value::UInt64)t->req.size; // info only, do not read
40+
v["req_alignment"] = (unsigned)t->req.alignment; // info only, do not read
41+
v["memory_flags"] = (unsigned)t->memory_flags;
3942
return v;
4043
}
4144

@@ -55,8 +58,6 @@ Json::Value trackedbuffer_json(const trackedbuffer* t)
5558
v["sharingMode"] = (unsigned)t->sharingMode;
5659
v["usage"] = (unsigned)t->usage;
5760
v["usage2"] = (Json::Value::UInt64)t->usage2;
58-
v["req_size"] = (Json::Value::UInt64)t->req.size;
59-
v["req_alignment"] = (unsigned)t->req.alignment;
6061
v["written"] = (Json::Value::UInt64)t->written;
6162
v["updates"] = (unsigned)t->updates;
6263
return v;
@@ -66,8 +67,6 @@ Json::Value trackedtensor_json(const trackedtensor* t)
6667
{
6768
Json::Value v = trackedobject_json(t);
6869
v["sharingMode"] = (unsigned)t->sharingMode;
69-
v["req_size"] = (Json::Value::UInt64)t->req.size;
70-
v["req_alignment"] = (unsigned)t->req.alignment;
7170
v["tiling"] = (unsigned)t->tiling;
7271
v["format"] = (unsigned)t->format;
7372
v["usage"] = (unsigned)t->usage;
@@ -92,8 +91,6 @@ Json::Value trackedimage_json(const trackedimage* t)
9291
v["sharingMode"] = (unsigned)t->sharingMode;
9392
v["usage"] = (unsigned)t->usage;
9493
v["imageType"] = (unsigned)t->imageType;
95-
v["req_size"] = (Json::Value::UInt64)t->req.size;
96-
v["req_alignment"] = (unsigned)t->req.alignment;
9794
v["format"] = (unsigned)t->format;
9895
v["written"] = (Json::Value::UInt64)t->written;
9996
v["updates"] = (unsigned)t->updates;
@@ -305,6 +302,7 @@ trackedaccelerationstructure trackedaccelerationstructure_json(const Json::Value
305302
t.alias_index = v["alias_index"].asUInt();
306303
t.alias_type = (VkObjectType)v["alias_type"].asUInt();
307304
}
305+
if (v.isMember("memory_flags")) t.memory_flags = (VkMemoryPropertyFlags)v["memory_flags"].asUInt();
308306
t.enter_initialized();
309307
return t;
310308
}
@@ -318,15 +316,13 @@ trackedbuffer trackedbuffer_json(const Json::Value& v)
318316
t.sharingMode = (VkSharingMode)v["sharingMode"].asUInt();
319317
t.usage = (VkBufferUsageFlags)v["usage"].asUInt();
320318
if (v.isMember("usage2")) t.usage2 = (VkBufferUsageFlags2)v["usage2"].asUInt64();
321-
t.req.size = v["req_size"].asUInt64();
322-
t.req.alignment = v["req_alignment"].asUInt();
323-
t.req.memoryTypeBits = 0;
324319
t.object_type = VK_OBJECT_TYPE_BUFFER;
325320
if (v.isMember("alias_index"))
326321
{
327322
t.alias_index = v["alias_index"].asUInt();
328323
t.alias_type = (VkObjectType)v["alias_type"].asUInt();
329324
}
325+
if (v.isMember("memory_flags")) t.memory_flags = (VkMemoryPropertyFlags)v["memory_flags"].asUInt();
330326
t.enter_initialized();
331327
return t;
332328
}
@@ -340,9 +336,6 @@ trackedimage trackedimage_json(const Json::Value& v)
340336
t.sharingMode = (VkSharingMode)v["sharingMode"].asUInt();
341337
t.usage = (VkImageUsageFlags)v["usage"].asUInt();
342338
t.imageType = (VkImageType)v["imageType"].asUInt();
343-
t.req.size = v["req_size"].asUInt64();
344-
t.req.alignment = v["req_alignment"].asUInt();
345-
t.req.memoryTypeBits = 0;
346339
t.initialLayout = (VkImageLayout)(v.get("initialLayout", 0).asUInt());
347340
t.currentLayout = t.initialLayout;
348341
t.samples = (VkSampleCountFlagBits)(v.get("samples", 0).asUInt());
@@ -360,6 +353,7 @@ trackedimage trackedimage_json(const Json::Value& v)
360353
t.alias_index = v["alias_index"].asUInt();
361354
t.alias_type = (VkObjectType)v["alias_type"].asUInt();
362355
}
356+
if (v.isMember("memory_flags")) t.memory_flags = (VkMemoryPropertyFlags)v["memory_flags"].asUInt();
363357
t.object_type = VK_OBJECT_TYPE_IMAGE;
364358
t.enter_initialized();
365359
return t;
@@ -370,9 +364,6 @@ trackedtensor trackedtensor_json(const Json::Value& v)
370364
trackedtensor t;
371365
trackable_helper(t, v);
372366
t.sharingMode = (VkSharingMode)v["sharingMode"].asUInt();
373-
t.req.size = v["req_size"].asUInt64();
374-
t.req.alignment = v["req_alignment"].asUInt();
375-
t.req.memoryTypeBits = 0;
376367
t.object_type = VK_OBJECT_TYPE_TENSOR_ARM;
377368
t.tiling = (VkTensorTilingARM)v["tiling"].asUInt();
378369
t.format = (VkFormat)v["format"].asUInt();
@@ -387,6 +378,7 @@ trackedtensor trackedtensor_json(const Json::Value& v)
387378
t.alias_index = v["alias_index"].asUInt();
388379
t.alias_type = (VkObjectType)v["alias_type"].asUInt();
389380
}
381+
if (v.isMember("memory_flags")) t.memory_flags = (VkMemoryPropertyFlags)v["memory_flags"].asUInt();
390382
t.enter_initialized();
391383
return t;
392384
}

src/lavatube.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ struct trackedobject : trackable
175175
VkObjectType alias_type = VK_OBJECT_TYPE_UNKNOWN;
176176
uint32_t alias_index = UINT32_MAX;
177177
VkDeviceAddress device_address = 0;
178+
VkMemoryPropertyFlags memory_flags = 0;
178179

179180
bool is_state(states s) const { return (uint8_t)s == state; }
180181
void set_state(states s) { state = (uint8_t)s; }

src/suballocator.cpp

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ struct suballocator_private
7373
std::vector<lookup> image_lookup;
7474
std::vector<lookup> buffer_lookup;
7575
std::vector<lookup> tensor_lookup;
76+
/// Does this device have the an annoying optimal-to-linear padding requirement? If so, put optimal and linear objects in different memory heaps
77+
bool allow_mixed_tiling = true;
7678

7779
void print_memory_usage();
7880
uint32_t get_device_memory_type(uint32_t type_filter, VkMemoryPropertyFlags& properties);
@@ -90,6 +92,15 @@ struct suballocator_private
9092

9193
// helpers
9294

95+
static VkMemoryPropertyFlags prune_memory_flags(VkMemoryPropertyFlags flags)
96+
{
97+
if ((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) || (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT))
98+
{
99+
flags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; // do not require this bit in these cases
100+
}
101+
return flags;
102+
}
103+
93104
void suballocator_private::print_memory_usage()
94105
{
95106
printf("Suballocator memory usage:\n");
@@ -166,6 +177,9 @@ void suballocator::setup(VkPhysicalDevice physicaldevice)
166177
if (priv->run)
167178
{
168179
wrap_vkGetPhysicalDeviceMemoryProperties(physicaldevice, &priv->memory_properties);
180+
VkPhysicalDeviceProperties pdprops = {};
181+
wrap_vkGetPhysicalDeviceProperties(physicaldevice, &pdprops);
182+
priv->allow_mixed_tiling = (pdprops.limits.bufferImageGranularity == 1);
169183
}
170184
else
171185
{
@@ -175,6 +189,7 @@ void suballocator::setup(VkPhysicalDevice physicaldevice)
175189
priv->memory_properties.memoryHeapCount = 1;
176190
priv->memory_properties.memoryHeaps[0].size = UINT64_MAX;
177191
priv->memory_properties.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
192+
priv->allow_mixed_tiling = true;
178193
}
179194
assert(priv->memory_properties.memoryTypeCount > 0);
180195
}
@@ -262,12 +277,11 @@ suballoc_location suballocator_private::add_object(VkDevice device, uint16_t tid
262277
h.deletes.clear();
263278
}
264279
// find suballocation
265-
if (h.tid == tid && (flags & f) == flags && h.free >= s.size && h.memoryTypeIndex == memoryTypeIndex && h.tiling == tiling)
280+
if (h.tid == tid && (flags & f) == flags && h.free >= s.size && h.memoryTypeIndex == memoryTypeIndex && (h.tiling == tiling || allow_mixed_tiling))
266281
{
267282
// First case: nothing allocated in heap. In this case, we do not care about alignment, because according to the spec:
268283
// "Allocations returned by vkAllocateMemory are guaranteed to meet any alignment requirement of the implementation."
269-
// Also second case: some memory is available before the first allocation. Like above, we are guaranteed not to have
270-
// to care about alignment in this case.
284+
// Also second case: We place our allocation first. Like above, we are guaranteed not to have to care about alignment.
271285
if (h.subs.empty() || (h.subs.front().offset >= s.size))
272286
{
273287
s.offset = 0;
@@ -354,30 +368,28 @@ void suballocator_private::bind(heap& h, const suballocation& s)
354368
assert(s.offset + s.size <= h.total);
355369
}
356370

357-
suballoc_location suballocator::add_image(uint16_t tid, VkDevice device, VkImage image, uint32_t image_index, VkMemoryPropertyFlags flags, VkImageTiling tiling, VkDeviceSize min_size)
371+
suballoc_location suballocator::add_image(uint16_t tid, VkDevice device, VkImage image, const trackedimage& image_data)
358372
{
359-
if ((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) || (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT))
360-
{
361-
flags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; // do not require this bit in these cases
362-
}
363373
VkMemoryRequirements2 req = {};
364-
const bool dedicated = priv->fill_image_memreq(device, image, req, min_size);
365-
const uint32_t memoryTypeIndex = priv->get_device_memory_type(req.memoryRequirements.memoryTypeBits, flags);
374+
VkMemoryPropertyFlags memory_flags = prune_memory_flags(image_data.memory_flags);
375+
const bool dedicated = priv->fill_image_memreq(device, image, req, image_data.size);
376+
const uint32_t memoryTypeIndex = priv->get_device_memory_type(req.memoryRequirements.memoryTypeBits, memory_flags);
366377
suballocation s;
367378
s.type = VK_OBJECT_TYPE_IMAGE;
368379
s.handle.image = image;
369-
s.size = std::max(req.memoryRequirements.size, min_size);
380+
s.size = std::max(req.memoryRequirements.size, image_data.size);
370381
s.offset = 0;
371-
s.index = image_index;
382+
s.index = image_data.index;
372383
s.alignment = req.memoryRequirements.alignment;
373-
auto r = priv->add_object(device, tid, memoryTypeIndex, s, flags, tiling, dedicated, 0);
384+
auto r = priv->add_object(device, tid, memoryTypeIndex, s, memory_flags, image_data.tiling, dedicated, 0);
374385
assert(r.offset == s.offset);
375386
return r;
376387
}
377388

378-
void suballocator::virtualswap_images(VkDevice device, const std::vector<VkImage>& images, VkMemoryPropertyFlags flags)
389+
void suballocator::virtualswap_images(VkDevice device, const std::vector<VkImage>& images, VkMemoryPropertyFlags memory_flags)
379390
{
380391
assert(priv->run);
392+
VkMemoryPropertyFlags flags = prune_memory_flags(memory_flags);
381393
VkMemoryRequirements2 req = {};
382394
const bool dedicated = priv->fill_image_memreq(device, images.at(0), req, 0);
383395
const uint32_t memoryTypeIndex = priv->get_device_memory_type(req.memoryRequirements.memoryTypeBits, flags);
@@ -407,13 +419,10 @@ void suballocator::virtualswap_images(VkDevice device, const std::vector<VkImage
407419
}
408420
}
409421

410-
suballoc_location suballocator::add_buffer(uint16_t tid, VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags mempropflags, const trackedbuffer& buffer_data)
422+
suballoc_location suballocator::add_buffer(uint16_t tid, VkDevice device, VkBuffer buffer, const trackedbuffer& buffer_data)
411423
{
424+
VkMemoryPropertyFlags memory_flags = prune_memory_flags(buffer_data.memory_flags);
412425
const VkBufferUsageFlags buffer_flags = buffer_data.usage;
413-
if ((mempropflags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) || (mempropflags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT))
414-
{
415-
mempropflags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; // do not require this bit in these cases
416-
}
417426
VkMemoryRequirements2 req = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, nullptr };
418427
VkMemoryDedicatedRequirements dedicated = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, nullptr };
419428
if (use_dedicated_allocation() && priv->run)
@@ -433,7 +442,7 @@ suballoc_location suballocator::add_buffer(uint16_t tid, VkDevice device, VkBuff
433442
req.memoryRequirements.alignment = 1;
434443
req.memoryRequirements.memoryTypeBits = 1;
435444
}
436-
uint32_t memoryTypeIndex = priv->get_device_memory_type(req.memoryRequirements.memoryTypeBits, mempropflags);
445+
uint32_t memoryTypeIndex = priv->get_device_memory_type(req.memoryRequirements.memoryTypeBits, memory_flags);
437446
suballocation s;
438447
s.type = VK_OBJECT_TYPE_BUFFER;
439448
s.handle.buffer = buffer;
@@ -443,7 +452,7 @@ suballoc_location suballocator::add_buffer(uint16_t tid, VkDevice device, VkBuff
443452
s.alignment = req.memoryRequirements.alignment;
444453
VkMemoryAllocateFlags allocflags = 0;
445454
if (buffer_flags & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { dedicated.prefersDedicatedAllocation = VK_TRUE; allocflags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; }
446-
auto r = priv->add_object(device, tid, memoryTypeIndex, s, mempropflags, VK_IMAGE_TILING_LINEAR, dedicated.prefersDedicatedAllocation, allocflags);
455+
auto r = priv->add_object(device, tid, memoryTypeIndex, s, memory_flags, VK_IMAGE_TILING_LINEAR, dedicated.prefersDedicatedAllocation, allocflags);
447456
assert(r.offset == s.offset);
448457
return r;
449458
}

src/suballocator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ struct suballocator
2929

3030
/// Add an image to our memory pools. Thread safe because each thread gets its own set of memory pools that only they
3131
/// can modify. Other threads may access the objects stored inside subject to Vulkan external synchronization rules.
32-
suballoc_location add_image(uint16_t tid, VkDevice device, VkImage image, uint32_t image_index, VkMemoryPropertyFlags flags, VkImageTiling tiling, VkDeviceSize min_size);
32+
suballoc_location add_image(uint16_t tid, VkDevice device, VkImage image, const trackedimage& image_data);
3333

3434
/// Add a buffer to our memory pools. See above.
35-
suballoc_location add_buffer(uint16_t tid, VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags memory_flags, const trackedbuffer& buffer_data);
35+
suballoc_location add_buffer(uint16_t tid, VkDevice device, VkBuffer buffer, const trackedbuffer& buffer_data);
3636

3737
/// Delete an image from our memory pools. Thread safe because the internal data structure is preallocated and never resized,
3838
/// and deleted entries are never reused.

0 commit comments

Comments
 (0)