Skip to content

Commit 62465b9

Browse files
committed
gpu & pmem: add direct guest memory resource
1 parent db2e120 commit 62465b9

File tree

11 files changed

+365
-144
lines changed

11 files changed

+365
-144
lines changed

kernel/orbis/include/orbis/pmem.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct File;
1313
} // namespace orbis
1414

1515
namespace orbis::pmem {
16-
ErrorCode initialize(std::uint64_t size);
16+
ErrorCode initialize(rx::Mappable mappable, std::uint64_t size);
1717
void destroy();
1818
std::pair<rx::AddressRange, ErrorCode>
1919
allocate(std::uint64_t addressHint, std::uint64_t size,

kernel/orbis/src/pmem.cpp

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,28 @@ struct PhysicalMemoryAllocation {
3333
bool operator==(const PhysicalMemoryAllocation &) const = default;
3434
};
3535

36-
using MappableMemoryResource =
37-
kernel::MappableResource<decltype([](std::size_t size) {
38-
return rx::Mappable::CreateMemory(size);
39-
})>;
40-
41-
using PhysicalMemoryResource =
42-
kernel::AllocableResource<PhysicalMemoryAllocation, orbis::kallocator,
43-
MappableMemoryResource>;
36+
struct PhysicalMemoryResource
37+
: kernel::AllocableResource<PhysicalMemoryAllocation, orbis::kallocator,
38+
kernel::ExternalResource> {
39+
std::size_t size;
40+
rx::Mappable mappable;
41+
42+
std::errc create(rx::Mappable mappable, std::size_t size) {
43+
if (size == 0 || !mappable) {
44+
return std::errc::invalid_argument;
45+
}
46+
47+
if (auto errc =
48+
BaseResource::create(rx::AddressRange::fromBeginSize(0, size));
49+
errc != std::errc{}) {
50+
return errc;
51+
}
52+
53+
this->size = size;
54+
this->mappable = std::move(mappable);
55+
return {};
56+
}
57+
};
4458

4559
static auto g_pmemInstance = orbis::createGlobalObject<
4660
kernel::LockableKernelObject<PhysicalMemoryResource>>();
@@ -76,12 +90,12 @@ struct PhysicalMemory : orbis::IoDevice {
7690

7791
static auto g_phyMemory = orbis::createGlobalObject<PhysicalMemory>();
7892

79-
orbis::ErrorCode orbis::pmem::initialize(std::uint64_t size) {
93+
orbis::ErrorCode orbis::pmem::initialize(rx::Mappable mappable,
94+
std::uint64_t size) {
8095
std::lock_guard lock(*g_pmemInstance);
8196
rx::println("pmem: {:x}", size);
8297

83-
return toErrorCode(
84-
g_pmemInstance->create(rx::AddressRange::fromBeginSize(0, size)));
98+
return toErrorCode(g_pmemInstance->create(std::move(mappable), size));
8599
}
86100

87101
void orbis::pmem::destroy() {

rpcsx/gpu/Device.cpp

Lines changed: 72 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,10 @@ makeDisplayEvent(DisplayEvent id, std::uint16_t unk0 = 0,
6363
return result;
6464
}
6565

66-
static vk::Context createVkContext(Device *device) {
66+
static vk::Context createVkContext(Device *device, std::size_t dmemSize) {
6767
std::vector<const char *> optionalLayers;
6868
bool enableValidation = rx::g_config.validateGpu;
6969

70-
for (std::size_t process = 0; process < 6; ++process) {
71-
auto range = rx::AddressRange::fromBeginSize(
72-
0x40'0000 + 0x100'0000'0000 * process, 0x100'0000'0000 - 0x40'0000);
73-
if (auto errc = rx::mem::reserve(range); errc != std::errc{}) {
74-
rx::die("failed to reserve userspace memory: {} {:x}-{:x}", (int)errc,
75-
range.beginAddress(), range.endAddress());
76-
}
77-
}
78-
7970
auto createWindow = [=] {
8071
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
8172
device->window = glfwCreateWindow(1920, 1080, "RPCSX", nullptr, nullptr);
@@ -142,17 +133,22 @@ static vk::Context createVkContext(Device *device) {
142133

143134
result.createDevice(device->surface, rx::g_config.gpuIndex,
144135
{
145-
// VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
146-
// VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
147-
// VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME,
148-
// VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
149-
// VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
150-
// VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
136+
137+
#ifdef _WIN32
138+
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
139+
#else
140+
VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
141+
#endif
142+
VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
151143
VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME,
152144
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
153145
VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
154146
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
155147
VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME,
148+
// VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
149+
// VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
150+
// VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME,
151+
// VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
156152
},
157153
{
158154
VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME,
@@ -180,18 +176,70 @@ static vk::Context createVkContext(Device *device) {
180176
getTotalMemorySize(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
181177
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
182178

183-
vk::getHostVisibleMemory().initHostVisible(
184-
std::min(hostVisibleMemoryTotalSize / 2, 1ul * 1024 * 1024 * 1024));
179+
vk::getDirectMemory().initHostDirect(dmemSize);
180+
181+
vk::getHostVisibleMemory().initHostVisible(512 * 1024 *
182+
1024); // FIXME: reduce RAM usage
185183
vk::getDeviceLocalMemory().initDeviceLocal(
186184
std::min(localMemoryTotalSize / 4, 4ul * 1024 * 1024 * 1024));
187-
188185
vk::context = &device->vkContext;
189186
return result;
190187
}
191188

192189
const auto kCachePageSize = 0x100'0000'0000 / rx::mem::pageSize;
193190

194-
Device::Device() : vkContext(createVkContext(this)) {
191+
Device::Device(std::size_t dmemSize)
192+
: vkContext(createVkContext(this, dmemSize)) {
193+
for (auto &pipe : graphicsPipes) {
194+
pipe.device = this;
195+
}
196+
197+
for (auto &cachePage : cachePages) {
198+
cachePage = static_cast<std::atomic<std::uint8_t> *>(
199+
orbis::kalloc(kCachePageSize, 1));
200+
std::memset(cachePage, 0, kCachePageSize);
201+
}
202+
203+
commandPipe.device = this;
204+
commandPipe.ring = {
205+
.base = std::data(cmdRing),
206+
.size = std::size(cmdRing),
207+
.rptr = std::data(cmdRing),
208+
.wptr = std::data(cmdRing),
209+
};
210+
211+
for (auto &pipe : computePipes) {
212+
pipe.device = this;
213+
}
214+
215+
for (int i = 0; i < kGfxPipeCount; ++i) {
216+
graphicsPipes[i].setDeQueue(
217+
Ring{
218+
.base = mainGfxRings[i],
219+
.size = std::size(mainGfxRings[i]),
220+
.rptr = mainGfxRings[i],
221+
.wptr = mainGfxRings[i],
222+
},
223+
0);
224+
}
225+
}
226+
227+
Device::~Device() {
228+
vkDeviceWaitIdle(vk::context->device);
229+
230+
if (debugMessenger != VK_NULL_HANDLE) {
231+
vk::DestroyDebugUtilsMessengerEXT(vk::context->instance, debugMessenger,
232+
vk::context->allocator);
233+
}
234+
235+
for (auto &cachePage : cachePages) {
236+
orbis::kfree(cachePage, kCachePageSize);
237+
}
238+
}
239+
240+
void Device::initialize() {
241+
vk::context = &vkContext;
242+
195243
if (!shader::spv::validate(g_rdna_semantic_spirv)) {
196244
shader::spv::dump(g_rdna_semantic_spirv, true);
197245
rx::die("builtin semantic validation failed");
@@ -209,16 +257,6 @@ Device::Device() : vkContext(createVkContext(this)) {
209257
rx::die("failed to deserialize builtin semantics\n");
210258
}
211259

212-
for (auto &pipe : graphicsPipes) {
213-
pipe.device = this;
214-
}
215-
216-
for (auto &cachePage : cachePages) {
217-
cachePage = static_cast<std::atomic<std::uint8_t> *>(
218-
orbis::kalloc(kCachePageSize, 1));
219-
std::memset(cachePage, 0, kCachePageSize);
220-
}
221-
222260
cacheUpdateThread = std::jthread([this](const std::stop_token &stopToken) {
223261
auto &sched = graphicsPipes[0].scheduler;
224262
std::uint32_t prevIdleValue = 0;
@@ -262,42 +300,6 @@ Device::Device() : vkContext(createVkContext(this)) {
262300
}
263301
}
264302
});
265-
266-
commandPipe.device = this;
267-
commandPipe.ring = {
268-
.base = std::data(cmdRing),
269-
.size = std::size(cmdRing),
270-
.rptr = std::data(cmdRing),
271-
.wptr = std::data(cmdRing),
272-
};
273-
274-
for (auto &pipe : computePipes) {
275-
pipe.device = this;
276-
}
277-
278-
for (int i = 0; i < kGfxPipeCount; ++i) {
279-
graphicsPipes[i].setDeQueue(
280-
Ring{
281-
.base = mainGfxRings[i],
282-
.size = std::size(mainGfxRings[i]),
283-
.rptr = mainGfxRings[i],
284-
.wptr = mainGfxRings[i],
285-
},
286-
0);
287-
}
288-
}
289-
290-
Device::~Device() {
291-
vkDeviceWaitIdle(vk::context->device);
292-
293-
if (debugMessenger != VK_NULL_HANDLE) {
294-
vk::DestroyDebugUtilsMessengerEXT(vk::context->instance, debugMessenger,
295-
vk::context->allocator);
296-
}
297-
298-
for (auto &cachePage : cachePages) {
299-
orbis::kfree(cachePage, kCachePageSize);
300-
}
301303
}
302304

303305
void Device::start() {
@@ -984,10 +986,10 @@ void Device::mapMemory(std::uint32_t pid, rx::AddressRange virtualRange,
984986
auto memory = amdgpu::RemoteMemory{process.vmId};
985987

986988
auto vmemAddress = memory.getVirtualAddress(virtualRange.beginAddress());
987-
auto errc = orbis::pmem::map(vmemAddress,
988-
rx::AddressRange::fromBeginSize(
989-
physicalOffset, virtualRange.size()),
990-
orbis::vmem::toGpuProtection(prot));
989+
auto errc = orbis::pmem::map(
990+
vmemAddress,
991+
rx::AddressRange::fromBeginSize(physicalOffset, virtualRange.size()),
992+
orbis::vmem::toGpuProtection(prot));
991993
if (errc != orbis::ErrorCode{}) {
992994
rx::die("failed to map process {} memory, address {:x}-{:x}, type {}, "
993995
"offset {:x}, prot {}, error {}",

rpcsx/gpu/Device.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,10 @@ struct Device : rx::RcBase, DeviceContext {
103103
std::uint32_t mainGfxRings[kGfxPipeCount][0x4000 / sizeof(std::uint32_t)];
104104
std::uint32_t cmdRing[0x4000 / sizeof(std::uint32_t)];
105105

106-
Device();
106+
Device(std::size_t dmemSize);
107107
~Device();
108108

109+
void initialize();
109110
void start();
110111

111112
Cache::Tag getCacheTag(int vmId, Scheduler &scheduler) {

rpcsx/gpu/DeviceCtl.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ DeviceCtl &DeviceCtl::operator=(const DeviceCtl &) = default;
2121

2222
DeviceCtl::~DeviceCtl() = default;
2323

24-
DeviceCtl DeviceCtl::createDevice() {
24+
DeviceCtl DeviceCtl::createDevice(std::uint64_t dmemSize) {
2525
DeviceCtl result;
26-
result.mDevice = orbis::knew<Device>();
26+
result.mDevice = orbis::knew<Device>(dmemSize);
2727
return result;
2828
}
2929

@@ -229,7 +229,10 @@ void DeviceCtl::submitComputeQueue(std::uint32_t meId, std::uint32_t pipeId,
229229
pipe.submit(queueId, offset);
230230
}
231231

232-
void DeviceCtl::start() { mDevice->start(); }
232+
void DeviceCtl::start() {
233+
mDevice->initialize();
234+
mDevice->start();
235+
}
233236
void DeviceCtl::waitForIdle() { mDevice->waitForIdle(); }
234237

235238
void amdgpu::mapMemory(std::uint32_t pid, rx::AddressRange virtualRange,

rpcsx/gpu/DeviceCtl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class DeviceCtl {
2525
DeviceCtl &operator=(const DeviceCtl &);
2626
~DeviceCtl();
2727

28-
static DeviceCtl createDevice();
28+
static DeviceCtl createDevice(std::uint64_t dmemSize);
2929
DeviceContext &getContext();
3030
rx::Ref<rx::RcBase> getOpaque();
3131

0 commit comments

Comments
 (0)