@@ -63,19 +63,10 @@ makeDisplayEvent(DisplayEvent id, std::uint16_t unk0 = 0,
6363 return result;
6464}
6565
66- static vk::Context createVkContext (Device *device) {
66+ static vk::Context createVkContext (Device *device, std:: size_t dmemSize ) {
6767 std::vector<const char *> optionalLayers;
6868 bool enableValidation = rx::g_config.validateGpu ;
6969
70- for (std::size_t process = 0 ; process < 6 ; ++process) {
71- auto range = rx::AddressRange::fromBeginSize (
72- 0x40'0000 + 0x100'0000'0000 * process, 0x100'0000'0000 - 0x40'0000 );
73- if (auto errc = rx::mem::reserve (range); errc != std::errc{}) {
74- rx::die (" failed to reserve userspace memory: {} {:x}-{:x}" , (int )errc,
75- range.beginAddress (), range.endAddress ());
76- }
77- }
78-
7970 auto createWindow = [=] {
8071 glfwWindowHint (GLFW_CLIENT_API, GLFW_NO_API);
8172 device->window = glfwCreateWindow (1920 , 1080 , " RPCSX" , nullptr , nullptr );
@@ -142,17 +133,22 @@ static vk::Context createVkContext(Device *device) {
142133
143134 result.createDevice (device->surface , rx::g_config.gpuIndex ,
144135 {
145- // VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
146- // VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
147- // VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME,
148- // VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
149- // VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
150- // VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
136+
137+ #ifdef _WIN32
138+ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
139+ #else
140+ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
141+ #endif
142+ VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
151143 VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME,
152144 VK_KHR_SWAPCHAIN_EXTENSION_NAME,
153145 VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
154146 VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
155147 VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME,
148+ // VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
149+ // VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
150+ // VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME,
151+ // VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
156152 },
157153 {
158154 VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME,
@@ -180,18 +176,70 @@ static vk::Context createVkContext(Device *device) {
180176 getTotalMemorySize (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
181177 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
182178
183- vk::getHostVisibleMemory ().initHostVisible (
184- std::min (hostVisibleMemoryTotalSize / 2 , 1ul * 1024 * 1024 * 1024 ));
179+ vk::getDirectMemory ().initHostDirect (dmemSize);
180+
181+ vk::getHostVisibleMemory ().initHostVisible (512 * 1024 *
182+ 1024 ); // FIXME: reduce RAM usage
185183 vk::getDeviceLocalMemory ().initDeviceLocal (
186184 std::min (localMemoryTotalSize / 4 , 4ul * 1024 * 1024 * 1024 ));
187-
188185 vk::context = &device->vkContext ;
189186 return result;
190187}
191188
// Byte size used for every cachePages allocation (passed to orbis::kalloc,
// std::memset and orbis::kfree). Computed as 0x100'0000'0000 divided by
// rx::mem::pageSize.
// NOTE(review): presumably one byte of cache state per memory page of a
// 0x100'0000'0000-byte address range -- confirm against the cache users.
192189const auto kCachePageSize = 0x100'0000'0000 / rx::mem::pageSize;
193190
194- Device::Device () : vkContext(createVkContext(this )) {
// Constructs the device. vkContext is initialized in the member-init list via
// createVkContext(this, dmemSize); the body then attaches the pipes to this
// device, allocates the zero-filled cache pages and sets up the rings.
//
// dmemSize is forwarded unchanged to createVkContext.
191+ Device::Device (std::size_t dmemSize)
192+ : vkContext(createVkContext(this , dmemSize)) {
// Give every graphics pipe a back-pointer to this device.
193+ for (auto &pipe : graphicsPipes) {
194+ pipe.device = this ;
195+ }
196+
// Allocate kCachePageSize bytes per cache page and clear them.
// NOTE(review): the second kalloc argument (1) is presumably the alignment.
// NOTE(review): the kalloc result is handed to memset without a null check --
// confirm orbis::kalloc cannot return nullptr.
197+ for (auto &cachePage : cachePages) {
198+ cachePage = static_cast <std::atomic<std::uint8_t > *>(
199+ orbis::kalloc (kCachePageSize , 1 ));
200+ std::memset (cachePage, 0 , kCachePageSize );
201+ }
202+
// The command pipe's ring covers cmdRing; rptr and wptr both start at the
// beginning of the buffer.
203+ commandPipe.device = this ;
204+ commandPipe.ring = {
205+ .base = std::data (cmdRing),
206+ .size = std::size (cmdRing),
207+ .rptr = std::data (cmdRing),
208+ .wptr = std::data (cmdRing),
209+ };
210+
// Give every compute pipe a back-pointer to this device.
211+ for (auto &pipe : computePipes) {
212+ pipe.device = this ;
213+ }
214+
// Hand each graphics pipe a ring over its mainGfxRings[i] buffer, again with
// rptr == wptr == base.
// NOTE(review): the trailing 0 is presumably a ring/queue index -- confirm
// against setDeQueue.
215+ for (int i = 0 ; i < kGfxPipeCount ; ++i) {
216+ graphicsPipes[i].setDeQueue (
217+ Ring{
218+ .base = mainGfxRings[i],
219+ .size = std::size (mainGfxRings[i]),
220+ .rptr = mainGfxRings[i],
221+ .wptr = mainGfxRings[i],
222+ },
223+ 0 );
224+ }
225+ }
226+
// Tears the device down: waits for the Vulkan device to go idle, destroys the
// debug messenger if one was created, then frees the cache pages.
// NOTE(review): this goes through the global vk::context rather than the
// vkContext member -- presumably they refer to the same context; confirm.
// NOTE(review): cacheUpdateThread (started in initialize()) is not stopped or
// joined in this body -- confirm it cannot touch cachePages after they are
// freed below.
227+ Device::~Device () {
// Block until all submitted GPU work has finished before destroying anything.
228+ vkDeviceWaitIdle (vk::context->device );
229+
230+ if (debugMessenger != VK_NULL_HANDLE) {
231+ vk::DestroyDebugUtilsMessengerEXT (vk::context->instance , debugMessenger,
232+ vk::context->allocator );
233+ }
234+
// Release each cache page with the same size it was allocated with.
235+ for (auto &cachePage : cachePages) {
236+ orbis::kfree (cachePage, kCachePageSize );
237+ }
238+ }
239+
240+ void Device::initialize () {
241+ vk::context = &vkContext;
242+
195243 if (!shader::spv::validate (g_rdna_semantic_spirv)) {
196244 shader::spv::dump (g_rdna_semantic_spirv, true );
197245 rx::die (" builtin semantic validation failed" );
@@ -209,16 +257,6 @@ Device::Device() : vkContext(createVkContext(this)) {
209257 rx::die (" failed to deserialize builtin semantics\n " );
210258 }
211259
212- for (auto &pipe : graphicsPipes) {
213- pipe.device = this ;
214- }
215-
216- for (auto &cachePage : cachePages) {
217- cachePage = static_cast <std::atomic<std::uint8_t > *>(
218- orbis::kalloc (kCachePageSize , 1 ));
219- std::memset (cachePage, 0 , kCachePageSize );
220- }
221-
222260 cacheUpdateThread = std::jthread ([this ](const std::stop_token &stopToken) {
223261 auto &sched = graphicsPipes[0 ].scheduler ;
224262 std::uint32_t prevIdleValue = 0 ;
@@ -262,42 +300,6 @@ Device::Device() : vkContext(createVkContext(this)) {
262300 }
263301 }
264302 });
265-
266- commandPipe.device = this ;
267- commandPipe.ring = {
268- .base = std::data (cmdRing),
269- .size = std::size (cmdRing),
270- .rptr = std::data (cmdRing),
271- .wptr = std::data (cmdRing),
272- };
273-
274- for (auto &pipe : computePipes) {
275- pipe.device = this ;
276- }
277-
278- for (int i = 0 ; i < kGfxPipeCount ; ++i) {
279- graphicsPipes[i].setDeQueue (
280- Ring{
281- .base = mainGfxRings[i],
282- .size = std::size (mainGfxRings[i]),
283- .rptr = mainGfxRings[i],
284- .wptr = mainGfxRings[i],
285- },
286- 0 );
287- }
288- }
289-
290- Device::~Device () {
291- vkDeviceWaitIdle (vk::context->device );
292-
293- if (debugMessenger != VK_NULL_HANDLE) {
294- vk::DestroyDebugUtilsMessengerEXT (vk::context->instance , debugMessenger,
295- vk::context->allocator );
296- }
297-
298- for (auto &cachePage : cachePages) {
299- orbis::kfree (cachePage, kCachePageSize );
300- }
301303}
302304
303305void Device::start () {
@@ -984,10 +986,10 @@ void Device::mapMemory(std::uint32_t pid, rx::AddressRange virtualRange,
984986 auto memory = amdgpu::RemoteMemory{process.vmId };
985987
986988 auto vmemAddress = memory.getVirtualAddress (virtualRange.beginAddress ());
987- auto errc = orbis::pmem::map (vmemAddress,
988- rx::AddressRange::fromBeginSize (
989- physicalOffset, virtualRange.size ()),
990- orbis::vmem::toGpuProtection (prot));
989+ auto errc = orbis::pmem::map (
990+ vmemAddress,
991+ rx::AddressRange::fromBeginSize ( physicalOffset, virtualRange.size ()),
992+ orbis::vmem::toGpuProtection (prot));
991993 if (errc != orbis::ErrorCode{}) {
992994 rx::die (" failed to map process {} memory, address {:x}-{:x}, type {}, "
993995 " offset {:x}, prot {}, error {}" ,
0 commit comments