|
| 1 | +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. |
| 2 | +// This file is part of the "Nabla Engine". |
| 3 | +// For conditions of distribution and use, see copyright notice in nabla.h |
| 4 | +#ifndef _NBL_VIDEO_C_CUDA_DEVICE_H_ |
| 5 | +#define _NBL_VIDEO_C_CUDA_DEVICE_H_ |
| 6 | + |
| 7 | + |
| 8 | +#include "nbl/video/IPhysicalDevice.h" |
| 9 | + |
| 10 | + |
| 11 | +#ifdef _NBL_COMPILE_WITH_CUDA_ |
| 12 | + |
| 13 | +#include "cuda.h" |
| 14 | +#include "nvrtc.h" |
| 15 | +#if CUDA_VERSION < 9000 |
| 16 | + #error "Need CUDA 9.0 SDK or higher." |
| 17 | +#endif |
| 18 | + |
| 19 | +// useful includes in the future |
| 20 | +//#include "cudaEGL.h" |
| 21 | +//#include "cudaVDPAU.h" |
| 22 | + |
| 23 | +namespace nbl::video |
| 24 | +{ |
| 25 | +class CCUDAHandler; |
| 26 | + |
| 27 | +class CCUDADevice : public core::IReferenceCounted |
| 28 | +{ |
| 29 | + public: |
| 30 | + enum E_VIRTUAL_ARCHITECTURE |
| 31 | + { |
| 32 | + EVA_30, |
| 33 | + EVA_32, |
| 34 | + EVA_35, |
| 35 | + EVA_37, |
| 36 | + EVA_50, |
| 37 | + EVA_52, |
| 38 | + EVA_53, |
| 39 | + EVA_60, |
| 40 | + EVA_61, |
| 41 | + EVA_62, |
| 42 | + EVA_70, |
| 43 | + EVA_72, |
| 44 | + EVA_75, |
| 45 | + EVA_80, |
| 46 | + EVA_COUNT |
| 47 | + }; |
| 48 | + static inline constexpr const char* virtualArchCompileOption[] = { |
| 49 | + "-arch=compute_30", |
| 50 | + "-arch=compute_32", |
| 51 | + "-arch=compute_35", |
| 52 | + "-arch=compute_37", |
| 53 | + "-arch=compute_50", |
| 54 | + "-arch=compute_52", |
| 55 | + "-arch=compute_53", |
| 56 | + "-arch=compute_60", |
| 57 | + "-arch=compute_61", |
| 58 | + "-arch=compute_62", |
| 59 | + "-arch=compute_70", |
| 60 | + "-arch=compute_72", |
| 61 | + "-arch=compute_75", |
| 62 | + "-arch=compute_80" |
| 63 | + }; |
| 64 | + inline E_VIRTUAL_ARCHITECTURE getVirtualArchitecture() {return m_virtualArchitecture;} |
| 65 | + |
| 66 | + inline const auto& geDefaultCompileOptions() const {return m_defaultCompileOptions;} |
| 67 | + |
| 68 | + // TODO/REDO Vulkan: https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXTRES__INTEROP.html |
| 69 | + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#vulkan-interoperability |
| 70 | + // Watch out, use Driver API (`cu` functions) NOT the Runtime API (`cuda` functions) |
| 71 | + // Also maybe separate this out into its own `CCUDA` class instead of nesting it here? |
| 72 | +#if 0 |
| 73 | + template<typename ObjType> |
| 74 | + struct GraphicsAPIObjLink |
| 75 | + { |
| 76 | + GraphicsAPIObjLink() : obj(nullptr), cudaHandle(nullptr), acquired(false) |
| 77 | + { |
| 78 | + asImage = {nullptr}; |
| 79 | + } |
| 80 | + GraphicsAPIObjLink(core::smart_refctd_ptr<ObjType>&& _obj) : GraphicsAPIObjLink() |
| 81 | + { |
| 82 | + obj = std::move(_obj); |
| 83 | + } |
| 84 | + GraphicsAPIObjLink(GraphicsAPIObjLink&& other) : GraphicsAPIObjLink() |
| 85 | + { |
| 86 | + operator=(std::move(other)); |
| 87 | + } |
| 88 | + |
| 89 | + GraphicsAPIObjLink(const GraphicsAPIObjLink& other) = delete; |
| 90 | + GraphicsAPIObjLink& operator=(const GraphicsAPIObjLink& other) = delete; |
| 91 | + GraphicsAPIObjLink& operator=(GraphicsAPIObjLink&& other) |
| 92 | + { |
| 93 | + std::swap(obj,other.obj); |
| 94 | + std::swap(cudaHandle,other.cudaHandle); |
| 95 | + std::swap(acquired,other.acquired); |
| 96 | + std::swap(asImage,other.asImage); |
| 97 | + return *this; |
| 98 | + } |
| 99 | + |
| 100 | + ~GraphicsAPIObjLink() |
| 101 | + { |
| 102 | + assert(!acquired); // you've fucked up, there's no way for us to fix it, you need to release the objects on a proper stream |
| 103 | + if (obj) |
| 104 | + CCUDAHandler::cuda.pcuGraphicsUnregisterResource(cudaHandle); |
| 105 | + } |
| 106 | + |
| 107 | + // |
| 108 | + auto* getObject() const {return obj.get();} |
| 109 | + |
| 110 | + private: |
| 111 | + core::smart_refctd_ptr<ObjType> obj; |
| 112 | + CUgraphicsResource cudaHandle; |
| 113 | + bool acquired; |
| 114 | + |
| 115 | + friend class CCUDAHandler; |
| 116 | + public: |
| 117 | + union |
| 118 | + { |
| 119 | + struct |
| 120 | + { |
| 121 | + CUdeviceptr pointer; |
| 122 | + } asBuffer; |
| 123 | + struct |
| 124 | + { |
| 125 | + CUmipmappedArray mipmappedArray; |
| 126 | + CUarray array; |
| 127 | + } asImage; |
| 128 | + }; |
| 129 | + }; |
| 130 | + |
| 131 | + // |
| 132 | + static CUresult registerBuffer(GraphicsAPIObjLink<video::IGPUBuffer>* link, uint32_t flags = CU_GRAPHICS_REGISTER_FLAGS_NONE); |
| 133 | + static CUresult registerImage(GraphicsAPIObjLink<video::IGPUImage>* link, uint32_t flags = CU_GRAPHICS_REGISTER_FLAGS_NONE); |
| 134 | + |
| 135 | + |
| 136 | + template<typename ObjType> |
| 137 | + static CUresult acquireResourcesFromGraphics(void* tmpStorage, GraphicsAPIObjLink<ObjType>* linksBegin, GraphicsAPIObjLink<ObjType>* linksEnd, CUstream stream) |
| 138 | + { |
| 139 | + auto count = std::distance(linksBegin,linksEnd); |
| 140 | + |
| 141 | + auto resources = reinterpret_cast<CUgraphicsResource*>(tmpStorage); |
| 142 | + auto rit = resources; |
| 143 | + for (auto iit=linksBegin; iit!=linksEnd; iit++,rit++) |
| 144 | + { |
| 145 | + if (iit->acquired) |
| 146 | + return CUDA_ERROR_UNKNOWN; |
| 147 | + *rit = iit->cudaHandle; |
| 148 | + } |
| 149 | + |
| 150 | + auto retval = cuda.pcuGraphicsMapResources(count,resources,stream); |
| 151 | + for (auto iit=linksBegin; iit!=linksEnd; iit++) |
| 152 | + iit->acquired = true; |
| 153 | + return retval; |
| 154 | + } |
| 155 | + template<typename ObjType> |
| 156 | + static CUresult releaseResourcesToGraphics(void* tmpStorage, GraphicsAPIObjLink<ObjType>* linksBegin, GraphicsAPIObjLink<ObjType>* linksEnd, CUstream stream) |
| 157 | + { |
| 158 | + auto count = std::distance(linksBegin,linksEnd); |
| 159 | + |
| 160 | + auto resources = reinterpret_cast<CUgraphicsResource*>(tmpStorage); |
| 161 | + auto rit = resources; |
| 162 | + for (auto iit=linksBegin; iit!=linksEnd; iit++,rit++) |
| 163 | + { |
| 164 | + if (!iit->acquired) |
| 165 | + return CUDA_ERROR_UNKNOWN; |
| 166 | + *rit = iit->cudaHandle; |
| 167 | + } |
| 168 | + |
| 169 | + auto retval = cuda.pcuGraphicsUnmapResources(count,resources,stream); |
| 170 | + for (auto iit=linksBegin; iit!=linksEnd; iit++) |
| 171 | + iit->acquired = false; |
| 172 | + return retval; |
| 173 | + } |
| 174 | + |
| 175 | + static CUresult acquireAndGetPointers(GraphicsAPIObjLink<video::IGPUBuffer>* linksBegin, GraphicsAPIObjLink<video::IGPUBuffer>* linksEnd, CUstream stream, size_t* outbufferSizes = nullptr); |
| 176 | + static CUresult acquireAndGetMipmappedArray(GraphicsAPIObjLink<video::IGPUImage>* linksBegin, GraphicsAPIObjLink<video::IGPUImage>* linksEnd, CUstream stream); |
| 177 | + static CUresult acquireAndGetArray(GraphicsAPIObjLink<video::IGPUImage>* linksBegin, GraphicsAPIObjLink<video::IGPUImage>* linksEnd, uint32_t* arrayIndices, uint32_t* mipLevels, CUstream stream); |
| 178 | +#endif |
| 179 | + |
| 180 | + protected: |
| 181 | + friend class CCUDAHandler; |
| 182 | + CCUDADevice(core::smart_refctd_ptr<CVulkanConnection>&& _vulkanConnection, IPhysicalDevice* const _vulkanDevice, const E_VIRTUAL_ARCHITECTURE _virtualArchitecture); |
| 183 | + ~CCUDADevice() = default; |
| 184 | + |
| 185 | + std::vector<const char*> m_defaultCompileOptions; |
| 186 | + core::smart_refctd_ptr<CVulkanConnection> m_vulkanConnection; |
| 187 | + IPhysicalDevice* const m_vulkanDevice; |
| 188 | + E_VIRTUAL_ARCHITECTURE m_virtualArchitecture; |
| 189 | +}; |
| 190 | + |
| 191 | +} |
| 192 | + |
| 193 | +#endif // _NBL_COMPILE_WITH_CUDA_ |
| 194 | + |
| 195 | +#endif |
0 commit comments