Skip to content

Commit fafa8e7

Browse files
fix CUDA JIT headers
1 parent a15d1da commit fafa8e7

File tree

3 files changed

+112
-187
lines changed

3 files changed

+112
-187
lines changed

include/nbl/video/CCUDADevice.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,10 @@ class CCUDADevice : public core::IReferenceCounted
6363
};
6464
inline E_VIRTUAL_ARCHITECTURE getVirtualArchitecture() {return m_virtualArchitecture;}
6565

66-
inline const auto& geDefaultCompileOptions() const {return m_defaultCompileOptions;}
66+
// Returns the default compile options as a [begin,end) range built from
// `m_defaultCompileOptions.data()` and its `size()`.
// The range points into the member's own storage; nothing is copied.
// NOTE(review): the name looks like a typo of `getDefaultCompileOptions`; it is left untouched because it is the public name callers use.
inline core::SRange<const char* const> geDefaultCompileOptions() const
67+
{
68+
return {m_defaultCompileOptions.data(),m_defaultCompileOptions.data()+m_defaultCompileOptions.size()};
69+
}
6770

6871
// TODO/REDO Vulkan: https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXTRES__INTEROP.html
6972
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#vulkan-interoperability

include/nbl/video/CCUDAHandler.h

Lines changed: 80 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
#ifndef _NBL_VIDEO_C_CUDA_HANDLER_H_
55
#define _NBL_VIDEO_C_CUDA_HANDLER_H_
66

7+
#include "nbl/core/declarations.h"
8+
#include "nbl/core/definitions.h"
9+
710
#include "nbl/system/declarations.h"
811

912
#include "nbl/video/CCUDADevice.h"
@@ -134,203 +137,133 @@ class CCUDAHandler : public core::IReferenceCounted
134137
);
135138
const NVRTC& getNVRTCFunctionTable() const {return m_nvrtc;}
136139

137-
#if 0
138140
//
139-
static core::SRange<const io::IReadFile* const> getCUDASTDHeaders()
141+
// Returns a [begin,end) range of raw `system::IFile*` pointers covering every entry of `m_headers`.
// When `m_headers` is empty, `begin` is nullptr and the range is {nullptr,nullptr}.
inline core::SRange<system::IFile* const> getSTDHeaders()
140142
{
141-
auto begin = headers.empty() ? nullptr:reinterpret_cast<const io::IReadFile* const*>(&headers[0].get());
142-
return {begin,begin+headers.size()};
143+
// Takes the address of what `get()` yields for element 0 and walks `m_headers.size()` pointers from there.
// NOTE(review): presumably relies on `get()` returning a reference and on the smart pointer being
// layout-compatible with a raw pointer - confirm against `core::smart_refctd_ptr`.
auto begin = m_headers.empty() ? nullptr:(&m_headers[0].get());
144+
return {begin,begin+m_headers.size()};
143145
}
144-
static const auto& getCUDASTDHeaderContents() { return headerContents; }
145-
static const auto& getCUDASTDHeaderNames() { return headerNames; }
146+
// Accessor returning `m_headerContents` by const reference (a vector of `const char*`, one entry per header).
inline const auto& getSTDHeaderContents() { return m_headerContents; }
147+
// Accessor returning `m_headerNames` by const reference (a vector of `const char*`, one entry per header).
inline const auto& getSTDHeaderNames() { return m_headerNames; }
146148

147149
//
148-
static nvrtcResult createProgram( nvrtcProgram* prog, const char* source, const char* name,
149-
const char* const* headersBegin=nullptr, const char* const* headersEnd=nullptr,
150-
const char* const* includeNamesBegin=nullptr, const char* const* includeNamesEnd=nullptr)
150+
// Creates an NVRTC program from an owned source string; only the declaration is visible in this header.
// The other `createProgram` overloads forward to this one.
// NOTE(review): `headerContents` and `includeNames` are presumably parallel arrays of `headerCount` entries - confirm against the definition.
nvrtcResult createProgram(nvrtcProgram* prog, std::string&& source, const char* name, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr);
151+
// Convenience overload for a C-string source: copies `source` into a temporary `std::string`
// and forwards every argument to the `std::string&&` overload.
// `source` must not be null, since it is passed straight to the `std::string(const char*)` constructor.
inline nvrtcResult createProgram(nvrtcProgram* prog, const char* source, const char* name, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr)
151152
{
152-
auto headerCount = std::distance(headersBegin, headersEnd);
153-
if (headerCount)
154-
{
155-
if (std::distance(includeNamesBegin,includeNamesEnd)!=headerCount)
156-
return NVRTC_ERROR_INVALID_INPUT;
157-
}
158-
else
159-
{
160-
headersBegin = nullptr;
161-
includeNamesBegin = nullptr;
162-
}
163-
auto extraLen = strlen(CUDA_EXTRA_DEFINES);
164-
auto origLen = strlen(source);
165-
auto totalLen = extraLen+origLen;
166-
auto tmp = _NBL_NEW_ARRAY(char,totalLen+1u);
167-
memcpy(tmp, CUDA_EXTRA_DEFINES, extraLen);
168-
memcpy(tmp+extraLen, source, origLen);
169-
tmp[totalLen] = 0;
170-
auto result = nvrtc.pnvrtcCreateProgram(prog, tmp, name, headerCount, headersBegin, includeNamesBegin);
171-
_NBL_DELETE_ARRAY(tmp,totalLen);
172-
return result;
153+
return createProgram(prog,std::string(source),name,headerCount,headerContents,includeNames);
173154
}
174-
175-
template<typename HeaderFileIt>
176-
static nvrtcResult createProgram( nvrtcProgram* prog, nbl::io::IReadFile* main,
177-
const HeaderFileIt includesBegin, const HeaderFileIt includesEnd)
155+
// Creates an NVRTC program from the contents of `file`.
// Reads the file from offset 0 into a `std::string`, trims the string to the number of bytes actually
// read, and forwards to the `std::string&&` overload using the file's name as the program name.
// `headerCount`, `headerContents` and `includeNames` are passed through unchanged.
// `file` is dereferenced unconditionally, so it must not be null.
inline nvrtcResult createProgram(nvrtcProgram* prog, system::IFile* file, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr)
178156
{
179-
int numHeaders = std::distance(includesBegin,includesEnd);
180-
core::vector<const char*> headers(numHeaders);
181-
core::vector<const char*> includeNames(numHeaders);
182-
size_t sourceIt = strlen(CUDA_EXTRA_DEFINES);
183-
size_t sourceSize = sourceIt+main->getSize();
184-
sourceSize++;
185-
for (auto it=includesBegin; it!=includesEnd; it++)
186-
{
187-
sourceSize += it->getSize()+1u;
188-
includeNames.emplace_back(it->getFileName().c_str());
189-
}
190-
core::vector<char> sources(sourceSize);
191-
memcpy(sources.data(),CUDA_EXTRA_DEFINES,sourceIt);
192-
auto filesize = main->getSize();
193-
main->read(sources.data()+sourceIt,filesize);
194-
sourceIt += filesize;
195-
sources[sourceIt++] = 0;
196-
for (auto it=includesBegin; it!=includesEnd; it++)
197-
{
198-
auto oldpos = it->getPos();
199-
it->seek(0ull);
157+
const auto filesize = file->getSize();
158+
// Zero-fill with NUL bytes (the previous '0' literal filled the buffer with the digit character).
std::string source(filesize+1u,'\0');
200159

201-
auto ptr = sources.data()+sourceIt;
202-
headers.push_back(ptr);
203-
filesize = it->getSize();
204-
it->read(ptr,filesize);
205-
sourceIt += filesize;
206-
sources[sourceIt++] = 0;
160+
system::future<size_t> bytesRead;
161+
// Read exactly the size the buffer was allocated for; a second getSize() call could disagree with it.
file->read(bytesRead,source.data(),0u,filesize);
162+
// Drop everything past what was actually read, so a short read does not leave padding in the source.
source.resize(bytesRead.get());
207163

208-
it->seek(oldpos);
209-
}
210-
return nvrtc.pnvrtcCreateProgram(prog, sources.data(), main->getFileName().c_str(), numHeaders, headers.data(), includeNames.data());
164+
// The temporary name string lives until the end of this full expression, i.e. for the whole call.
return createProgram(prog,std::move(source),file->getFileName().string().c_str(),headerCount,headerContents,includeNames);
211165
}
212-
#endif
166+
213167
//
214-
inline nvrtcResult compileProgram(nvrtcProgram prog, const size_t optionCount, const char* const* options)
168+
// Compiles `prog` by calling `pnvrtcCompileProgram` from the NVRTC function table.
// The option count comes from `options.size()` and the option array from `options.begin()`.
// Returns whatever `nvrtcResult` that call produces.
inline nvrtcResult compileProgram(nvrtcProgram prog, core::SRange<const char* const> options)
215169
{
216-
return m_nvrtc.pnvrtcCompileProgram(prog, optionCount, options);
217-
}
218-
template<typename OptionsT = const std::initializer_list<const char*>&>
219-
inline nvrtcResult compileProgram(nvrtcProgram prog, OptionsT options)
220-
{
221-
return compileProgram(prog, options.size(), options.begin());
222-
}
223-
inline nvrtcResult compileProgram(nvrtcProgram prog, const std::vector<const char*>& options)
224-
{
225-
return compileProgram(prog, options.size(), options.data());
170+
return m_nvrtc.pnvrtcCompileProgram(prog,options.size(),options.begin());
226171
}
227172

228173
//
229174
nvrtcResult getProgramLog(nvrtcProgram prog, std::string& log);
230175

231176
//
232-
std::pair<core::smart_refctd_ptr<asset::ICPUBuffer>,nvrtcResult> getPTX(nvrtcProgram prog);
177+
// Return type of `getPTX` and the `compileDirectlyToPTX` family: a PTX buffer paired with a status code.
// The helpers in this class return `{nullptr,result}` when a step fails.
struct ptx_and_nvrtcResult_t
178+
{
179+
// Buffer holding the PTX; null in the failure returns visible in this header.
core::smart_refctd_ptr<asset::ICPUBuffer> ptx;
180+
// NVRTC status code paired with the buffer.
nvrtcResult result;
181+
};
182+
// Retrieves the PTX for `prog` as a buffer plus status pair; only the declaration is visible in this header.
ptx_and_nvrtcResult_t getPTX(nvrtcProgram prog);
233183

234-
#if 0
235184
//
236-
template<typename OptionsT = const std::initializer_list<const char*>&>
237-
static nvrtcResult compileDirectlyToPTX(std::string& ptx, const char* source, const char* filename,
238-
const char* const* headersBegin = nullptr, const char* const* headersEnd = nullptr,
239-
const char* const* includeNamesBegin = nullptr, const char* const* includeNamesEnd = nullptr,
240-
OptionsT options = { _NBL_DEFAULT_NVRTC_OPTIONS },
241-
std::string* log = nullptr)
185+
// One-shot helper: creates an NVRTC program from an owned source string, compiles it with
// `nvrtcOptions` and returns the PTX together with the final `nvrtcResult`.
// If `log` is non-null it receives the program log after compilation.
// The temporary program is destroyed on every exit path once it has been created.
inline ptx_and_nvrtcResult_t compileDirectlyToPTX(
186+
std::string&& source, const char* filename, core::SRange<const char* const> nvrtcOptions,
187+
const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr,
188+
std::string* log=nullptr
189+
)
242190
{
243191
nvrtcProgram program = nullptr;
244192
nvrtcResult result = NVRTC_ERROR_PROGRAM_CREATION_FAILURE;
245-
auto cleanup = core::makeRAIIExiter([&program, &result]() -> void {
246-
if (result != NVRTC_SUCCESS && program)
247-
nvrtc.pnvrtcDestroyProgram(&program);
248-
});
249-
250-
result = createProgram(&program, source, filename, headersBegin, headersEnd, includeNamesBegin, includeNamesEnd);
251-
252-
if (result != NVRTC_SUCCESS)
253-
return result;
193+
auto cleanup = core::makeRAIIExiter([&]() -> void
194+
{
195+
// Destroy whenever a program exists. `result` only records the creation status (the impl takes it
// by value), so gating on it leaked the program after a failed compile and after success alike.
if (program)
196+
m_nvrtc.pnvrtcDestroyProgram(&program); // the PTX is returned in its own buffer, so the program is no longer needed
197+
});
254198

255-
return result = compileDirectlyToPTX_helper<OptionsT>(ptx, program, std::forward<OptionsT>(options), log);
199+
result = createProgram(&program,std::move(source),filename,headerCount,headerContents,includeNames);
200+
// The return value is fully built before `cleanup` runs at scope exit.
return compileDirectlyToPTX_impl(result,program,nvrtcOptions,log);
256201
}
257-
258-
template<typename OptionsT = const std::initializer_list<const char*>&>
259-
static nvrtcResult compileDirectlyToPTX(std::string& ptx, nbl::io::IReadFile* main,
260-
const char* const* headersBegin = nullptr, const char* const* headersEnd = nullptr,
261-
const char* const* includeNamesBegin = nullptr, const char* const* includeNamesEnd = nullptr,
262-
OptionsT options = { _NBL_DEFAULT_NVRTC_OPTIONS },
263-
std::string* log = nullptr)
202+
// Convenience overload for a C-string source: copies `source` into a temporary `std::string`
// and forwards every argument to the `std::string&&` overload.
// `source` must not be null, since it is passed straight to the `std::string(const char*)` constructor.
inline ptx_and_nvrtcResult_t compileDirectlyToPTX(
203+
const char* source, const char* filename, core::SRange<const char* const> nvrtcOptions,
204+
const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr,
205+
std::string* log=nullptr
206+
)
264207
{
265-
char* data = new char[main->getSize()+1ull];
266-
main->read(data, main->getSize());
267-
data[main->getSize()] = 0;
268-
auto result = compileDirectlyToPTX<OptionsT>(ptx, data, main->getFileName().c_str(), headersBegin, headersEnd, std::forward<OptionsT>(options), log);
269-
delete[] data;
270-
271-
return result;
208+
return compileDirectlyToPTX(std::string(source),filename,nvrtcOptions,headerCount,headerContents,includeNames,log);
272209
}
273-
274-
template<typename CompileArgsT, typename OptionsT=const std::initializer_list<const char*>&>
275-
static nvrtcResult compileDirectlyToPTX(std::string& ptx, nbl::io::IReadFile* main,
276-
CompileArgsT includesBegin, CompileArgsT includesEnd,
277-
OptionsT options={_NBL_DEFAULT_NVRTC_OPTIONS},
278-
std::string* log=nullptr)
210+
// One-shot helper: creates an NVRTC program from the contents of `file`, compiles it with
// `nvrtcOptions` and returns the PTX together with the final `nvrtcResult`.
// If `log` is non-null it receives the program log after compilation.
// The temporary program is destroyed on every exit path once it has been created.
inline ptx_and_nvrtcResult_t compileDirectlyToPTX(
211+
system::IFile* file, core::SRange<const char* const> nvrtcOptions,
212+
const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr,
213+
std::string* log=nullptr
214+
)
279215
{
280216
nvrtcProgram program = nullptr;
281217
nvrtcResult result = NVRTC_ERROR_PROGRAM_CREATION_FAILURE;
282-
auto cleanup = core::makeRAIIExiter([&program,&result]() -> void {
218+
auto cleanup = core::makeRAIIExiter([&]() -> void
219+
{
283220
// Destroy whenever a program exists. `result` only records the creation status (the impl takes it
// by value), so gating on it leaked the program after a failed compile and after success alike.
if (program)
284-
nvrtc.pnvrtcDestroyProgram(&program);
221+
m_nvrtc.pnvrtcDestroyProgram(&program); // the PTX is returned in its own buffer, so the program is no longer needed
285222
});
286-
result = createProgram(&program, main, includesBegin, includesEnd);
287-
if (result!=NVRTC_SUCCESS)
288-
return result;
289223

290-
return result = compileDirectlyToPTX_helper<OptionsT>(ptx,program,std::forward<OptionsT>(options),log);
224+
result = createProgram(&program,file,headerCount,headerContents,includeNames);
225+
// The return value is fully built before `cleanup` runs at scope exit.
return compileDirectlyToPTX_impl(result,program,nvrtcOptions,log);
291226
}
292-
#endif
293227

294228
core::smart_refctd_ptr<CCUDADevice> createDevice(core::smart_refctd_ptr<CVulkanConnection>&& vulkanConnection, IPhysicalDevice* physicalDevice);
295229

296230
protected:
297-
CCUDAHandler(
298-
CUDA&& _cuda,
299-
NVRTC&& _nvrtc,
300-
core::vector<core::smart_refctd_ptr<system::IFile>>&& _headers,
301-
core::smart_refctd_ptr<system::ILogger>&& _logger,
302-
int _version
303-
);
231+
// Takes ownership of the CUDA/NVRTC function tables, the header files and the logger, then builds
// the parallel `const char*` arrays (`m_headerContents`, `m_headerNames`) from `m_headers`.
// Header names are stored in `m_headerNamesStorage`, and `m_headerNames` points into those strings.
// NOTE(review): the content pointers come from `getMappedPointer()`; presumably every header is
// memory-mapped and NUL-terminated - confirm where the headers are created.
CCUDAHandler(CUDA&& _cuda, NVRTC&& _nvrtc, core::vector<core::smart_refctd_ptr<system::IFile>>&& _headers, core::smart_refctd_ptr<system::ILogger>&& _logger, int _version)
232+
: m_cuda(std::move(_cuda)), m_nvrtc(std::move(_nvrtc)), m_headers(std::move(_headers)), m_logger(std::move(_logger)), m_version(_version)
233+
{
// Reserve up front: `m_headerNames` keeps c_str() pointers into `m_headerNamesStorage`, and a
// reallocation during push_back would move the strings and leave the pointers of short names dangling.
const auto headerCount = m_headers.size();
m_headerContents.reserve(headerCount);
m_headerNamesStorage.reserve(headerCount);
m_headerNames.reserve(headerCount);
234+
for (const auto& header : m_headers)
235+
{
236+
m_headerContents.push_back(reinterpret_cast<const char*>(header->getMappedPointer()));
237+
m_headerNamesStorage.push_back(header->getFileName().string());
238+
m_headerNames.push_back(m_headerNamesStorage.back().c_str());
239+
}
240+
}
304241
~CCUDAHandler() = default;
305-
306-
#if 0
307-
static core::vector<const char*> headerContents;
308-
static core::vector<const char*> headerNames;
309-
310-
#ifdef _MSC_VER
311-
_NBL_STATIC_INLINE_CONSTEXPR const char* CUDA_EXTRA_DEFINES = "#ifndef _WIN64\n#define _WIN64\n#endif\n";
312-
#else
313-
_NBL_STATIC_INLINE_CONSTEXPR const char* CUDA_EXTRA_DEFINES = "#ifndef __LP64__\n#define __LP64__\n#endif\n";
314-
#endif
315-
316-
template<typename OptionsT = const std::initializer_list<const char*>&>
317-
static nvrtcResult compileDirectlyToPTX_helper(std::string& ptx, nvrtcProgram program, OptionsT options, std::string* log=nullptr)
242+
243+
//
244+
// Shared tail of the `compileDirectlyToPTX` overloads.
// `result` is the status of the preceding program-creation step; if it is not NVRTC_SUCCESS the
// function returns `{nullptr,result}` without compiling.
// Otherwise it compiles `program` with `nvrtcOptions`, writes the program log to `*log` when `log`
// is non-null (on success and on failure), and returns `getPTX(program)` only if compilation succeeded.
// `result` is taken by value, so the caller's own status variable is not updated by this function.
inline ptx_and_nvrtcResult_t compileDirectlyToPTX_impl(nvrtcResult result, nvrtcProgram program, core::SRange<const char* const> nvrtcOptions, std::string* log)
318245
{
319-
nvrtcResult result = compileProgram(program,options);
320-
if (log)
321-
getProgramLog(program, *log);
322246
if (result!=NVRTC_SUCCESS)
323-
return result;
247+
return {nullptr,result};
324248

325-
return getPTX(program, ptx);
249+
result = compileProgram(program,nvrtcOptions);
250+
if (log)
251+
getProgramLog(program,*log);
252+
if (result!=NVRTC_SUCCESS)
253+
return {nullptr,result};
254+
255+
return getPTX(program);
326256
}
327-
#endif
257+
328258
// function tables
329259
CUDA m_cuda;
330260
NVRTC m_nvrtc;
331261

332262
//
333263
core::vector<core::smart_refctd_ptr<system::IFile>> m_headers;
264+
core::vector<const char*> m_headerContents;
265+
core::vector<std::string> m_headerNamesStorage;
266+
core::vector<const char*> m_headerNames;
334267
system::logger_opt_smart_ptr m_logger;
335268
int m_version;
336269
};

0 commit comments

Comments
 (0)