@@ -473,6 +473,18 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
473473 } \
474474 } while (false )
475475
476+ #include " nvFatbin.h"
477+
// Evaluates the nvFatbin API call `expr` once. If the call does not yield
// NVFATBIN_SUCCESS, an error is emitted at `loc` consisting of the stringified
// call followed by the text from nvFatbinGetErrorString, and the enclosing
// function returns std::nullopt. A variable named `loc` must be in scope
// wherever this macro is expanded.
#define RETURN_ON_NVFATBIN_ERROR(expr)                                         \
  do {                                                                         \
    const auto fatbinStatus = (expr);                                          \
    if (fatbinStatus != nvFatbinResult::NVFATBIN_SUCCESS) {                    \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ")      \
                     << nvFatbinGetErrorString(fatbinStatus);                  \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)
487+
476488std::optional<SmallVector<char , 0 >>
477489NVPTXSerializer::compileToBinaryNVPTX (const std::string &ptxCode) {
478490 Location loc = getOperation ().getLoc ();
@@ -538,6 +550,32 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
538550 });
539551#undef DEBUG_TYPE
540552 RETURN_ON_NVPTXCOMPILER_ERROR (nvPTXCompilerDestroy (&compiler));
553+
554+ if (targetOptions.getCompilationTarget () == gpu::CompilationTarget::Fatbin) {
555+ bool useFatbin32 = llvm::any_of (cmdOpts.second , [](const char *option) {
556+ return llvm::StringRef (option) == " -32" ;
557+ });
558+
559+ const char *cubinOpts[1 ] = {useFatbin32 ? " -32" : " -64" };
560+ nvFatbinHandle handle;
561+
562+ auto chip = getTarget ().getChip ();
563+ chip.consume_front (" sm_" );
564+
565+ RETURN_ON_NVFATBIN_ERROR (nvFatbinCreate (&handle, cubinOpts, 1 ));
566+ RETURN_ON_NVFATBIN_ERROR (nvFatbinAddCubin (
567+ handle, binary.data (), binary.size (), chip.data (), nullptr ));
568+ RETURN_ON_NVFATBIN_ERROR (nvFatbinAddPTX (
569+ handle, ptxCode.data (), ptxCode.size (), chip.data (), nullptr , nullptr ));
570+
571+ size_t fatbinSize;
572+ RETURN_ON_NVFATBIN_ERROR (nvFatbinSize (handle, &fatbinSize));
573+ SmallVector<char , 0 > fatbin (fatbinSize, 0 );
574+ RETURN_ON_NVFATBIN_ERROR (nvFatbinGet (handle, (void *)fatbin.data ()));
575+ RETURN_ON_NVFATBIN_ERROR (nvFatbinDestroy (&handle));
576+ return fatbin;
577+ }
578+
541579 return binary;
542580}
543581#endif // MLIR_ENABLE_NVPTXCOMPILER
0 commit comments