diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 543e917b528b3..4be147d02d579 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -119,6 +119,45 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   )
 endif()
 
+
+function(embed_binary_to_src file output_file symbol)
+  file(READ ${file} filedata HEX)
+  # Convert hex data for C compatibility
+  string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
+  # Write data to output file
+  file(WRITE ${output_file} "const char ${symbol}[] = {${filedata}};\nconst int ${symbol}_size = sizeof(${symbol});\n")
+endfunction()
+
+set(MLIR_NVVM_EMBED_LIBDEVICE 0 CACHE BOOL "Embed CUDA libdevice.bc in the binary at build time instead of looking it up at runtime")
+if (MLIR_NVVM_EMBED_LIBDEVICE)
+  if (NOT MLIR_NVVM_LIBDEVICE_PATH)
+    if(CUDAToolkit_FOUND)
+      find_file(MLIR_NVVM_LIBDEVICE_PATH libdevice.10.bc
+                PATHS ${CUDAToolkit_LIBRARY_ROOT}
+                PATH_SUFFIXES "nvvm/libdevice" NO_DEFAULT_PATH REQUIRED)
+    else()
+      message(FATAL_ERROR
+        "Requested embedding libdevice in the binary but the CUDA toolkit couldn't be found.")
+    endif()
+  endif()
+
+  embed_binary_to_src(${MLIR_NVVM_LIBDEVICE_PATH} ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c _mlir_embedded_libdevice)
+  add_mlir_library(MLIRNVVMLibdevice
+    ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
+  )
+  target_link_libraries(MLIRNVVMTarget PRIVATE MLIRNVVMLibdevice)
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVVM_EMBED_LIBDEVICE=1
+  )
+else()
+  target_compile_definitions(obj.MLIRNVVMTarget
+    PRIVATE
+    MLIR_NVVM_EMBED_LIBDEVICE=0
+  )
+endif()
+
+
 if (MLIR_ENABLE_ROCM_CONVERSIONS)
   set(AMDGPU_LIBS
     AMDGPUAsmParser
@@ -169,3 +208,4 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS)
     __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}"
   )
 endif()
+
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index a9f7806b10f40..86ff848d6c6c2 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -16,6 +16,10 @@
 #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/DialectResourceBlobManager.h"
 #include "mlir/Target/LLVM/NVVM/Utils.h"
 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
@@ -33,6 +37,7 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <cstdint>
 #include <cstdlib>
 
 using namespace mlir;
@@ -42,6 +47,9 @@ using namespace mlir::NVVM;
 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
 #endif
 
+extern "C" const char _mlir_embedded_libdevice[];
+extern "C" const unsigned _mlir_embedded_libdevice_size;
+
 namespace {
 // Implementation of the `TargetAttrInterface` model.
 class NVVMTargetAttrImpl
@@ -130,6 +138,33 @@ ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
 
 // Try to append `libdevice` from a CUDA toolkit installation.
 LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+#if MLIR_NVVM_EMBED_LIBDEVICE
+  // If libdevice is embedded in the binary, we don't look it up on the
+  // filesystem.
+  MLIRContext *ctx = target.getContext();
+  auto type =
+      RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
+                            IntegerType::get(ctx, 8));
+  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);
+
+  // Look up whether we already loaded the resource; otherwise create it.
+  DialectResourceBlobManager::BlobEntry *blob =
+      resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
+  if (blob) {
+    librariesToLink.push_back(DenseResourceElementsAttr::get(
+        type, DenseResourceElementsHandle(
+                  blob, ctx->getLoadedDialect<BuiltinDialect>())));
+    return success();
+  }
+
+  // Allocate a resource using one of the UnmanagedAsmResourceBlob methods to
+  // wrap the embedded data.
+  auto unmanagedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(
+      ArrayRef<char>{_mlir_embedded_libdevice, _mlir_embedded_libdevice_size});
+  librariesToLink.push_back(DenseResourceElementsAttr::get(
+      type, resourceManager.insert("_mlir_embedded_libdevice",
+                                   std::move(unmanagedBlob))));
+#else
   StringRef pathRef = getToolkitPath();
   if (!pathRef.empty()) {
     SmallVector<char, 256> path;
@@ -149,6 +184,7 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
     }
     librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
   }
+#endif
   return success();
 }
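
For reference, the `embed_binary_to_src` helper above simply hex-dumps `libdevice.10.bc` into a C translation unit. When the build is configured with `-DMLIR_NVVM_EMBED_LIBDEVICE=1` (optionally pointing `MLIR_NVVM_LIBDEVICE_PATH` at a specific bitcode file), the generated `libdevice_embedded.c` has roughly the shape sketched below; the byte values shown are illustrative placeholders, not the real libdevice contents:

```c
/* Sketch of the generated libdevice_embedded.c (placeholder bytes only). */
const char _mlir_embedded_libdevice[] = {
    0x42, 0x43, 0xc0, 0xde, /* ... remaining bytes of libdevice.10.bc ... */
};
const int _mlir_embedded_libdevice_size = sizeof(_mlir_embedded_libdevice);
```

On the C++ side, `appendStandardLibs()` wraps this array in an `UnmanagedAsmResourceBlob`, so the embedded bitcode is referenced in place rather than copied, registers it once under the `_mlir_embedded_libdevice` resource key, and reuses the cached blob entry on later serializations.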