diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0ed2f337477b1..367f6b7f9c895 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2738,7 +2738,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
           ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
   }
   if (GenerateIntrinsics &&
-      !(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) {
+      !(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() ||
+                                       getTarget().getTriple().isAMDGCN()))) {
     switch (BuiltinIDIfNoAsmLabel) {
     case Builtin::BIacos:
     case Builtin::BIacosf:
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index d69f54c563ae6..fc6cf6290f0f3 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5555,7 +5555,7 @@ class OffloadingActionBuilder final {
         // AOT compilation.
         bool SYCLDeviceLibLinked = false;
         Action *NativeCPULib = nullptr;
-        if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) {
+        if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) {
           bool UseJitLink =
               IsSPIR &&
               Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,
diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp
index 1f3cad761e4b3..9c48051dd29f4 100644
--- a/clang/lib/Driver/ToolChains/SYCL.cpp
+++ b/clang/lib/Driver/ToolChains/SYCL.cpp
@@ -165,9 +165,9 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,
 
   // spir64 target is actually JIT compilation, so we defer selection of
   // bfloat16 libraries to runtime. For AOT we need libraries, but skip
-  // for Nvidia.
-  NeedLibs =
-      Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX();
+  // for Nvidia and AMD.
+  NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch &&
+             !Triple.isNVPTX() && !Triple.isAMDGCN();
   UseNative = false;
   if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen &&
       C.hasOffloadToolChain<Action::OFK_SYCL>()) {
@@ -212,9 +212,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
   SmallVector<std::string, 8> LibraryList;
   const llvm::opt::ArgList &Args = C.getArgs();
 
-  // For NVPTX we only use one single bitcode library and ignore
+  // For NVPTX and AMDGCN we only use one single bitcode library and ignore
   // manually specified SYCL device libraries.
-  bool IgnoreSingleLibs = TargetTriple.isNVPTX();
+  bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN();
 
   struct DeviceLibOptInfo {
     StringRef DeviceLibName;
@@ -278,6 +278,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
   if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
     LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));
 
+  if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
+    LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc"));
+
   if (IgnoreSingleLibs)
     return LibraryList;
 
diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
index 2a2043ac5dc55..5c282449dc851 100644
--- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
+++ b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
@@ -7,6 +7,8 @@
 
 // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s
 
 #include "Inputs/sycl.hpp"
 
diff --git a/clang/test/Driver/Inputs/SYCL/lib/devicelib--amd.bc b/clang/test/Driver/Inputs/SYCL/lib/devicelib--amd.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/sycl-device-lib-amdgcn.cpp b/clang/test/Driver/sycl-device-lib-amdgcn.cpp
new file mode 100644
index 0000000000000..134e7835d11d7
--- /dev/null
+++ b/clang/test/Driver/sycl-device-lib-amdgcn.cpp
@@ -0,0 +1,44 @@
+// Tests specific to `-fsycl-targets=amdgcn-amd-amdhsa`
+// Verify that the correct devicelib linking actions are spawned by the driver.
+// Check also if the correct warnings are generated.
+
+// UNSUPPORTED: system-windows
+
+// Check if internal libraries are still linked against when linkage of all
+// device libs is manually excluded.
+// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
+// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
+
+// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
+// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906)
+// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
+// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906)
+// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
+// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906)
+// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
+// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906)
+
+// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
+// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
+// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-ALL %s
+
+// Check that the -fsycl-device-lib flag has no effect when subsets of libs
+// are specified.
+// RUN: %clangxx -ccc-print-phases -std=c++11 --sysroot=%S/Inputs/SYCL \
+// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
+// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-ALL %s
+
+// Check that -fno-sycl-device-lib is ignored when it does not contain "all".
+// A warning should be printed that the flag got ignored.
+// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl --sysroot=%S/Inputs/SYCL \
+// RUN: -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
+// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s
+
+// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib='
+// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
+// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, gfx906)
+
diff --git a/clang/test/Driver/sycl-offload-amdgcn.cpp b/clang/test/Driver/sycl-offload-amdgcn.cpp
index 389cdc641119c..dbbddf1670dba 100644
--- a/clang/test/Driver/sycl-offload-amdgcn.cpp
+++ b/clang/test/Driver/sycl-offload-amdgcn.cpp
@@ -25,7 +25,7 @@
 
 /// Check phases w/out specifying a compute capability.
 // RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \
-// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
+// RUN: -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-lib=all -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
 // RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s
 // CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl)
 // CHK-PHASES-NO-CC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
@@ -37,17 +37,19 @@
 // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
 // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
 // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 10: sycl-post-link, {9}, ir, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 11: file-table-tform, {10}, ir, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906)
-// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object
-// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl)
+// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906)
+// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object
+// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl)
 
 /// Check that we only unbundle an archive once.
 // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \
diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake
index b502512fdacb4..4574b98ab5f99 100644
--- a/libdevice/cmake/modules/SYCLLibdevice.cmake
+++ b/libdevice/cmake/modules/SYCLLibdevice.cmake
@@ -65,7 +65,7 @@ foreach(filetype IN LISTS filetypes)
   add_dependencies(libsycldevice libsycldevice-${filetype})
 endforeach()
 
-# For NVPTX each device libary is compiled into a single bitcode
+# For NVPTX and AMDGCN each device library is compiled into a single bitcode
 # file and all files created this way are linked into one large bitcode
 # library.
 # Additional compilation options are needed for compiling each device library.
@@ -76,6 +76,13 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib")
   set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false")
 endif()
+if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
+  list(APPEND devicelib_arch amd)
+  set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa"
+  "-Xsycl-target-backend" "--offload-arch=gfx940")
+  set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false")
+endif()
+
 
 set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv)
 set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir)
@@ -444,7 +451,7 @@ foreach(dtype IN ITEMS bf16 fp32 fp64)
   endforeach()
 endforeach()
 
-# Add device fallback imf libraries for the CUDA target.
+# Add device fallback imf libraries for the NVPTX and AMD targets.
 # The output files are bitcode.
 foreach(arch IN LISTS devicelib_arch)
   foreach(dtype IN ITEMS bf16 fp32 fp64)
@@ -464,7 +471,7 @@ foreach(arch IN LISTS devicelib_arch)
   endforeach()
 endforeach()
 
-# Create one large bitcode file for the CUDA targets.
+# Create one large bitcode file for the CUDA and AMD targets.
 # Use all the files collected in the respective global properties.
 foreach(arch IN LISTS devicelib_arch)
   get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch})
diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp
index 64b4c7e4ecc54..27d705d5406b2 100644
--- a/libdevice/cmath_wrapper.cpp
+++ b/libdevice/cmath_wrapper.cpp
@@ -8,7 +8,8 @@
 
 #include "device_math.h"
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 
 DEVICE_EXTERN_C_INLINE
 int abs(int x) { return __devicelib_abs(x); }
@@ -199,4 +200,4 @@ DEVICE_EXTERN_C_INLINE
 float rintf(float x) { return __nv_rintf(x); }
 #endif // __NVPTX__
 
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp
index b99a0c6fcef50..1dc9c0283dd3b 100644
--- a/libdevice/cmath_wrapper_fp64.cpp
+++ b/libdevice/cmath_wrapper_fp64.cpp
@@ -9,7 +9,8 @@
 
 #include "device_math.h"
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 
 // All exported functions in math and complex device libraries are weak
 // reference. If users provide their own math or complex functions(with
@@ -496,4 +497,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1
   }
 }
 #endif // defined(_WIN32)
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp
index e8160013a66df..8978c32d2d5e4 100644
--- a/libdevice/crt_wrapper.cpp
+++ b/libdevice/crt_wrapper.cpp
@@ -17,7 +17,8 @@
 DeviceGlobal<uint64_t[RAND_NEXT_LEN]> RandNext;
 #endif
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 DEVICE_EXTERN_C_INLINE
 void *memcpy(void *dest, const void *src, size_t n) {
   return __devicelib_memcpy(dest, src, n);
@@ -126,4 +127,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line,
       __spirv_LocalInvocationId_z());
 }
 #endif
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
diff --git a/libdevice/device.h b/libdevice/device.h
index 360af54f9b4c4..2ab565f5a939c 100644
--- a/libdevice/device.h
+++ b/libdevice/device.h
@@ -15,7 +15,8 @@
 #define EXTERN_C
 #endif // __cplusplus
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 #ifdef __SYCL_DEVICE_ONLY__
 #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak))
 #else // __SYCL_DEVICE_ONLY__
@@ -27,7 +28,7 @@
   DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline))
 #define DEVICE_EXTERN_C_NOINLINE                                               \
   DEVICE_EXTERNAL EXTERN_C __attribute__((noinline))
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
 
 #if defined(__SPIR__) || defined(__SPIRV__) || defined(__LIBDEVICE_HOST_IMPL__)
 #define __LIBDEVICE_IMF_ENABLED__
diff --git a/libdevice/device_math.h b/libdevice/device_math.h
index faed0a2687dea..f4ee1711060c6 100644
--- a/libdevice/device_math.h
+++ b/libdevice/device_math.h
@@ -10,7 +10,8 @@
 #define __LIBDEVICE_DEVICE_MATH_H__
 
 #include "device.h"
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 #include <cstdint>
 
 typedef struct {
@@ -367,5 +368,5 @@ float __devicelib_scalbnf(float x, int n);
 DEVICE_EXTERN_C
 double __devicelib_scalbn(double x, int exp);
 
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
 #endif // __LIBDEVICE_DEVICE_MATH_H__
diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp
index 5d3c99d63c556..5f7bcafa6ecc0 100644
--- a/libdevice/fallback-cassert.cpp
+++ b/libdevice/fallback-cassert.cpp
@@ -100,7 +100,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file,
 }
 #endif // __SPIR__ || __SPIRV__
 
-#ifdef __NVPTX__
+#if defined(__NVPTX__) || defined(__AMDGCN__)
 
 DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file,
                                   unsigned __line, const char *__function,
@@ -119,4 +119,4 @@ DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File,
   __assertfail(_Message, _File, _Line, 0, 1);
 }
 
-#endif
+#endif // __NVPTX__ || __AMDGCN__
diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp
index 8affe7aa86cf6..d7e4364e2595d 100644
--- a/libdevice/fallback-cmath-fp64.cpp
+++ b/libdevice/fallback-cmath-fp64.cpp
@@ -9,7 +9,8 @@
 
 #include "device_math.h"
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 
 // To support fallback device libraries on-demand loading, please update the
 // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
@@ -193,4 +194,4 @@ DEVICE_EXTERN_C_INLINE
 double __devicelib_scalbn(double x, int exp) {
   return __spirv_ocl_ldexp(x, exp);
 }
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp
index 39ff2e2252f05..37b2250b4b6aa 100644
--- a/libdevice/fallback-cmath.cpp
+++ b/libdevice/fallback-cmath.cpp
@@ -8,7 +8,8 @@
 
 #include "device_math.h"
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 
 // To support fallback device libraries on-demand loading, please update the
 // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
@@ -209,4 +210,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); }
 DEVICE_EXTERN_C_INLINE
 float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); }
 
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
diff --git a/libdevice/fallback-cstring.cpp b/libdevice/fallback-cstring.cpp
index 5d384f00a78cb..e63e83052dfeb 100644
--- a/libdevice/fallback-cstring.cpp
+++ b/libdevice/fallback-cstring.cpp
@@ -9,7 +9,8 @@
 #include "wrapper.h"
 #include <cstdint>
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 
 static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src,
                                               size_t n) {
@@ -202,4 +203,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) {
 
   return head_cmp;
 }
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
diff --git a/libdevice/imf/imf_fp32_dl.cpp b/libdevice/imf/imf_fp32_dl.cpp
index f08ee1d305999..eff8c2ac7472d 100644
--- a/libdevice/imf/imf_fp32_dl.cpp
+++ b/libdevice/imf/imf_fp32_dl.cpp
@@ -11,9 +11,12 @@
 /// overhead in these deep learning frameworks.
 //===----------------------------------------------------------------------===//
 
-#include "../device_imf.hpp"
+#include "../device.h"
+
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE int32_t __devicelib_imf_abs(int32_t x) {
   return (x >= 0) ? x : -x;
 }
diff --git a/libdevice/imf/imf_fp64_dl.cpp b/libdevice/imf/imf_fp64_dl.cpp
index 37fbd906f71eb..d9382bc2ddc21 100644
--- a/libdevice/imf/imf_fp64_dl.cpp
+++ b/libdevice/imf/imf_fp64_dl.cpp
@@ -11,9 +11,12 @@
 /// overhead in these deep learning frameworks.
 //===----------------------------------------------------------------------===//
 
-#include "../device_imf.hpp"
+#include "../device.h"
+
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) {
   return __fabs(x);
 }
diff --git a/libdevice/imf/imf_inline_bf16.cpp b/libdevice/imf/imf_inline_bf16.cpp
index c7165a1ee0183..96335de774fd0 100644
--- a/libdevice/imf/imf_inline_bf16.cpp
+++ b/libdevice/imf/imf_inline_bf16.cpp
@@ -5,9 +5,12 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-#include "../device_imf.hpp"
+#include "../device.h"
 
 #ifdef __LIBDEVICE_IMF_ENABLED__
+
+#include "../device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE
 _iml_bf16_internal __devicelib_imf_fmabf16(_iml_bf16_internal a,
                                            _iml_bf16_internal b,
diff --git a/libdevice/imf/imf_inline_fp32.cpp b/libdevice/imf/imf_inline_fp32.cpp
index e71499f8fe057..44061ec40ab45 100644
--- a/libdevice/imf/imf_inline_fp32.cpp
+++ b/libdevice/imf/imf_inline_fp32.cpp
@@ -5,9 +5,13 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-#include "../device_imf.hpp"
+
+#include "../device.h"
+
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16(
     _iml_half_internal a, _iml_half_internal b, _iml_half_internal c) {
   _iml_half ha(a), hb(b), hc(c);
diff --git a/libdevice/imf/imf_inline_fp64.cpp b/libdevice/imf/imf_inline_fp64.cpp
index f8d5418513f11..24c016c49344c 100644
--- a/libdevice/imf/imf_inline_fp64.cpp
+++ b/libdevice/imf/imf_inline_fp64.cpp
@@ -5,9 +5,13 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-#include "../device_imf.hpp"
+
+#include "../device.h"
+
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE double __devicelib_imf_fma(double a, double b,
                                                   double c) {
   return __fma(a, b, c);
diff --git a/libdevice/imf_utils/bfloat16_convert.cpp b/libdevice/imf_utils/bfloat16_convert.cpp
index 750cb4e5877b4..1591c90768939 100644
--- a/libdevice/imf_utils/bfloat16_convert.cpp
+++ b/libdevice/imf_utils/bfloat16_convert.cpp
@@ -7,9 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "../device_imf.hpp"
+#include "../device.h"
 
 #ifdef __LIBDEVICE_IMF_ENABLED__
+
+#include "../device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE
 float __devicelib_imf_bfloat162float(_iml_bf16_internal b) {
   return __bfloat162float(b);
diff --git a/libdevice/imf_utils/double_convert.cpp b/libdevice/imf_utils/double_convert.cpp
index c4cd6dea07bf1..3c80dfe3ae769 100644
--- a/libdevice/imf_utils/double_convert.cpp
+++ b/libdevice/imf_utils/double_convert.cpp
@@ -7,10 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "../device_imf.hpp"
+#include "../device.h"
 
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 static inline float __double2float_rd(double x) {
 #if defined(__LIBDEVICE_HOST_IMPL__)
   return __double2Tp_host<float>(x, FE_DOWNWARD);
diff --git a/libdevice/imf_utils/float_convert.cpp b/libdevice/imf_utils/float_convert.cpp
index 85299c0f33823..299ea5c25f96f 100644
--- a/libdevice/imf_utils/float_convert.cpp
+++ b/libdevice/imf_utils/float_convert.cpp
@@ -7,10 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "../device_imf.hpp"
+#include "../device.h"
 
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 static inline int __float2int_rd(float x) {
 #if defined(__LIBDEVICE_HOST_IMPL__)
   return __float2Tp_host<int>(x, FE_DOWNWARD);
diff --git a/libdevice/imf_utils/fp32_round.cpp b/libdevice/imf_utils/fp32_round.cpp
index 32548b1ccf912..973371feca0d9 100644
--- a/libdevice/imf_utils/fp32_round.cpp
+++ b/libdevice/imf_utils/fp32_round.cpp
@@ -6,10 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../device.h"
+
+#ifdef __LIBDEVICE_IMF_ENABLED__
+
 #include "../device_imf.hpp"
 #include "../imf_rounding_op.hpp"
 
-#ifdef __LIBDEVICE_IMF_ENABLED__
 DEVICE_EXTERN_C_INLINE
 float __devicelib_imf_fadd_rd(float x, float y) {
   return __fp_add_sub_entry(x, y, __IML_RTN);
diff --git a/libdevice/imf_utils/fp64_round.cpp b/libdevice/imf_utils/fp64_round.cpp
index aa4de27a669e1..2f88265a1103a 100644
--- a/libdevice/imf_utils/fp64_round.cpp
+++ b/libdevice/imf_utils/fp64_round.cpp
@@ -6,10 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../device.h"
+
+#ifdef __LIBDEVICE_IMF_ENABLED__
+
 #include "../device_imf.hpp"
 #include "../imf_rounding_op.hpp"
 
-#ifdef __LIBDEVICE_IMF_ENABLED__
 DEVICE_EXTERN_C_INLINE
 double __devicelib_imf_dadd_rd(double x, double y) {
   return __fp_add_sub_entry(x, y, __IML_RTN);
diff --git a/libdevice/imf_utils/half_convert.cpp b/libdevice/imf_utils/half_convert.cpp
index 3e23d3a46f01e..e16b9ec699f65 100644
--- a/libdevice/imf_utils/half_convert.cpp
+++ b/libdevice/imf_utils/half_convert.cpp
@@ -7,10 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "../device_imf.hpp"
+#include "../device.h"
 
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE
 float __devicelib_imf_half2float(_iml_half_internal x) {
   return __half2float(x);
diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp
index fdc850ee42281..06642eec7d267 100644
--- a/libdevice/imf_utils/integer_misc.cpp
+++ b/libdevice/imf_utils/integer_misc.cpp
@@ -7,9 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "../device_imf.hpp"
+#include "../device.h"
+
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE
 unsigned int __devicelib_imf_brev(unsigned int x) {
   unsigned int res = 0;
diff --git a/libdevice/imf_utils/simd_emulate.cpp b/libdevice/imf_utils/simd_emulate.cpp
index 7369a1598aacb..a8ac73f42ab8b 100644
--- a/libdevice/imf_utils/simd_emulate.cpp
+++ b/libdevice/imf_utils/simd_emulate.cpp
@@ -7,9 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "../device_imf.hpp"
+#include "../device.h"
+
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "../device_imf.hpp"
+
 template <typename Tp> struct __twice_size;
 template <typename Tp> using __twice_size_t = typename __twice_size<Tp>::type;
 template <typename Tp> struct __twice_size_tag {
diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp
index 336725cad5f63..be630bccbf579 100644
--- a/libdevice/imf_wrapper.cpp
+++ b/libdevice/imf_wrapper.cpp
@@ -6,10 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "device_imf.hpp"
+#include "device.h"
 
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE
 float __devicelib_imf_saturatef(float);
 
diff --git a/libdevice/imf_wrapper_bf16.cpp b/libdevice/imf_wrapper_bf16.cpp
index d02903b0a720f..0c72d95bccc63 100644
--- a/libdevice/imf_wrapper_bf16.cpp
+++ b/libdevice/imf_wrapper_bf16.cpp
@@ -7,10 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "imf_bf16.hpp"
+#include "device.h"
 
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "imf_bf16.hpp"
+
 DEVICE_EXTERN_C_INLINE
 float __devicelib_imf_bfloat162float(_iml_bf16_internal);
 
diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp
index 10cf98e844774..e90979d2bb724 100644
--- a/libdevice/imf_wrapper_fp64.cpp
+++ b/libdevice/imf_wrapper_fp64.cpp
@@ -7,10 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "device_imf.hpp"
+#include "device.h"
 
 #ifdef __LIBDEVICE_IMF_ENABLED__
 
+#include "device_imf.hpp"
+
 DEVICE_EXTERN_C_INLINE
 float __devicelib_imf_double2float_rd(double);
 
diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h
index 4445520bff741..869c343206994 100644
--- a/libdevice/spirv_vars.h
+++ b/libdevice/spirv_vars.h
@@ -11,7 +11,8 @@
 
 #include "device.h"
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 
 #include <cstddef>
 #include <cstdint>
@@ -58,5 +59,5 @@ const size_t_vec __spirv_BuiltInGlobalInvocationId{};
 const size_t_vec __spirv_BuiltInLocalInvocationId{};
 #endif // !__SPIR__ && !__SPIRV__
 
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
 #endif // __LIBDEVICE_SPIRV_VARS_H
diff --git a/libdevice/wrapper.h b/libdevice/wrapper.h
index bbc0cd4f1ca87..45555785fd2fb 100644
--- a/libdevice/wrapper.h
+++ b/libdevice/wrapper.h
@@ -11,7 +11,8 @@
 
 #include "device.h"
 
-#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
+#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) ||           \
+    defined(__AMDGCN__)
 
 #include <cstddef>
 #include <cstdint>
@@ -29,5 +30,5 @@ void __devicelib_assert_fail(const char *expr, const char *file, int32_t line,
                              const char *func, uint64_t gid0, uint64_t gid1,
                              uint64_t gid2, uint64_t lid0, uint64_t lid1,
                              uint64_t lid2);
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__
+#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
 #endif // __LIBDEVICE_WRAPPER_H__
diff --git a/sycl/test-e2e/DeviceLib/assert.cpp b/sycl/test-e2e/DeviceLib/assert.cpp
index 6edabb07a34b3..ad5a015020f9b 100644
--- a/sycl/test-e2e/DeviceLib/assert.cpp
+++ b/sycl/test-e2e/DeviceLib/assert.cpp
@@ -1,4 +1,4 @@
-// REQUIRES: (cpu || cuda ) && linux
+// REQUIRES: (cpu || cuda || hip ) && linux
 // RUN: %{build} -DSYCL_FALLBACK_ASSERT=1 -o %t.out
 // (see the other RUN lines below; it is a bit complicated)
 //
diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp
index 3d5786e97fe2d..2fe760fff2a34 100644
--- a/sycl/test-e2e/DeviceLib/cmath_test.cpp
+++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp
@@ -1,6 +1,5 @@
 // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
 
-// UNSUPPORTED: hip
 // RUN: %{build} -fno-builtin %{mathflags} -o %t.out
 // RUN: %{run} %t.out
 
diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp
index 94b91255a5f1b..235029d766228 100644
--- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp
+++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp
@@ -1,12 +1,11 @@
 // REQUIRES: aspect-fp64
-// UNSUPPORTED: hip
 
 // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
 
 // RUN: %{build} %{mathflags} -o %t.out
 // RUN: %{run} %t.out
 
-// RUN: %clangxx -fsycl -fsycl-device-lib-jit-link %{mathflags} %s -o %t.out
+// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl -fsycl-device-lib-jit-link %{mathflags} %s -o %t.out
 // RUN: %if !gpu %{ %{run} %t.out %}
 
 #include "math_utils.hpp"
diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp
index 0380234575061..aeda8550294da 100644
--- a/sycl/test-e2e/DeviceLib/math_test.cpp
+++ b/sycl/test-e2e/DeviceLib/math_test.cpp
@@ -1,5 +1,3 @@
-// UNSUPPORTED: hip
-
 // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
 
 // RUN: %{build} %{mathflags} -o %t.out
diff --git a/sycl/test-e2e/DeviceLib/string_test.cpp b/sycl/test-e2e/DeviceLib/string_test.cpp
index 0d71417b54bce..6b61e0a51a043 100644
--- a/sycl/test-e2e/DeviceLib/string_test.cpp
+++ b/sycl/test-e2e/DeviceLib/string_test.cpp
@@ -1,4 +1,3 @@
-// UNSUPPORTED: hip
 // RUN: %{build} -Wno-error=deprecated-declarations -Wno-error=pointer-to-int-cast -fno-builtin -o %t.out
 // RUN: %{run} %t.out
 //