diff --git a/buildbot/check.py b/buildbot/check.py index 70e98c1a701ea..6d84031a6dfb5 100644 --- a/buildbot/check.py +++ b/buildbot/check.py @@ -6,6 +6,7 @@ DEFAULT_CPU_COUNT = 4 + def do_check(args): try: cpu_count = multiprocessing.cpu_count() @@ -14,45 +15,67 @@ def do_check(args): # Get absolute path to source directory if args.src_dir: - abs_src_dir = os.path.abspath(args.src_dir) + abs_src_dir = os.path.abspath(args.src_dir) else: - abs_src_dir = os.path.abspath(os.path.join(__file__, "../..")) + abs_src_dir = os.path.abspath(os.path.join(__file__, "../..")) # Get absolute path to build directory if args.obj_dir: - abs_obj_dir = os.path.abspath(args.obj_dir) + abs_obj_dir = os.path.abspath(args.obj_dir) else: - abs_obj_dir = os.path.join(abs_src_dir, "build") + abs_obj_dir = os.path.join(abs_src_dir, "build") cmake_cmd = [ "cmake", - "--build", abs_obj_dir, + "--build", + abs_obj_dir, "--", args.test_suite, - "-j", str(cpu_count)] + "-j", + str(cpu_count), + ] print("[Cmake Command]: {}".format(" ".join(cmake_cmd))) - env_tmp=os.environ - env_tmp["LIT_ARGS"]="\"{}\"".format("-v") + env_tmp = os.environ + env_tmp["LIT_ARGS"] = '"{}"'.format("-v") subprocess.check_call(cmake_cmd, cwd=abs_obj_dir, env=env_tmp) ret = True return ret + def main(): - parser = argparse.ArgumentParser(prog="check.py", - description="script to do LIT testing", - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") + parser = argparse.ArgumentParser( + prog="check.py", + description="script to do LIT testing", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) parser.add_argument("-b", "--branch", metavar="BRANCH", help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", - help="builder directory, which is the directory containing source and build directories") + parser.add_argument( + "-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch" + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + help="builder directory, which is the directory containing source and build directories", + ) parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", help="source directory") parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", help="build directory") - parser.add_argument("-t", "--test-suite", metavar="TEST_SUITE", default="check-all", help="check-xxx target") + parser.add_argument( + "-t", + "--test-suite", + metavar="TEST_SUITE", + default="check-all", + help="check-xxx target", + ) args = parser.parse_args() @@ -60,6 +83,7 @@ def main(): return do_check(args) + if __name__ == "__main__": ret = main() exit_code = 0 if ret else 1 diff --git a/buildbot/clang_tidy.py b/buildbot/clang_tidy.py index c9c9eb37d7b68..3d5e46ac0df9c 100644 --- a/buildbot/clang_tidy.py +++ b/buildbot/clang_tidy.py @@ -5,10 +5,16 @@ FILE_EXTENSIONS = [".h", ".hpp", ".c", ".cc", ".cpp"] + def do_clang_tidy(args): ret = False - merge_base_cmd = ["git", "merge-base", "origin/{}".format(args.base_branch), args.branch] + merge_base_cmd = [ + "git", + "merge-base", + "origin/{}".format(args.base_branch), 
+ args.branch, + ] print(merge_base_cmd) base_commit = subprocess.check_output(merge_base_cmd, cwd=args.src_dir) base_commit = base_commit.rstrip() @@ -19,13 +25,15 @@ def do_clang_tidy(args): diff_cmd = ["git", "--no-pager", "diff", base_commit, args.branch, "--name-only"] print(diff_cmd) - with open(changed_files, 'w') as f: - subprocess.check_call(merge_base_cmd, cwd=args.src_dir, stdout=f, stderr=subprocess.STDOUT) + with open(changed_files, "w") as f: + subprocess.check_call( + merge_base_cmd, cwd=args.src_dir, stdout=f, stderr=subprocess.STDOUT + ) if os.path.isfile(changed_files): clang_tidy_binary = os.path.join(args.obj_dir, "bin", "clang-tidy") if os.path.isfile(clang_tidy_binary): - with open(changed_files, 'r') as f: + with open(changed_files, "r") as f: for line in f: filename, file_extension = os.path.splitext(line) if file_extension.lower() in FILE_EXTENSIONS: @@ -41,19 +49,42 @@ def do_clang_tidy(args): return ret + def main(): - parser = argparse.ArgumentParser(prog="clang_tidy.py", - description="script to do clang_tidy", - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") - parser.add_argument("-b", "--branch", metavar="BRANCH", required=True, help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", required=True, - help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", required=True, - help="builder directory, which is the directory containing source and build directories") - parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", required=True, help="source directory") - parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory") + parser = argparse.ArgumentParser( + prog="clang_tidy.py", + description="script to do clang_tidy", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) + parser.add_argument( + "-b", "--branch", metavar="BRANCH", required=True, help="pull request branch" + ) + parser.add_argument( + "-d", + "--base-branch", + metavar="BASE_BRANCH", + required=True, + help="pull request base branch", + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + required=True, + help="builder directory, which is the directory containing source and build directories", + ) + parser.add_argument( + "-s", "--src-dir", metavar="SRC_DIR", required=True, help="source directory" + ) + parser.add_argument( + "-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory" + ) args = parser.parse_args() @@ -61,8 +92,8 @@ def main(): return do_clang_tidy(args) + if __name__ == "__main__": ret = main() exit_code = 0 if ret else 1 sys.exit(exit_code) - diff --git a/buildbot/compile.py b/buildbot/compile.py index b1c8e22ed1537..055abd6dab739 100644 --- a/buildbot/compile.py +++ b/buildbot/compile.py @@ -18,24 +18,27 @@ def do_compile(args): # Get absolute path to source directory if args.src_dir: - abs_src_dir = os.path.abspath(args.src_dir) + abs_src_dir = os.path.abspath(args.src_dir) else: - abs_src_dir = os.path.abspath(os.path.join(__file__, "../..")) + abs_src_dir = os.path.abspath(os.path.join(__file__, "../..")) # Get absolute path to build directory if 
args.obj_dir: - abs_obj_dir = os.path.abspath(args.obj_dir) + abs_obj_dir = os.path.abspath(args.obj_dir) else: - abs_obj_dir = os.path.join(abs_src_dir, "build") + abs_obj_dir = os.path.join(abs_src_dir, "build") cmake_cmd = [ "cmake", - "--build", abs_obj_dir, + "--build", + abs_obj_dir, "--", args.build_target, - "-j", str(cpu_count)] + "-j", + str(cpu_count), + ] if args.verbose: - cmake_cmd.append("--verbose") + cmake_cmd.append("--verbose") print("[Cmake Command]: {}".format(" ".join(cmake_cmd))) @@ -45,20 +48,45 @@ def do_compile(args): def main(): - parser = argparse.ArgumentParser(prog="compile.py", - description="script to do compile", - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") + parser = argparse.ArgumentParser( + prog="compile.py", + description="script to do compile", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) parser.add_argument("-b", "--branch", metavar="BRANCH", help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", - help="builder directory, which is the directory containing source and build directories") + parser.add_argument( + "-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch" + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + help="builder directory, which is the directory containing source and build directories", + ) parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", help="source directory") parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", help="build directory") - parser.add_argument("-j", "--build-parallelism", metavar="BUILD_PARALLELISM", help="build parallelism") - parser.add_argument("-v", "--verbose", action='store_true', help="verbose build output") - parser.add_argument("-t", "--build-target", metavar="BUILD_TARGET", default="deploy-sycl-toolchain", help="set build target") + parser.add_argument( + "-j", + "--build-parallelism", + metavar="BUILD_PARALLELISM", + help="build parallelism", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="verbose build output" + ) + parser.add_argument( + "-t", + "--build-target", + metavar="BUILD_TARGET", + default="deploy-sycl-toolchain", + help="set build target", + ) args = parser.parse_args() diff --git a/buildbot/configure.py b/buildbot/configure.py index f172be352ba7d..fc89f8b7b00bf 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -5,24 +5,31 @@ import subprocess import sys + def do_configure(args): # Get absolute path to source directory - abs_src_dir = os.path.abspath(args.src_dir if args.src_dir else os.path.join(__file__, "../..")) + abs_src_dir = os.path.abspath( + args.src_dir if args.src_dir else os.path.join(__file__, "../..") + ) # Get absolute path to build directory - abs_obj_dir = os.path.abspath(args.obj_dir) if args.obj_dir else os.path.join(abs_src_dir, "build") + abs_obj_dir = ( + os.path.abspath(args.obj_dir) + if args.obj_dir + else os.path.join(abs_src_dir, "build") + ) # Create build directory if it doesn't exist if not os.path.isdir(abs_obj_dir): - os.makedirs(abs_obj_dir) + 
os.makedirs(abs_obj_dir) - llvm_external_projects = 'sycl;llvm-spirv;opencl;xpti;xptifw' + llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw" # libdevice build requires a working SYCL toolchain, which is not the case # with macOS target right now. if sys.platform != "darwin": - llvm_external_projects += ';libdevice' + llvm_external_projects += ";libdevice" - libclc_amd_target_names = ';amdgcn--amdhsa' - libclc_nvidia_target_names = ';nvptx64--nvidiacl' + libclc_amd_target_names = ";amdgcn--amdhsa" + libclc_nvidia_target_names = ";nvptx64--nvidiacl" sycl_enable_fusion = "OFF" if not args.disable_fusion: @@ -40,50 +47,50 @@ def do_configure(args): libdevice_dir = os.path.join(abs_src_dir, "libdevice") fusion_dir = os.path.join(abs_src_dir, "sycl-fusion") llvm_targets_to_build = args.host_target - llvm_enable_projects = 'clang;' + llvm_external_projects - libclc_build_native = 'OFF' - libclc_targets_to_build = '' - libclc_gen_remangled_variants = 'OFF' - sycl_build_pi_hip_platform = 'AMD' - sycl_clang_extra_flags = '' - sycl_werror = 'OFF' - llvm_enable_assertions = 'ON' - llvm_enable_doxygen = 'OFF' - llvm_enable_sphinx = 'OFF' - llvm_build_shared_libs = 'OFF' - llvm_enable_lld = 'OFF' + llvm_enable_projects = "clang;" + llvm_external_projects + libclc_build_native = "OFF" + libclc_targets_to_build = "" + libclc_gen_remangled_variants = "OFF" + sycl_build_pi_hip_platform = "AMD" + sycl_clang_extra_flags = "" + sycl_werror = "OFF" + llvm_enable_assertions = "ON" + llvm_enable_doxygen = "OFF" + llvm_enable_sphinx = "OFF" + llvm_build_shared_libs = "OFF" + llvm_enable_lld = "OFF" sycl_enabled_plugins = ["opencl"] - sycl_preview_lib = 'ON' + sycl_preview_lib = "ON" - sycl_enable_xpti_tracing = 'ON' - xpti_enable_werror = 'OFF' + sycl_enable_xpti_tracing = "ON" + xpti_enable_werror = "OFF" if sys.platform != "darwin": sycl_enabled_plugins.append("level_zero") # lld is needed on Windows or for the HIP plugin on AMD - if platform.system() == 'Windows' or (args.hip and args.hip_platform == 'AMD'): - llvm_enable_projects += ';lld' + if platform.system() == "Windows" or (args.hip and args.hip_platform == "AMD"): + llvm_enable_projects += ";lld" libclc_enabled = args.cuda or args.hip or args.native_cpu if libclc_enabled: - llvm_enable_projects += ';libclc' + llvm_enable_projects += ";libclc" if args.cuda: - llvm_targets_to_build += ';NVPTX' + llvm_targets_to_build += ";NVPTX" libclc_targets_to_build = libclc_nvidia_target_names - libclc_gen_remangled_variants = 'ON' + libclc_gen_remangled_variants = "ON" sycl_enabled_plugins.append("cuda") if args.hip: - if args.hip_platform == 'AMD': - llvm_targets_to_build += ';AMDGPU' + if args.hip_platform == "AMD": + llvm_targets_to_build += ";AMDGPU" libclc_targets_to_build += libclc_amd_target_names - elif args.hip_platform == 'NVIDIA' and not args.cuda: - llvm_targets_to_build += ';NVPTX' + elif args.hip_platform == "NVIDIA" and not args.cuda: + llvm_targets_to_build += ";NVPTX" libclc_targets_to_build += libclc_nvidia_target_names - libclc_gen_remangled_variants = 'ON' + libclc_gen_remangled_variants = "ON" sycl_build_pi_hip_platform = args.hip_platform sycl_enabled_plugins.append("hip") @@ -96,28 +103,27 @@ def do_configure(args): libclc_gen_remangled_variants = "ON" sycl_enabled_plugins.append("native_cpu") - # all llvm compiler targets don't require 3rd party dependencies, so can be # built/tested even if specific runtimes are not available if args.enable_all_llvm_targets: - llvm_targets_to_build += ';NVPTX;AMDGPU' + llvm_targets_to_build += 
";NVPTX;AMDGPU" if args.werror or args.ci_defaults: - sycl_werror = 'ON' - xpti_enable_werror = 'ON' + sycl_werror = "ON" + xpti_enable_werror = "ON" if args.no_assertions: - llvm_enable_assertions = 'OFF' + llvm_enable_assertions = "OFF" if args.docs: - llvm_enable_doxygen = 'ON' - llvm_enable_sphinx = 'ON' + llvm_enable_doxygen = "ON" + llvm_enable_sphinx = "ON" if args.shared_libs: - llvm_build_shared_libs = 'ON' + llvm_build_shared_libs = "ON" if args.use_lld: - llvm_enable_lld = 'ON' + llvm_enable_lld = "ON" # CI Default conditionally appends to options, keep it at the bottom of # args handling @@ -131,32 +137,33 @@ def do_configure(args): if sys.platform != "darwin": # libclc is required for CI validation libclc_enabled = True - if 'libclc' not in llvm_enable_projects: - llvm_enable_projects += ';libclc' + if "libclc" not in llvm_enable_projects: + llvm_enable_projects += ";libclc" # libclc passes `--nvvm-reflect-enable=false`, build NVPTX to enable it - if 'NVPTX' not in llvm_targets_to_build: - llvm_targets_to_build += ';NVPTX' + if "NVPTX" not in llvm_targets_to_build: + llvm_targets_to_build += ";NVPTX" # since we are building AMD libclc target we must have AMDGPU target - if 'AMDGPU' not in llvm_targets_to_build: - llvm_targets_to_build += ';AMDGPU' + if "AMDGPU" not in llvm_targets_to_build: + llvm_targets_to_build += ";AMDGPU" # Add both NVIDIA and AMD libclc targets if libclc_amd_target_names not in libclc_targets_to_build: libclc_targets_to_build += libclc_amd_target_names if libclc_nvidia_target_names not in libclc_targets_to_build: libclc_targets_to_build += libclc_nvidia_target_names - libclc_gen_remangled_variants = 'ON' + libclc_gen_remangled_variants = "ON" if args.enable_plugin: sycl_enabled_plugins += args.enable_plugin if args.disable_preview_lib: - sycl_preview_lib = 'OFF' + sycl_preview_lib = "OFF" install_dir = os.path.join(abs_obj_dir, "install") cmake_cmd = [ "cmake", - "-G", args.cmake_gen, + "-G", + args.cmake_gen, "-DCMAKE_BUILD_TYPE={}".format(args.build_type), "-DLLVM_ENABLE_ASSERTIONS={}".format(llvm_enable_assertions), "-DLLVM_TARGETS_TO_BUILD={}".format(llvm_targets_to_build), @@ -173,7 +180,7 @@ def do_configure(args): "-DLLVM_BUILD_TOOLS=ON", "-DSYCL_ENABLE_WERROR={}".format(sycl_werror), "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), - "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. + "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. 
"-DLLVM_ENABLE_DOXYGEN={}".format(llvm_enable_doxygen), "-DLLVM_ENABLE_SPHINX={}".format(llvm_enable_sphinx), "-DBUILD_SHARED_LIBS={}".format(llvm_build_shared_libs), @@ -181,7 +188,7 @@ def do_configure(args): "-DLLVM_ENABLE_LLD={}".format(llvm_enable_lld), "-DXPTI_ENABLE_WERROR={}".format(xpti_enable_werror), "-DSYCL_CLANG_EXTRA_FLAGS={}".format(sycl_clang_extra_flags), - "-DSYCL_ENABLE_PLUGINS={}".format(';'.join(set(sycl_enabled_plugins))), + "-DSYCL_ENABLE_PLUGINS={}".format(";".join(set(sycl_enabled_plugins))), "-DSYCL_ENABLE_KERNEL_FUSION={}".format(sycl_enable_fusion), "-DSYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB={}".format(sycl_preview_lib), "-DBUG_REPORT_URL=https://github.com/intel/llvm/issues", @@ -199,31 +206,41 @@ def do_configure(args): ) if args.l0_headers and args.l0_loader: - cmake_cmd.extend([ - "-DLEVEL_ZERO_INCLUDE_DIR={}".format(args.l0_headers), - "-DLEVEL_ZERO_LIBRARY={}".format(args.l0_loader)]) + cmake_cmd.extend( + [ + "-DLEVEL_ZERO_INCLUDE_DIR={}".format(args.l0_headers), + "-DLEVEL_ZERO_LIBRARY={}".format(args.l0_loader), + ] + ) elif args.l0_headers or args.l0_loader: - sys.exit("Please specify both Level Zero headers and loader or don't specify " - "none of them to let download from github.com") + sys.exit( + "Please specify both Level Zero headers and loader or don't specify " + "none of them to let download from github.com" + ) # Add additional CMake options if provided if args.cmake_opt: - cmake_cmd += args.cmake_opt - + cmake_cmd += args.cmake_opt + if args.add_security_flags: - cmake_cmd.extend(["-DEXTRA_SECURITY_FLAGS={}".format(args.add_security_flags)]) + cmake_cmd.extend(["-DEXTRA_SECURITY_FLAGS={}".format(args.add_security_flags)]) # Add path to root CMakeLists.txt cmake_cmd.append(llvm_dir) if args.use_libcxx: - if not (args.libcxx_include and args.libcxx_library): - sys.exit("Please specify include and library path of libc++ when building sycl " - "runtime with it") - cmake_cmd.extend([ - "-DSYCL_USE_LIBCXX=ON", - "-DSYCL_LIBCXX_INCLUDE_PATH={}".format(args.libcxx_include), - "-DSYCL_LIBCXX_LIBRARY_PATH={}".format(args.libcxx_library)]) + if not (args.libcxx_include and args.libcxx_library): + sys.exit( + "Please specify include and library path of libc++ when building sycl " + "runtime with it" + ) + cmake_cmd.extend( + [ + "-DSYCL_USE_LIBCXX=ON", + "-DSYCL_LIBCXX_INCLUDE_PATH={}".format(args.libcxx_include), + "-DSYCL_LIBCXX_LIBRARY_PATH={}".format(args.libcxx_library), + ] + ) print("[Cmake Command]: {}".format(" ".join(map(shlex.quote, cmake_cmd)))) @@ -232,61 +249,158 @@ def do_configure(args): except subprocess.CalledProcessError: cmake_cache = os.path.join(abs_obj_dir, "CMakeCache.txt") if os.path.isfile(cmake_cache): - print("There is CMakeCache.txt at " + cmake_cache + - " ... you can try to remove it and rerun.") - print("Configure failed!") + print( + "There is CMakeCache.txt at " + + cmake_cache + + " ... you can try to remove it and rerun." 
+ ) + print("Configure failed!") return False return True + def main(): - parser = argparse.ArgumentParser(prog="configure.py", - description="Generate build files from CMake configuration files", - formatter_class=argparse.RawTextHelpFormatter) + parser = argparse.ArgumentParser( + prog="configure.py", + description="Generate build files from CMake configuration files", + formatter_class=argparse.RawTextHelpFormatter, + ) # CI system options - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) parser.add_argument("-b", "--branch", metavar="BRANCH", help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", - help="builder directory, which is the directory containing source and build directories") + parser.add_argument( + "-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch" + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + help="builder directory, which is the directory containing source and build directories", + ) # User options - parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", help="source directory (autodetected by default)") - parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", help="build directory. (/build by default)") - parser.add_argument("--l0-headers", metavar="L0_HEADER_DIR", help="directory with Level Zero headers") - parser.add_argument("--l0-loader", metavar="L0_LOADER", help="path to the Level Zero loader") - parser.add_argument("-t", "--build-type", - metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release") - parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA") - parser.add_argument("--native_cpu", action='store_true', help="Enable SYCL Native CPU") - parser.add_argument("--hip", action='store_true', help="switch from OpenCL to HIP") - parser.add_argument("--hip-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose hardware platform for HIP backend") - parser.add_argument("--host-target", default='host', - help="host LLVM target architecture, defaults to \'host\', multiple targets may be provided as a semi-colon separated string") - parser.add_argument("--enable-all-llvm-targets", action='store_true', help="build compiler with all supported targets, it doesn't change runtime build") - parser.add_argument("--no-assertions", action='store_true', help="build without assertions") - parser.add_argument("--docs", action='store_true', help="build Doxygen documentation") - parser.add_argument("--werror", action='store_true', help="Treat warnings as errors") - parser.add_argument("--shared-libs", action='store_true', help="Build shared libraries") - parser.add_argument("--cmake-opt", action='append', help="Additional CMake option not configured via script parameters") + parser.add_argument( + "-s", + "--src-dir", + metavar="SRC_DIR", + help="source directory (autodetected by default)", + ) + parser.add_argument( + "-o", + "--obj-dir", + metavar="OBJ_DIR", + help="build directory. 
(/build by default)", + ) + parser.add_argument( + "--l0-headers", + metavar="L0_HEADER_DIR", + help="directory with Level Zero headers", + ) + parser.add_argument( + "--l0-loader", metavar="L0_LOADER", help="path to the Level Zero loader" + ) + parser.add_argument( + "-t", + "--build-type", + metavar="BUILD_TYPE", + default="Release", + help="build type: Debug, Release", + ) + parser.add_argument( + "--cuda", action="store_true", help="switch from OpenCL to CUDA" + ) + parser.add_argument( + "--native_cpu", action="store_true", help="Enable SYCL Native CPU" + ) + parser.add_argument("--hip", action="store_true", help="switch from OpenCL to HIP") + parser.add_argument( + "--hip-platform", + type=str, + choices=["AMD", "NVIDIA"], + default="AMD", + help="choose hardware platform for HIP backend", + ) + parser.add_argument( + "--host-target", + default="host", + help="host LLVM target architecture, defaults to 'host', multiple targets may be provided as a semi-colon separated string", + ) + parser.add_argument( + "--enable-all-llvm-targets", + action="store_true", + help="build compiler with all supported targets, it doesn't change runtime build", + ) + parser.add_argument( + "--no-assertions", action="store_true", help="build without assertions" + ) + parser.add_argument( + "--docs", action="store_true", help="build Doxygen documentation" + ) + parser.add_argument( + "--werror", action="store_true", help="Treat warnings as errors" + ) + parser.add_argument( + "--shared-libs", action="store_true", help="Build shared libraries" + ) + parser.add_argument( + "--cmake-opt", + action="append", + help="Additional CMake option not configured via script parameters", + ) parser.add_argument("--cmake-gen", default="Ninja", help="CMake generator") - parser.add_argument("--use-libcxx", action="store_true", help="build sycl runtime with libcxx") - parser.add_argument("--libcxx-include", metavar="LIBCXX_INCLUDE_PATH", help="libcxx include path") - parser.add_argument("--libcxx-library", metavar="LIBCXX_LIBRARY_PATH", help="libcxx library path") - parser.add_argument("--use-lld", action="store_true", help="Use LLD linker for build") - parser.add_argument("--llvm-external-projects", help="Add external projects to build. Add as comma seperated list.") - parser.add_argument("--ci-defaults", action="store_true", help="Enable default CI parameters") - parser.add_argument("--enable-plugin", action='append', help="Enable SYCL plugin") - parser.add_argument("--disable-preview-lib", action='store_true', help="Disable building of the SYCL runtime major release preview library") - parser.add_argument("--disable-fusion", action="store_true", help="Disable the kernel fusion JIT compiler") - parser.add_argument("--add_security_flags", type=str, choices=['none', 'default', 'sanitize'], default=None, help="Enables security flags for compile & link. Two values are supported: 'default' and 'sanitize'. 
'Sanitize' option is an extension of 'default' set.") - parser.add_argument('--native-cpu-libclc-targets', help='Target triples for libclc, used by the Native CPU backend') + parser.add_argument( + "--use-libcxx", action="store_true", help="build sycl runtime with libcxx" + ) + parser.add_argument( + "--libcxx-include", metavar="LIBCXX_INCLUDE_PATH", help="libcxx include path" + ) + parser.add_argument( + "--libcxx-library", metavar="LIBCXX_LIBRARY_PATH", help="libcxx library path" + ) + parser.add_argument( + "--use-lld", action="store_true", help="Use LLD linker for build" + ) + parser.add_argument( + "--llvm-external-projects", + help="Add external projects to build. Add as comma seperated list.", + ) + parser.add_argument( + "--ci-defaults", action="store_true", help="Enable default CI parameters" + ) + parser.add_argument("--enable-plugin", action="append", help="Enable SYCL plugin") + parser.add_argument( + "--disable-preview-lib", + action="store_true", + help="Disable building of the SYCL runtime major release preview library", + ) + parser.add_argument( + "--disable-fusion", + action="store_true", + help="Disable the kernel fusion JIT compiler", + ) + parser.add_argument( + "--add_security_flags", + type=str, + choices=["none", "default", "sanitize"], + default=None, + help="Enables security flags for compile & link. Two values are supported: 'default' and 'sanitize'. 'Sanitize' option is an extension of 'default' set.", + ) + parser.add_argument( + "--native-cpu-libclc-targets", + help="Target triples for libclc, used by the Native CPU backend", + ) args = parser.parse_args() print("args:{}".format(args)) return do_configure(args) + if __name__ == "__main__": ret = main() exit_code = 0 if ret else 1 diff --git a/buildbot/dependency.py b/buildbot/dependency.py index e79eae2b62cc1..3eac3cc7a92a8 100644 --- a/buildbot/dependency.py +++ b/buildbot/dependency.py @@ -4,6 +4,7 @@ import subprocess import sys + def do_dependency(args): ret = False @@ -11,7 +12,8 @@ def do_dependency(args): if args.pr_number is not None and not args.clean_build: if args.branch is None or args.base_branch is None: "branch ({}) and base branch ({}) is required for pull request #{}".format( - args.branch, args.base_branch, args.pr_number) + args.branch, args.base_branch, args.pr_number + ) return ret # fetching the recent state of base branch fetch_cmd = ["git", "fetch", "origin", args.base_branch] @@ -25,14 +27,31 @@ def do_dependency(args): print(checkout_cmd) subprocess.check_call(checkout_cmd, cwd=args.src_dir) # get baseline commit - merge_base_cmd = ["git", "merge-base", "origin/{}".format(args.base_branch), args.branch] + merge_base_cmd = [ + "git", + "merge-base", + "origin/{}".format(args.base_branch), + args.branch, + ] print(merge_base_cmd) base_commit = subprocess.check_output(merge_base_cmd, cwd=args.src_dir) - base_commit = base_commit.rstrip() - diff_cmd = ["git", "--no-pager", "diff", base_commit, args.branch, "--name-only", "buildbot"] + base_commit = base_commit.rstrip() + diff_cmd = [ + "git", + "--no-pager", + "diff", + base_commit, + args.branch, + "--name-only", + "buildbot", + ] print(diff_cmd) changed_build_scripts = subprocess.check_output(diff_cmd, cwd=args.src_dir) - changed_build_scripts = changed_build_scripts.rstrip() if changed_build_scripts is not None else None + changed_build_scripts = ( + changed_build_scripts.rstrip() + if changed_build_scripts is not None + else None + ) # clean build directory if build scripts have changed if len(changed_build_scripts) > 0: if 
os.path.isdir(args.obj_dir): @@ -49,8 +68,14 @@ def do_dependency(args): # fetch OpenCL headers ocl_header_dir = os.path.join(args.obj_dir, "OpenCL-Headers") if not os.path.isdir(ocl_header_dir): - clone_cmd = ["git", "clone", "https://github.com/KhronosGroup/OpenCL-Headers", - "OpenCL-Headers", "-b", "main"] + clone_cmd = [ + "git", + "clone", + "https://github.com/KhronosGroup/OpenCL-Headers", + "OpenCL-Headers", + "-b", + "main", + ] subprocess.check_call(clone_cmd, cwd=args.obj_dir) else: fetch_cmd = ["git", "pull", "--ff", "--ff-only", "origin"] @@ -64,9 +89,14 @@ def do_dependency(args): # fetch and build OpenCL ICD loader icd_loader_dir = os.path.join(args.obj_dir, "OpenCL-ICD-Loader") if not os.path.isdir(icd_loader_dir): - clone_cmd = ["git", "clone", - "https://github.com/KhronosGroup/OpenCL-ICD-Loader", - "OpenCL-ICD-Loader", "-b", "main"] + clone_cmd = [ + "git", + "clone", + "https://github.com/KhronosGroup/OpenCL-ICD-Loader", + "OpenCL-ICD-Loader", + "-b", + "main", + ] subprocess.check_call(clone_cmd, cwd=args.obj_dir) else: @@ -83,36 +113,60 @@ def do_dependency(args): shutil.rmtree(icd_build_dir) os.makedirs(icd_build_dir) install_dir = os.path.join(args.obj_dir, "install") - cmake_cmd = ["cmake", "-G", "Ninja", - "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), - "-DOPENCL_ICD_LOADER_HEADERS_DIR={}".format(ocl_header_dir), - ".." ] + cmake_cmd = [ + "cmake", + "-G", + "Ninja", + "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), + "-DOPENCL_ICD_LOADER_HEADERS_DIR={}".format(ocl_header_dir), + "..", + ] print("[Cmake Command]: {}".format(" ".join(cmake_cmd))) - + subprocess.check_call(cmake_cmd, cwd=icd_build_dir) - env_tmp=os.environ + env_tmp = os.environ env_tmp["C_INCLUDE_PATH"] = "{}".format(ocl_header_dir) subprocess.check_call(["ninja", "install"], env=env_tmp, cwd=icd_build_dir) ret = True return ret + def main(): - parser = argparse.ArgumentParser(prog="dependency.py", - description="script to get and build dependency", - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") + parser = argparse.ArgumentParser( + prog="dependency.py", + description="script to get and build dependency", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) parser.add_argument("-b", "--branch", metavar="BRANCH", help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", - help="builder directory, which is the directory containing source and build directories") + parser.add_argument( + "-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch" + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + help="builder directory, which is the directory containing source and build directories", + ) parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", help="source directory") - parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory") - parser.add_argument("-c", "--clean-build", action="store_true", default=False, - help="true if the build is clean build which has clobber step") + parser.add_argument( + "-o", "--obj-dir", 
metavar="OBJ_DIR", required=True, help="build directory" + ) + parser.add_argument( + "-c", + "--clean-build", + action="store_true", + default=False, + help="true if the build is clean build which has clobber step", + ) args = parser.parse_args() @@ -120,6 +174,7 @@ def main(): return do_dependency(args) + if __name__ == "__main__": ret = main() exit_code = 0 if ret else 1 diff --git a/devops/dependencies.json b/devops/dependencies.json index 00a1d7db2b692..6340db87bc5a2 100644 --- a/devops/dependencies.json +++ b/devops/dependencies.json @@ -1,15 +1,15 @@ { "linux": { "compute_runtime": { - "github_tag": "24.22.29735.20", - "version": "24.22.29735.20", - "url": "https://github.com/intel/compute-runtime/releases/tag/24.22.29735.20", + "github_tag": "24.26.30049.6", + "version": "24.26.30049.6", + "url": "https://github.com/intel/compute-runtime/releases/tag/24.26.30049.6", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "igc": { - "github_tag": "igc-1.0.16900.23", - "version": "1.0.16900.23", - "url": "https://github.com/intel/intel-graphics-compiler/releases/tag/igc-1.0.16900.23", + "github_tag": "igc-1.0.17193.4", + "version": "1.0.17193.4", + "url": "https://github.com/intel/intel-graphics-compiler/releases/tag/igc-1.0.17193.4", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "cm": { @@ -19,9 +19,9 @@ "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "level_zero": { - "github_tag": "v1.17.17", - "version": "v1.17.17", - "url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.17.17", + "github_tag": "v1.17.19", + "version": "v1.17.19", + "url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.17.19", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "tbb": { diff --git a/libclc/ptx-nvidiacl/libspirv/synchronization/barrier.cl b/libclc/ptx-nvidiacl/libspirv/synchronization/barrier.cl index 1cf3fb8750c2e..eb011986659eb 100644 --- a/libclc/ptx-nvidiacl/libspirv/synchronization/barrier.cl +++ b/libclc/ptx-nvidiacl/libspirv/synchronization/barrier.cl @@ -45,10 +45,54 @@ _CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int memory, _CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void __spirv_ControlBarrier(unsigned int scope, unsigned int memory, unsigned int semantics) { + unsigned int order = semantics & 0x1F; if (scope == Subgroup) { // use a full mask as barriers are required to be convergent and exited // threads can safely be in the mask __nvvm_bar_warp_sync(0xFFFFFFFF); + } else if (scope == Device && memory == Device && + order == SequentiallyConsistent && + __clc_nvvm_reflect_arch() >= 700) { + unsigned int env1, env2; + __asm__ __volatile__("mov.u32 %0, %%envreg1;" : "=r"(env1)); + __asm__ __volatile__("mov.u32 %0, %%envreg2;" : "=r"(env2)); + long long envreg1 = env1; + long long envreg2 = env2; + // Bit field insert operation. Place 32 bits of envreg2 next to 32 bits of + // envreg1: s64[envreg2][envreg1]. The resulting value is the address in + // device global memory region, where atomic operations can be performed. + long long atomicAddr; + __asm__ __volatile__("bfi.b64 %0, %1, %2, 32, 32;" + : "=l"(atomicAddr) + : "l"(envreg1), "l"(envreg2)); + if (!atomicAddr) { + __builtin_trap(); + } else { + unsigned int tidX = __nvvm_read_ptx_sreg_tid_x(); + unsigned int tidY = __nvvm_read_ptx_sreg_tid_y(); + unsigned int tidZ = __nvvm_read_ptx_sreg_tid_z(); + if (tidX + tidY + tidZ == 0) { + // Increment address by 4 to get the precise region initialized to 0. 
+ atomicAddr += 4; + unsigned int nctaidX = __nvvm_read_ptx_sreg_nctaid_x(); + unsigned int nctaidY = __nvvm_read_ptx_sreg_nctaid_y(); + unsigned int nctaidZ = __nvvm_read_ptx_sreg_nctaid_z(); + unsigned int totalNctaid = nctaidX * nctaidY * nctaidZ; + + // Do atomic.add(1) for each CTA and spin ld.acquire in a loop until all + // CTAs have performed the addition + unsigned int prev, current; + __asm__ __volatile__("atom.add.release.gpu.u32 %0,[%1],1;" + : "=r"(prev) + : "l"(atomicAddr)); + do { + __asm__ __volatile__("ld.acquire.gpu.u32 %0,[%1];" + : "=r"(current) + : "l"(atomicAddr)); + } while (current % totalNctaid != 0); + } + __nvvm_barrier_sync(0); + } } else { __syncthreads(); } diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp index f59bc17bb948e..c2a4f7124dbff 100644 --- a/libdevice/sanitizer_utils.cpp +++ b/libdevice/sanitizer_utils.cpp @@ -862,4 +862,35 @@ __asan_set_shadow_dynamic_local(uptr ptr, uint32_t num_args) { __spirv_ocl_printf(__mem_set_shadow_dynamic_local_end); } +/// +/// ASAN initialize shdadow memory of private memory +/// + +static __SYCL_CONSTANT__ const char __mem_set_shadow_private_begin[] = + "[kernel] BEGIN __asan_set_shadow_private\n"; +static __SYCL_CONSTANT__ const char __mem_set_shadow_private_end[] = + "[kernel] END __asan_set_shadow_private\n"; +static __SYCL_CONSTANT__ const char __mem_set_shadow_private[] = + "[kernel] set_shadow_private(beg=%p, end=%p, val:%02X)\n"; + +DEVICE_EXTERN_C_NOINLINE void __asan_set_shadow_private(uptr begin, uptr size, + char val) { + if (__AsanDebug) + __spirv_ocl_printf(__mem_set_shadow_private_begin); + + auto *launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo; + if (launch_info->PrivateShadowOffset == 0) + return; + + if (__AsanDebug) + __spirv_ocl_printf(__mem_set_shadow_private, (void *)begin, + (void *)(begin + size), val & 0xFF); + + for (size_t i = 0; i < size; i++) + ((__SYCL_GLOBAL__ u8 *)begin)[i] = val; + + if (__AsanDebug) + __spirv_ocl_printf(__mem_set_shadow_private_end); +} + #endif // __SPIR__ || __SPIRV__ diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 4939808595680..4fef4c918351d 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -53,8 +53,8 @@ def AspectExt_oneapi_bindless_images : Aspect<"ext_oneapi_bindless_images">; def AspectExt_oneapi_bindless_images_shared_usm : Aspect<"ext_oneapi_bindless_images_shared_usm">; def AspectExt_oneapi_bindless_images_1d_usm : Aspect<"ext_oneapi_bindless_images_1d_usm">; def AspectExt_oneapi_bindless_images_2d_usm : Aspect<"ext_oneapi_bindless_images_2d_usm">; -def AspectExt_oneapi_interop_memory_import : Aspect<"ext_oneapi_interop_memory_import">; -def AspectExt_oneapi_interop_semaphore_import : Aspect<"ext_oneapi_interop_semaphore_import">; +def AspectExt_oneapi_external_memory_import : Aspect<"ext_oneapi_external_memory_import">; +def AspectExt_oneapi_external_semaphore_import : Aspect<"ext_oneapi_external_semaphore_import">; def AspectExt_oneapi_mipmap : Aspect<"ext_oneapi_mipmap">; def AspectExt_oneapi_mipmap_anisotropy : Aspect<"ext_oneapi_mipmap_anisotropy">; def AspectExt_oneapi_mipmap_level_reference : Aspect<"ext_oneapi_mipmap_level_reference">; @@ -130,9 +130,9 @@ def : TargetInfo<"__TestAspectList", AspectExt_intel_device_id, AspectExt_intel_memory_clock_rate, AspectExt_intel_memory_bus_width, AspectEmulated, AspectExt_intel_legacy_image, AspectExt_oneapi_bindless_images, 
AspectExt_oneapi_bindless_images_shared_usm, AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm, - AspectExt_oneapi_interop_memory_import, AspectExt_oneapi_interop_semaphore_import, + AspectExt_oneapi_external_memory_import, AspectExt_oneapi_external_semaphore_import, AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, - AspectExt_oneapi_bindless_sampled_image_fetch_3d, AspectExt_oneapi_cubemap, + AspectExt_oneapi_cubemap, AspectExt_oneapi_cubemap_seamless_filtering, AspectExt_oneapi_image_array, AspectExt_oneapi_unique_addressing_per_dim, @@ -140,6 +140,7 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_bindless_images_sample_2d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_1d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_1d, AspectExt_oneapi_bindless_sampled_image_fetch_2d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_2d, + AspectExt_oneapi_bindless_sampled_image_fetch_3d, AspectExt_intel_esimd, AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component, @@ -220,8 +221,8 @@ defvar CudaMinAspects = !listconcat(AllUSMAspects, [AspectGpu, AspectFp64, Aspec AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]); // Bindless images aspects are partially supported on CUDA and disabled by default at the moment. defvar CudaBindlessImagesAspects = [AspectExt_oneapi_bindless_images, AspectExt_oneapi_bindless_images_shared_usm, - AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_interop_memory_import, - AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, + AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_external_memory_import, + AspectExt_oneapi_external_semaphore_import, AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_oneapi_cubemap, AspectExt_oneapi_cubemap_seamless_filtering, AspectExt_oneapi_image_array, AspectExt_oneapi_unique_addressing_per_dim, AspectExt_oneapi_bindless_images_sample_2d_usm, AspectExt_oneapi_bindless_images_sample_2d_usm]; diff --git a/llvm/include/llvm/SYCLLowerIR/SpecConstants.h b/llvm/include/llvm/SYCLLowerIR/SpecConstants.h index bbd0213158d46..8bf8bdf894d07 100644 --- a/llvm/include/llvm/SYCLLowerIR/SpecConstants.h +++ b/llvm/include/llvm/SYCLLowerIR/SpecConstants.h @@ -59,7 +59,7 @@ class SpecConstantsPass : public PassInfoMixin { enum class HandlingMode { default_values, emulation, native }; public: - SpecConstantsPass(HandlingMode Mode) : Mode(Mode) {} + SpecConstantsPass(HandlingMode Mode = HandlingMode::emulation) : Mode(Mode) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); // Searches given module for occurrences of specialization constant-specific @@ -73,7 +73,7 @@ class SpecConstantsPass : public PassInfoMixin { std::vector &DefaultValues); private: - HandlingMode Mode = HandlingMode::emulation; + HandlingMode Mode; }; bool checkModuleContainsSpecConsts(const Module &M); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 5bbaea52085e3..4c09bd60a8e65 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -131,6 +131,7 @@ #include 
"llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h" #include "llvm/SYCLLowerIR/SYCLPropagateJointMatrixUsage.h" #include "llvm/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.h" +#include "llvm/SYCLLowerIR/SpecConstants.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index f306e77b43afe..9f4297d0522da 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -164,6 +164,7 @@ MODULE_PASS("lower-slm-reservation-calls", ESIMDLowerSLMReservationCalls()) MODULE_PASS("record-sycl-aspect-names", RecordSYCLAspectNamesPass()) MODULE_PASS("sycl-virtual-functions-analysis", SYCLVirtualFunctionsAnalysisPass()) +MODULE_PASS("spec-constants", SpecConstantsPass()) #undef MODULE_PASS #ifndef MODULE_PASS_WITH_PARAMS diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index 9506dcda2bcc1..a6609adce3429 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -1289,6 +1289,20 @@ translateSpirvGlobalUses(LoadInst *LI, StringRef SpirvGlobalName, } } +static void translateGlobalUse(Value *Use, StringRef SpirvGlobalName, + SmallVectorImpl &InstsToErase) { + LoadInst *LI = dyn_cast(Use); + ConstantExpr *CE = dyn_cast(Use); + GetElementPtrConstantExpr *GEPCE = dyn_cast(Use); + if (LI != nullptr) { + translateSpirvGlobalUses(LI, SpirvGlobalName, InstsToErase); + } else if (CE != nullptr || GEPCE != nullptr) { + for (User *U : (CE == nullptr ? GEPCE : CE)->users()) { + translateGlobalUse(U, SpirvGlobalName, InstsToErase); + } + } +} + static void createESIMDIntrinsicArgs(const ESIMDIntrinDesc &Desc, SmallVector &GenXArgs, CallInst &CI, id::FunctionEncoding *FE) { @@ -2090,6 +2104,18 @@ PreservedAnalyses SYCLLowerESIMDPass::run(Module &M, MPM.run(M, MAM); } + SmallVector ToErase; + constexpr size_t PrefLen = StringRef(SPIRV_INTRIN_PREF).size(); + for (GlobalVariable &Global : M.globals()) { + if (!Global.getName().starts_with(SPIRV_INTRIN_PREF)) + continue; + + for (User *U : Global.users()) + translateGlobalUse(U, Global.getName().drop_front(PrefLen), ToErase); + } + for (auto *CI : ToErase) + CI->eraseFromParent(); + generateKernelMetadata(M); // This function needs to run after generateKernelMetadata, as it // uses the generated metadata: @@ -2244,37 +2270,6 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F, // this is ESIMD intrinsic - record for later translation ESIMDIntrCalls.push_back(CI); } - - // Translate loads from SPIRV builtin globals into GenX intrinsics - auto *LI = dyn_cast(&I); - if (LI) { - Value *LoadPtrOp = LI->getPointerOperand(); - Value *SpirvGlobal = nullptr; - // Look through constant expressions to find SPIRV builtin globals - // It may come with or without cast. - auto *CE = dyn_cast(LoadPtrOp); - auto *GEPCE = dyn_cast(LoadPtrOp); - if (GEPCE) { - SpirvGlobal = GEPCE->getOperand(0); - } else if (CE) { - assert(CE->isCast() && "ConstExpr should be a cast"); - SpirvGlobal = CE->getOperand(0); - } else { - SpirvGlobal = LoadPtrOp; - } - - if (!isa(SpirvGlobal) || - !SpirvGlobal->getName().starts_with(SPIRV_INTRIN_PREF)) - continue; - - auto PrefLen = StringRef(SPIRV_INTRIN_PREF).size(); - - // Translate all uses of the load instruction from SPIRV builtin global. - // Replaces the original global load and it is uses and stores the old - // instructions to ToErase. 
- translateSpirvGlobalUses(LI, SpirvGlobal->getName().drop_front(PrefLen), - ToErase); - } } // Now demangle and translate found ESIMD intrinsic calls for (auto *CI : ESIMDIntrCalls) { diff --git a/llvm/lib/SYCLLowerIR/SpecConstants.cpp b/llvm/lib/SYCLLowerIR/SpecConstants.cpp index 58f5a0d54b26e..4f43a22e95fd9 100644 --- a/llvm/lib/SYCLLowerIR/SpecConstants.cpp +++ b/llvm/lib/SYCLLowerIR/SpecConstants.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/TargetParser/Triple.h" #include @@ -101,12 +102,16 @@ StringRef getStringLiteralArg(const CallInst *CI, unsigned ArgNo, // so that %1 is trivially known to be the address of the @.str literal. Value *TmpPtr = L->getPointerOperand(); - AssertRelease((isa(TmpPtr) && - isa(cast(TmpPtr) - ->getPointerOperand() - ->stripPointerCasts())) || - isa(TmpPtr), - "unexpected instruction type"); + auto ValueIsAlloca = [](Value *V) { + if (auto *ASC = dyn_cast(V)) + V = ASC->getPointerOperand()->stripPointerCasts(); + using namespace PatternMatch; + Value *X; + if (match(V, m_IntToPtr(m_Add(m_PtrToInt(m_Value(X)), m_ConstantInt())))) + V = X; + return isa(V); + }; + AssertRelease(ValueIsAlloca(TmpPtr), "unexpected instruction type"); // find the store of the literal address into TmpPtr StoreInst *Store = nullptr; diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f500440e02706..4ee247821fb11 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1042,6 +1042,7 @@ struct FunctionStackPoisoner : public InstVisitor { FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; FunctionCallee AsanSetShadowFunc[0x100] = {}; + FunctionCallee AsanSetShadowPrivateFunc; FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc; FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc; @@ -1257,10 +1258,11 @@ struct FunctionStackPoisoner : public InstVisitor { // ShadowMask is not zero. If ShadowMask[i] is zero, we assume that // ShadowBytes[i] is constantly zero and doesn't need to be overwritten. 
void copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, - IRBuilder<> &IRB, Value *ShadowBase); + IRBuilder<> &IRB, Value *ShadowBase, + bool ForceOutline = false); void copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, IRBuilder<> &IRB, - Value *ShadowBase); + Value *ShadowBase, bool ForceOutline = false); void copyToShadowInline(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, IRBuilder<> &IRB, Value *ShadowBase); @@ -3593,6 +3595,9 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { AsanSetShadowFunc[Val] = M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy); } + AsanSetShadowPrivateFunc = + M.getOrInsertFunction("__asan_set_shadow_private", IRB.getVoidTy(), + IntptrTy, IntptrTy, IRB.getInt8Ty()); AsanAllocaPoisonFunc = M.getOrInsertFunction( kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy); @@ -3655,14 +3660,17 @@ void FunctionStackPoisoner::copyToShadowInline(ArrayRef ShadowMask, void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, - IRBuilder<> &IRB, Value *ShadowBase) { - copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase); + IRBuilder<> &IRB, Value *ShadowBase, + bool ForceOutline) { + copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase, + ForceOutline); } void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, - IRBuilder<> &IRB, Value *ShadowBase) { + IRBuilder<> &IRB, Value *ShadowBase, + bool ForceOutline) { assert(ShadowMask.size() == ShadowBytes.size()); size_t Done = Begin; for (size_t i = Begin, j = Begin + 1; i < End; i = j++) { @@ -3671,14 +3679,20 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, continue; } uint8_t Val = ShadowBytes[i]; - if (!AsanSetShadowFunc[Val]) + if (!AsanSetShadowFunc[Val] && !ForceOutline) continue; // Skip same values. for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) { } - if (j - i >= ASan.MaxInlinePoisoningSize) { + if (ForceOutline) { + RTCI.createRuntimeCall( + IRB, AsanSetShadowPrivateFunc, + {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)), + ConstantInt::get(IntptrTy, j - i), + ConstantInt::get(IRB.getInt8Ty(), Val)}); + } else if (j - i >= ASan.MaxInlinePoisoningSize) { copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase); RTCI.createRuntimeCall( IRB, AsanSetShadowFunc[Val], @@ -3688,7 +3702,8 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, } } - copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase); + if (!ForceOutline) + copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase); } // Fake stack allocator (asan_fake_stack.h) has 11 size classes @@ -4062,7 +4077,8 @@ void FunctionStackPoisoner::processStaticAllocas() { ASan.memToShadow(LocalStackBase, IRB, kSpirOffloadPrivateAS); // As mask we must use most poisoned case: red zones and after scope. // As bytes we can use either the same or just red zones only. 
- copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); + copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase, + TargetTriple.isSPIROrSPIRV()); if (!StaticAllocaPoisonCallVec.empty()) { const auto &ShadowInScope = GetShadowBytes(SVD, L); @@ -4132,7 +4148,8 @@ void FunctionStackPoisoner::processStaticAllocas() { IRBuilder<> IRBElse(ElseTerm); copyToShadow(ShadowAfterScope, ShadowClean, IRBElse, ShadowBase); } else { - copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase); + copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase, + TargetTriple.isSPIROrSPIRV()); } } diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll index bf412b8225b79..88c0bf21ed54f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-mapping-scale=4 -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" @@ -21,11 +21,15 @@ define spir_kernel void @kernel() #0 { entry: %p.i = alloca [4 x i32], align 4 ; CHECK: %shadow_ptr = call i64 @__asan_mem_to_shadow(i64 %0, i32 0) + ; CHECK: call void @__asan_set_shadow_private(i64 %4, i64 2, i8 -15) + ; CHECK: call void @__asan_set_shadow_private(i64 %5, i64 1, i8 -13) call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %p.i) call void @llvm.memcpy.p0.p1.i64(ptr align 4 %p.i, ptr addrspace(1) align 4 @__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p, i64 16, i1 false) %arraydecay.i = getelementptr inbounds [4 x i32], ptr %p.i, i64 0, i64 0 %0 = addrspacecast ptr %arraydecay.i to ptr addrspace(4) %call.i = call spir_func i32 @_Z3fooPii(ptr addrspace(4) %0) + ; CHECK: call void @__asan_set_shadow_private(i64 %7, i64 2, i8 0) + ; CHECK: call void @__asan_set_shadow_private(i64 %8, i64 1, i8 0) ret void } diff --git a/llvm/test/SYCLLowerIR/SpecConstants/literal-address-alloca-asan.ll b/llvm/test/SYCLLowerIR/SpecConstants/literal-address-alloca-asan.ll new file mode 100644 index 0000000000000..1b904abfa0f3a --- /dev/null +++ b/llvm/test/SYCLLowerIR/SpecConstants/literal-address-alloca-asan.ll @@ -0,0 +1,33 @@ +; RUN: opt -passes=spec-constants %s -S -o - | FileCheck %s + +; Check there is no assert error when literal address is loaded from an alloca +; with offset. 
+ +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::specialization_id" = type { i32 } + +@_ZL9test_id_1 = addrspace(1) constant %"class.sycl::_V1::specialization_id" { i32 42 } +@__usid_str = constant [36 x i8] c"uide7faddc6b4d2fe92____ZL9test_id_1\00" + +define spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_14kernel_handlerEE_clES4_(ptr addrspace(4) %this1.i7) { +entry: + %MyAlloca = alloca i8, i64 224, align 32 + %0 = ptrtoint ptr %MyAlloca to i64 + %1 = add i64 %0, 96 + %2 = inttoptr i64 %1 to ptr + %SymbolicID.ascast.i = addrspacecast ptr %2 to ptr addrspace(4) + store ptr addrspace(4) addrspacecast (ptr @__usid_str to ptr addrspace(4)), ptr addrspace(4) %SymbolicID.ascast.i, align 8 + %3 = load ptr addrspace(4), ptr addrspace(4) %SymbolicID.ascast.i, align 8 + %4 = load ptr addrspace(4), ptr addrspace(4) %this1.i7, align 8 + +; CHECK-NOT: call spir_func noundef i32 @_Z37__sycl_getScalar2020SpecConstantValueIiET_PKcPKvS4_( +; CHECK: %conv = sitofp i32 %load to double + + %call.i8 = call spir_func i32 @_Z37__sycl_getScalar2020SpecConstantValueIiET_PKcPKvS4_(ptr addrspace(4) %3, ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZL9test_id_1 to ptr addrspace(4)), ptr addrspace(4) %4) + %conv = sitofp i32 %call.i8 to double + ret void +} + +declare spir_func i32 @_Z37__sycl_getScalar2020SpecConstantValueIiET_PKcPKvS4_(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4)) diff --git a/sycl-fusion/passes/kernel-fusion/SYCLSpecConstMaterializer.cpp b/sycl-fusion/passes/kernel-fusion/SYCLSpecConstMaterializer.cpp index b1f54ee21b78d..3637930d72f8f 100644 --- a/sycl-fusion/passes/kernel-fusion/SYCLSpecConstMaterializer.cpp +++ b/sycl-fusion/passes/kernel-fusion/SYCLSpecConstMaterializer.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #define DEBUG_TYPE "sycl-spec-const-materializer" @@ -298,9 +299,8 @@ PreservedAnalyses SYCLSpecConstMaterializer::run(Function &F, // Invariant: This pass is only intended to operate on SYCL kernels being // compiled to either `nvptx{,64}-nvidia-cuda`, or `amdgcn-amd-amdhsa` // triples. - auto AT = TargetHelpers::getArchType(*Mod); - if (TargetHelpers::ArchType::Cuda != AT && - TargetHelpers::ArchType::AMDHSA != AT) { + Triple T(Mod->getTargetTriple()); + if (!T.isNVPTX() && !T.isAMDGCN()) { LLVM_DEBUG(dbgs() << "Unsupported architecture\n"); return PreservedAnalyses::all(); } diff --git a/sycl-fusion/test/lit.cfg.py b/sycl-fusion/test/lit.cfg.py index d92326b020ce3..fffa59585ef0e 100644 --- a/sycl-fusion/test/lit.cfg.py +++ b/sycl-fusion/test/lit.cfg.py @@ -25,6 +25,6 @@ config.substitutions.append(("%shlibdir", config.llvm_shlib_dir)) if "NVPTX" in config.llvm_targets_to_build: - config.available_features.add('cuda') + config.available_features.add("cuda") if "AMDGPU" in config.llvm_targets_to_build: - config.available_features.add('hip_amd') + config.available_features.add("hip_amd") diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 35a9142059418..a5a70600e9002 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -30,6 +30,7 @@ option(SYCL_UMF_DISABLE_HWLOC set(UR_BUILD_EXAMPLES OFF CACHE BOOL "Build example applications." FORCE) set(UR_BUILD_TESTS OFF CACHE BOOL "Build unit tests." 
FORCE) set(UR_BUILD_XPTI_LIBS OFF) +set(UR_ENABLE_SYMBOLIZER ON CACHE BOOL "Enable symbolizer for sanitizer layer.") set(UR_ENABLE_TRACING ON) if("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) @@ -115,14 +116,14 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit a985a81dc9ba8adfcc8b54e35ad287e97766fb3e - # Merge: b7b0c8b3 f772f907 - # Author: Piotr Balcer - # Date: Mon Jul 29 09:11:29 2024 +0200 - # Merge pull request #1905 from igchor/umf_hwloc_disable - # Bump UMF version to allow disabling hwloc - set(UNIFIED_RUNTIME_TAG a985a81dc9ba8adfcc8b54e35ad287e97766fb3e) +set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + # commit 3e762e00bcf13d158fb58e8e8c2eabcfc8934b4e + # Merge: c805a71a a2a053de + # Author: Omar Ahmed + # Date: Wed Jul 31 12:26:34 2024 +0100 + # Merge pull request #1884 from callumfare/callum/fix_printtrace + # Enable PrintTrace when SYCL UR tracing is enabled + set(UNIFIED_RUNTIME_TAG 3e762e00bcf13d158fb58e8e8c2eabcfc8934b4e) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 866ac03436ab3..1a7a9062885db 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -1591,9 +1591,9 @@ The device aspect descriptors for these queries are: [frame="none",options="header"] |====================== |Device descriptor |Description -|`aspect::ext_oneapi_interop_memory_import` | Indicates if the device supports +|`aspect::ext_oneapi_external_memory_import` | Indicates if the device supports importing external memory resources. -|`aspect::ext_oneapi_interop_semaphore_import`` | Indicates if the device +|`aspect::ext_oneapi_external_semaphore_import`` | Indicates if the device supports importing external semaphore resources. |====================== @@ -1687,35 +1687,35 @@ resource type. ```cpp namespace sycl::ext::oneapi::experimental { -struct interop_mem_handle { +struct external_mem { using raw_handle_type = /* Implementation defined */; raw_handle_type raw_handle; }; template -interop_mem_handle import_external_memory( +external_mem import_external_memory( external_mem_descriptor externalMemDescriptor, const sycl::device &syclDevice, const sycl::context &syclContext); template -interop_mem_handle import_external_memory( +external_mem import_external_memory( external_mem_descriptor externalMemDescriptor, const sycl::queue &syclQueue); image_mem_handle map_external_image_memory( - interop_mem_handle interopMemHandle, + external_mem externalMemHandle, const image_descriptor &imageDescriptor, const sycl::device &syclDevice, const sycl::context &syclContext); image_mem_handle map_external_image_memory( - interop_mem_handle interopMemHandle, + external_mem externalMemHandle, const image_descriptor &imageDescriptor, const sycl::queue &syclQueue); } ``` -The resulting `interop_mem_handle` can then be mapped, where the resulting type +The resulting `external_mem` can then be mapped, where the resulting type is an `image_mem_handle`. This can be used to construct images in the same way as memory allocated through `alloc_image_mem`. 
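As an illustrative sketch (not part of the normative wording above), the import-and-map flow might look as follows, assuming `extMemDescriptor` is an `external_mem_descriptor` populated from an exported resource, and `imageDescriptor` and `syclQueue` are placeholder names:

```cpp
namespace syclexp = sycl::ext::oneapi::experimental;

// Import the external memory resource, then map it to image memory that can
// back an image in the same way as memory obtained from `alloc_image_mem`.
syclexp::external_mem extMem =
    syclexp::import_external_memory(extMemDescriptor, syclQueue);

syclexp::image_mem_handle mappedMem =
    syclexp::map_external_image_memory(extMem, imageDescriptor, syclQueue);
```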
The `ext_oneapi_copy` operations also work with imported memory mapped to `image_mem_handle` types. @@ -1734,16 +1734,16 @@ behaviour. Once a user has finished operating on imported memory, they must ensure that they destroy the imported memory handle through `release_external_memory`. -`release_external_memory` can only accept `interop_mem_handles` that were +`release_external_memory` can only accept `external_mem` objects that were created through `import_external_memory`. ```cpp namespace sycl::ext::oneapi::experimental { -void release_external_memory(interop_mem_handle interopMem, +void release_external_memory(external_mem externalMem, const sycl::device &syclDevice, const sycl::context &syclContext); -void release_external_memory(interop_mem_handle interopMem, +void release_external_memory(external_mem externalMem, const sycl::queue &syclQueue); } ``` @@ -1807,27 +1807,27 @@ compatible with the `resource_fd` resource type. ```cpp namespace sycl::ext::oneapi::experimental { -struct interop_semaphore_handle { +struct external_semaphore { using raw_handle_type = /* Implementation defined */; raw_handle_type raw_handle; }; template -interop_semaphore_handle import_external_semaphore( +external_semaphore import_external_semaphore( external_semaphore_descriptor externalSemaphoreDescriptor, const sycl::device &syclDevice, const sycl::context &syclContext); template -interop_semaphore_handle import_external_semaphore( +external_semaphore import_external_semaphore( external_semaphore_descriptor externalSemaphoreDescriptor, const sycl::queue &syclQueue); } ``` -The resulting `interop_semaphore_handle` can then be used in a SYCL command +The resulting `external_semaphore` can then be used in a SYCL command group, to either wait until the semaphore signalled, or signal the semaphore. 
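For illustration only, a minimal sketch of the semaphore import, assuming `extSemDescriptor` is an `external_semaphore_descriptor` built from an exported handle and `syclQueue` is a placeholder queue:

```cpp
namespace syclexp = sycl::ext::oneapi::experimental;

// Import the external semaphore; it can subsequently be waited on or
// signalled through the handler/queue APIs listed below.
syclexp::external_semaphore extSem =
    syclexp::import_external_semaphore(extSemDescriptor, syclQueue);

syclQueue.ext_oneapi_wait_external_semaphore(extSem);
```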
If the type of semaphore imported supports setting the state of discrete @@ -1843,77 +1843,77 @@ namespace sycl { class handler { public: void ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle); + ext::oneapi::experimental::external_semaphore + external_semaphore); void ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, uint64_t wait_value); void ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle); + ext::oneapi::experimental::external_semaphore + external_semaphore); void ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, uint64_t signal_value); }; class queue { public: event ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle); + ext::oneapi::experimental::external_semaphore + external_semaphore); event ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, event DepEvent); event ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, const std::vector &DepEvents); event ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, uint64_t wait_value); event ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, uint64_t wait_value, event DepEvent); event ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, uint64_t wait_value, const std::vector &DepEvents); event ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle); + ext::oneapi::experimental::external_semaphore + external_semaphore); event ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, event DepEvent); event ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, const std::vector &DepEvents); event ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, uint64_t signal_value); event ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + ext::oneapi::experimental::external_semaphore + external_semaphore, uint64_t signal_value, event DepEvent); event ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle - interop_semaphore_handle, + 
ext::oneapi::experimental::external_semaphore + external_semaphore, uint64_t signal_value, const std::vector &DepEvents); }; @@ -1952,11 +1952,11 @@ access the external semaphore once they are no longer required through ```cpp namespace sycl::ext::oneapi::experimental { -void release_external_semaphore(interop_semaphore_handle semaphoreHandle, +void release_external_semaphore(external_semaphore semaphoreHandle, const sycl::device &syclDevice, const sycl::context &syclContext); -void release_external_semaphore(interop_semaphore_handle semaphoreHandle, +void release_external_semaphore(external_semaphore semaphoreHandle, const sycl::queue &syclQueue); } @@ -2547,34 +2547,34 @@ sycl::ext::oneapi::experimental::external_semaphore_descriptor< try { // Extension: import external semaphores - sycl::ext::oneapi::experimental::interop_semaphore_handle - wait_interop_semaphore_handle = + sycl::ext::oneapi::experimental::external_semaphore + wait_external_semaphore = sycl::ext::oneapi::experimental::import_external_semaphore( wait_external_semaphore_desc, queue); - sycl::ext::oneapi::experimental::interop_semaphore_handle - done_interop_semaphore_handle = + sycl::ext::oneapi::experimental::external_semaphore + done_external_semaphore = sycl::ext::oneapi::experimental::import_external_semaphore( done_external_semaphore_desc, queue); // Extension: import external memory from descriptors - sycl::ext::oneapi::experimental::interop_mem_handle - input_interop_mem_handle = + sycl::ext::oneapi::experimental::external_mem + input_external_mem = sycl::ext::oneapi::experimental::import_external_memory( input_ext_mem_desc, queue); - sycl::ext::oneapi::experimental::interop_mem_handle - output_interop_mem_handle = + sycl::ext::oneapi::experimental::external_mem + output_external_mem = sycl::ext::oneapi::experimental::import_external_memory( output_ext_mem_desc, queue); // Extension: map imported external memory to image memory sycl::ext::oneapi::experimental::image_mem_handle input_mapped_mem_handle = sycl::ext::oneapi::experimental::map_external_image_memory( - input_interop_mem_handle, desc, queue); + input_external_mem, desc, queue); sycl::ext::oneapi::experimental::image_mem_handle output_mapped_mem_handle = sycl::ext::oneapi::experimental::map_external_image_memory( - output_interop_mem_handle, desc, queue); + output_external_mem, desc, queue); // Extension: create images from mapped memory and return the handles sycl::ext::oneapi::experimental::unsampled_image_handle img_input = @@ -2585,7 +2585,7 @@ try { output_mapped_mem_handle, desc, queue); // Extension: wait for imported semaphore - q.ext_oneapi_wait_external_semaphore(wait_interop_semaphore_handle) + q.ext_oneapi_wait_external_semaphore(wait_external_semaphore) // Submit our kernel that depends on imported "wait_semaphore_file_descriptor" q.submit([&](sycl::handler &cgh) { @@ -2610,7 +2610,7 @@ try { }); // Extension: signal imported semaphore - q.ext_oneapi_signal_external_semaphore(done_interop_semaphore_handle) + q.ext_oneapi_signal_external_semaphore(done_external_semaphore) // The external API can now use the semaphore it exported to // "done_semaphore_file_descriptor" to schedule its own command submissions @@ -2619,13 +2619,13 @@ try { // Extension: destroy all external resources sycl::ext::oneapi::experimental::release_external_memory( - input_interop_mem_handle, queue); + input_external_mem, queue); sycl::ext::oneapi::experimental::release_external_memory( - output_interop_mem_handle, queue); + output_external_mem, queue); 
sycl::ext::oneapi::experimental::release_external_semaphore( - wait_interop_semaphore_handle, queue); + wait_external_semaphore, queue); sycl::ext::oneapi::experimental::release_external_semaphore( - done_interop_semaphore_handle, queue); + done_external_semaphore, queue); sycl::ext::oneapi::experimental::destroy_image_handle(img_input, queue); sycl::ext::oneapi::experimental::destroy_image_handle(img_output, queue); } catch (sycl::exception e) { @@ -2859,4 +2859,11 @@ These features still need to be handled: 3D USM images as they are not supported on any platform. - Refine the description of `ext_oneapi_bindless_images` aspect to indicate support for bindless image APIs. +|5.16|2024-07-24| - Renamed interop aspect queries, handles, semaphore wait and + signal functions, by replacing `interop` with `external` for + consistency with other interop related structs/funcs and + 3rd party interop API naming. + - Removed `handle` keyword from `interop_xxx_handle` to + clear up possible confusion between 3rd party interop + handles and the imported `interop_xxx_handle`. |====================== diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index a39c1d1c1884b..8e37c76ecc16a 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -556,6 +556,114 @@ Parameters: |=== +==== Dynamic Command Groups + +[source,c++] +---- +namespace ext::oneapi::experimental { +class dynamic_command_group { +public: + dynamic_command_group( + command_graph graph, + const std::vector>& cgfList); + + size_t get_active_cgf(); + void set_active_cgf(size_t cgfIndex); +}; +---- + +Dynamic command-groups can be added as nodes to a graph. They provide a mechanism that +allows updating the command-group function of a node after the graph is finalized. +There is always one command-group function in the dynamic command-group that is set +as active. When a dynamic command-group node is executed, the kernel of the active +command-group function will be run and all the other command-group functions in +`cgfList` will be ignored. + +See <> for more information +about updating command-groups. + +===== Limitations + +Dynamic command-groups can only be used to update kernels. Trying to update a command-group +function that contains other operations will result in an error. + +All the command-group functions in a dynamic command-group must have identical dependencies. +It is not allowed for a dynamic command-group to have command-group functions that would +result in a change to the graph topology when set to active. In practice, this means that +any calls to `handler.depends_on()` must be identical for all the command-group functions +in a dynamic command-group. + +Table {counter: tableNumber}. Member functions of the `dynamic_command_group` class. +[cols="2a,a"] +|=== +|Member Function|Description + +| +[source,c++] +---- +dynamic_command_group( +command_graph graph, +const std::vector>& cgfList); +---- + +|Constructs a dynamic command-group object that can be added as a node to a `command_graph`. + +Parameters: + +* `graph` - Graph to be associated with this `dynamic_command_group`. +* `cgfList` - The list of command-group functions that can be activated for this dynamic command-group. + The command-group function at index 0 will be active by default. 
+ +Exceptions: + +* Throws synchronously with error code `invalid` if the graph wasn't created with + the `property::graph::assume_buffer_outlives_graph` property and the `dynamic_command_group` + is created with command-group functions that use buffers. See the + <> + property for more information. + +* Throws with error code `invalid` if the `dynamic_command_group` is created with + command-group functions that are not kernel executions. + +* Throws with error code `invalid` if the command-group functions in `cgfList` have + event dependencies that are incompatible with each other and would result in + different graph topologies when set to active. + +| +[source,c++] +---- +size_t get_active_cgf(); +---- +|Returns the index of the currently active command-group function in this +`dynamic_command_group`. + +| +[source,c++] +---- +void set_active_cgf(size_t cgfIndex); +---- +| Sets the command-group function with index `cgfIndex` as active. The index of the +command-group function in a `dynamic_command_group` is identical to its index in the +`cgfList` vector when it was passed to the `dynamic_command_group` constructor. + +This change will be reflected immediately in the modifiable graph which contains this +`dynamic_command_group`. The new value will not be reflected in any executable graphs +created from that modifiable graph until `command_graph::update()` is called, passing +the modified nodes, or a new executable graph is finalized from the modifiable graph. + +Setting `cgfIndex` to the index of the currently active command-group function is +a no-op. + +Parameters: + +* `cgfIndex` - The index of the command-group function that should be set as active. + +Exceptions: + +* Throw with error code `invalid` if `cgfIndex` is not a valid index. + +|=== + ==== Depends-On Property [source,c++] @@ -631,6 +739,8 @@ public: template node add(T cgf, const property_list& propList = {}); + node add(dynamic_command_group& dynamicCG, const property_list& propList = {}); + void make_edge(node& src, node& dest); void print_graph(std::string path, bool verbose = false) const; @@ -711,21 +821,39 @@ Updates to a graph will be scheduled after any in-flight executions of the same graph and will not affect previous submissions of the same graph. The user is not required to wait on any previous submissions of a graph before updating it. -The only type of nodes that are currently able to be updated in a graph are -kernel execution nodes. - -The aspects of a kernel execution node that can be configured during update are: - -* Parameters to the kernel. -* Execution ND-Range of the kernel. - To update an executable graph, the `property::graph::updatable` property must have been set when the graph was created during finalization. Otherwise, an exception will be thrown if a user tries to update an executable graph. This guarantee allows the backend to provide a more optimized implementation, if possible. -===== Individual Node Update +===== Supported Features + +The only types of nodes that are currently able to be updated in a graph are +kernel execution nodes. + +There are two different API's that can be used to update a graph: + +* <> which allows updating +individual nodes of a command-graph. +* <> which allows updating the +entirety of the graph simultaneously by using another graph as a +reference. 
+ +The aspects of a kernel execution node that can be changed during update are +different depending on the API used to perform the update: + +* For the <> API it's possible to update +the kernel function, the parameters to the kernel, and the ND-Range. +* For the <> API, only the parameters of the kernel +and the ND-Range can be updated. + +===== Individual Node Update [[individual-node-update]] + +Individual nodes of an executable graph can be updated directly. Depending on the attribute +of the node that requires updating, different API's should be used: + +====== Parameter Updates Parameters to individual nodes in a graph in the `executable` state can be updated between graph executions using dynamic parameters. A `dynamic_parameter` @@ -739,14 +867,6 @@ Parameter updates are performed using a `dynamic_parameter` instance by calling not registered, even if they use the same parameter value as a `dynamic_parameter`. -The other node configuration that can be updated is the execution range of the -kernel, this can be set through `node::update_nd_range()` or -`node::update_range()` but does not require any prior registration. - -The executable graph can then be updated by passing the updated nodes to -`command_graph::update(node& node)` or -`command_graph::update(const std::vector& nodes)`. - Since the structure of the graph became fixed when finalizing, updating parameters on a node will not change the already defined dependencies between nodes. This is important to note when updating buffer parameters to a node, @@ -762,6 +882,41 @@ dynamic parameter for the buffer can be registered with all the nodes which use the buffer as a parameter. Then a single `dynamic_parameter::update()` call will maintain the graphs data dependencies. +====== Execution Range Updates + +Another configuration that can be updated is the execution range of the +kernel, this can be set through `node::update_nd_range()` or +`node::update_range()` but does not require any prior registration. + +An alternative way to update the execution range of a node is to do so while +updating command groups as described in the next section. + +====== Command Group Updates + +The command-groups of a kernel node can be updated using dynamic command-groups. +Dynamic command-groups allow replacing the command-group function of a kernel +node with a different one. This effectively allows updating the kernel function +and/or the kernel execution range. + +Command-group updates are performed by creating an instance of the +`dynamic_command_group` class. A dynamic command-group is created with a modifiable +state graph and a list of possible command-group functions. Command-group functions +within a dynamic command-group can then be set to active by using the member function +`dynamic_command_group::set_active_cgf()`. + +Dynamic command-groups are compatible with dynamic parameters. This means that +dynamic parameters can be used in command-group functions that are part of +dynamic command-groups. Updates to such dynamic parameters will be reflected +in the command-group functions once they are activated. + +====== Committing Updates + +Updating a node using the methods mentioned above will take effect immediately +for nodes in modifiable command-graphs. However, for graphs that are in the executable +state, in order to commit the update, the updated nodes must be passed to +`command_graph::update(node& node)` or +`command_graph::update(const std::vector& nodes)`. 
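For illustration, a hedged sketch of committing an update to an executable graph, assuming `Node` is an existing kernel node and `ExecGraph` was finalized with the `property::graph::updatable` property (both names are placeholders):

[source,c++]
----
// Change the execution range on the node...
Node.update_nd_range(sycl::nd_range<1>{sycl::range<1>{1024}, sycl::range<1>{32}});

// ...then commit the change so it is reflected in the executable graph.
ExecGraph.update(Node);
----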
+ ===== Whole Graph Update [[whole-graph-update]] A graph in the executable state can have all of its nodes updated using the @@ -1042,6 +1197,42 @@ Exceptions: | [source,c++] ---- +node add(dynamic_command_group& dynamicCG, const property_list& propList = {}); +---- + +| Adds the dynamic command-group `dynamicCG` as a node to the graph and sets the +current active command-group function in `dynamicCG` as the executable for future +executions of this graph node. + +The current active command-group function in `dynamicCG` will be executed asynchronously +when the graph is submitted to a queue. The requisites of this command-group +function will be used to identify any dependent nodes in the graph +to form edges with. The other command-group functions in `dynamicCG` will be captured +into the graph but will not be executed in a graph submission unless they are +set to active. + +Constraints: + +* This member function is only available when the `command_graph` state is + `graph_state::modifiable`. + +Parameters: + +* `dynamicCG` - Dynamic command-group object to be added as a node. + +* `propList` - Zero or more properties can be provided to the constructed node + via an instance of `property_list`. The `property::node::depends_on` property + can be passed here with a list of nodes to create dependency edges on. + +Returns: The dynamic command-group object node which has been added to the graph. + +Exceptions: + +* Throws synchronously with error code `invalid` if a queue is recording + commands to the graph. +| +[source,c++] +---- void make_edge(node& src, node& dest); ---- @@ -1157,8 +1348,9 @@ void update(node& node); ---- | Updates an executable graph node that corresponds to `node`. `node` must be a -kernel execution node. Kernel arguments and the ND-range of the node will be -updated inside the executable graph to reflect the current values in `node`. +kernel execution node. The command-group function of the node will be updated, +inside the executable graph, to reflect the current values in `node`. This +includes the kernel function, the kernel nd-range and the kernel parameters. Updating these values will not change the structure of the graph. @@ -1190,9 +1382,10 @@ void update(const std::vector& nodes); ---- | Updates all executable graph nodes that corresponds to the nodes contained in -`nodes`. All nodes must be kernel nodes. Kernel arguments and the ND-range of -each node will be updated inside the executable graph to reflect the current -values in each node in `nodes`. +`nodes`. All nodes must be kernel nodes. The command-group function of each node +will be updated, inside the executable graph, to reflect the current values in +`nodes`. This includes the kernel function, the kernel nd-range and the kernel +parameters". Updating these values will not change the structure of the graph. @@ -1712,6 +1905,10 @@ the call to `queue::submit()` or `command_graph::add()` along with the calls to handler functions and this will not be reflected on future executions of the graph. +Similarly, any command-group function inside a `dynamic_command_group` will be +evaluated once, in index order, when submitted to the graph using +`command_graph::add()`. + Any code like this should be moved to a separate host-task and added to the graph via the recording or explicit APIs in order to be compatible with this extension. 
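As a non-normative sketch of the recommendation above, host-side work can be captured in its own host-task node (explicit API shown, with `myGraph` as in the later examples) so that it is re-executed on every graph submission rather than being evaluated once at `add()` time:

[source,c++]
----
node hostNode = myGraph.add([&](sycl::handler &cgh) {
  cgh.host_task([=]() {
    // Host-side work that must run on every execution of the graph.
  });
});
----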
@@ -2243,6 +2440,50 @@ node nodeA = myGraph.add([&](handler& cgh) { dynParamAccessor.update(bufferB.get_access()); ---- +=== Dynamic Command Groups + +Example showing how a graph with a dynamic command group node can be updated. + +[source,c++] +---- +queue Queue{}; +exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + +int *PtrA = malloc_device(1024, Queue); +int *PtrB = malloc_device(1024, Queue)​ + +auto CgfA = [&](handler &cgh) { + cgh.parallel_for(1024, [=](item<1> Item) { + PtrA[Item.get_id()] = 1;​ + }); +}; + +auto CgfB = [&](handler &cgh) { + cgh.parallel_for(512, [=](item<1> Item) { + PtrB[Item.get_id()] = 2; + }); +}; + +// Construct a dynamic command-group with CgfA as the active cgf (index 0). +auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CgfA, CgfB}); + +// Create a dynamic command-group graph node. +auto DynamicCGNode = Graph.add(DynamicCG); + +auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + +// The graph will execute CgfA. +Queue.ext_oneapi_graph(ExecGraph).wait(); + +// Sets CgfB as active in the dynamic command-group (index 1). +DynamicCG.set_active_cgf(1); + +// Calls update to update the executable graph node with the changes to DynamicCG. +ExecGraph.update(DynamicCGNode); + +// The graph will execute CgfB. +Queue.ext_oneapi_graph(ExecGraph).wait(); +---- === Whole Graph Update Example that shows recording and updating several nodes with different @@ -2444,6 +2685,16 @@ to ensure this is desired and makes sense to users. **UNRESOLVED** Needs more discussion. +=== Updatable command-groups in the Record & Replay API: + +Currently the only way to update command-groups in a graph is to use the +Explicit API. There is a limitation in some backends that requires all +the command-groups used for updating to be specified before the graph +is finalized. This restriction makes it hard to implement the +Record & Replay API in a performant manner. + +**UNRESOLVED** Needs more discussion. + === Multi Device Graph Allow an executable graph to contain nodes targeting different devices. diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_prefetch.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_prefetch.asciidoc similarity index 98% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_prefetch.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_prefetch.asciidoc index 4a035028ae6a1..d70e4c3bfbe26 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_prefetch.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_prefetch.asciidoc @@ -48,12 +48,11 @@ This extension also depends on the following other SYCL extensions: == Status -This is a proposed extension specification, intended to gather community -feedback. Interfaces defined in this specification may not be implemented yet -or may be in a preliminary state. The specification itself may also change in -incompatible ways before it is finalized. *Shipping software products should -not rely on APIs defined in this specification.* - +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in DPC++, but they are not finalized and may +change incompatibly in future versions of DPC++ without prior notice. 
*Shipping +software products should not rely on APIs defined in this specification.* == Overview diff --git a/sycl/doc/extensions/proposed/sycl_ext_codeplay_cuda_cluster_group.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_codeplay_cuda_cluster_group.asciidoc new file mode 100644 index 0000000000000..840e057f9d0b1 --- /dev/null +++ b/sycl/doc/extensions/proposed/sycl_ext_codeplay_cuda_cluster_group.asciidoc @@ -0,0 +1,420 @@ += sycl_ext_codeplay_cuda_cluster_group + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] + +Copyright (C) 2024-2024 Codeplay Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks of +The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + +Other company and product names may be trademarks of the respective companies +with which they are associated and can be claimed as the property of others. + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Contributors +Atharva Dubey, Codeplay + +Gordon Brown, Codeplay + +== Dependencies + +This extension is written against the SYCL 2020 revision 8 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + +This extensions also depends on the following other sycl extensions: + +* link:../experimental/sycl_ext_oneapi_enqueue_functions.asciidoc[ + sycl_ext_oneapi_enqueue_functions] +* link:../experimental/sycl/sycl_ext_oneapi_properties.asciidoc[ + sycl_ext_oneapi_properties +] + + +== Status + +This is a proposed extension specification, intended to gather community +feedback. Interfaces defined in this specification may not be implemented yet +or may be in a preliminary state. The specification itself may also change in +incompatible ways before it is finalized. *Shipping software products should +not rely on APIs defined in this specification.* + + +== Glossary + +* Compute Capability: Abbreviated as "cc", a number assigned to each generation +of NVIDIA's GPUs conveying the feature set associated with that number. + + + +== Overview + +CUDA compute capability (cc) 9.0 (sm_90 and above) devices introduces a new level in the +thread hierarchy, called as thread block clusters, in CUDA terminology. A thread +block cluster, is a collection of thread blocks (a work-group in SYCL +terminology) that run concurrently. The work-groups which make up a cluster +have the ability to access one another's local memory, and can be synchronized. +This has various applications, convolutions, GEMMs and FFTs to name a few. + +This proposal introduces a SYCL API to expose these capabilities, and defines a +mechanism to launch a kernel with clusters enabled, access the cluster's various +ranges and id's from the device code, atomics at the cluster level as well as +synchronize the cluster. This proposal also introduces a device aspect to check +if the SYCL device supports a cluster launch, and a device query to obtain the +maximum supported cluster size. 
+ + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_CODEPLAY_CUDA_CLUSTER_GROUP` to one of the values defined in the +table below. Applications can test for the existence of this macro to determine +if the implementation supports this feature, or applications can test the +macro's value to determine which of the extension's features the implementation +supports + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not version-ed, so the + feature-test macro always has this value. +|=== + + +=== Extension to `enum class aspect` + +[source] +---- +namespace sycl { +enum class aspect { + ... + ext_codeplay_cuda_cluster_group +} +} +---- + +A device requires the `ext_codeplay_cuda_cluster_group` aspect to +support launching a kernel with the `cluster_size` property defined in the +following section. + + +=== Launching a kernel with a `cluster_group` + +Because of the special scheduling guarantees associated with a cluster launch, +the backend must know which kernel would be using this feature. Thus, this +proposal introduces a new launch property called as `cluster_size` that will +contain the cluster size as a number of work-groups. + +[source,c++] +---- +namespace sycl::ext::codeplay::experimental::cuda { +/** +* Dim Dimensionality of the launch +* size sycl::range specifying the number of work-groups in the cluster + in each dimension. +*/ +template +struct cluster_size { + cluster_size(const sycl::range& size); + sycl::range get_cluster_size(); + ... +}; +using cluster_size_key = cluster_size; +} // namespace sycl::ext::codeplay::experimental::cuda +---- + +The property list can the be constructed as follows - + +[source,c++] +---- +properties cluster_launch_property{cluster_size({1, 2, 1})}; +---- + +[_Note:_ the total number of work-groups in the kernel must be a multiple of +the cluster size in each dimension. _{endnote}_] + +The launch functions introduced in `sycl_ext_oneapi_enqueue_functions` can then +be used to launch the kernel with the `cluster_size` property. + + +=== Querying Maximum Cluster Size + +To query the maximum supported cluster size, this proposal adds a new device +query, `max_cluster_group_size`, which returns the maximum possible number of +work-groups present inside the cluster. + +[source, c++] +---- +size_t max_cluster_size = + device.get_info< + ext::codeplay::experimental::cuda::info::device::max_cluster_group_size>(); +---- +[%header,cols="10,5,5"] +|=== +|Device descriptor +|Return Type +|Description + +|`ext::codeplay::experimental::cuda::info::device::max_cluster_group_size` +|size_t +|Returns the maximum possible number of work-groups that can constitute a +cluster-group +|=== + + +=== Accessing the Cluster Group From Device Code + +Building upon the group hierarchy in SYCL, this proposal adds another level +above group (for work-groups), to be called as `cluster-group`, which +represents a collection of work-groups and will be accessible via the `nd_item` +class, via a member function to be introduced called `ext_codeplay_cuda_get_cluster_group()`. 
+ + +[%header,cols="10,5"] +|=== +|Method +|Description + +|`cluster_group nd_item::ext_codeplay_cuda_get_cluster_group()` +|Returns the constituent `cluster_group` in the kernel, representing this +`cluster_group` object's overall position in the `nd_range` +|=== + + +The `cluster_group` class will contain the following member functions, to access +the various ids of the work-item and work-groups. + +[source,c++] +---- + template + class cluster_group { + public: + using id_type = id; + using range_type = range; + using linear_id_type = size_t; + + linear_id_type get_group_linear_id() const; + + linear_id_type get_local_linear_id() const; + + range_type get_group_range() const; + + id_type get_group_id() const; + + id_type get_local_id() const; + + range_type get_local_range() const; + + linear_id_type get_local_linear_range() const; + + linear_id_type get_group_linear_range() const; + + bool leader() const; + + static constexpr memory_scope fence_scope = + memory_scope::ext_codeplay_cuda_cluster_group; + } +---- + + +[%header,cols="5,5"] +|=== +|Method +|Description + +|`linear_id get_group_linear_id() const` +|Returns the linearized id of the calling work-group within the cluster. + +|`linear_id get_local_linear_id() const` +|Returns the linearized index of the calling work-item within the cluster. + +|`range_type get_group_range() const` +|Returns the number of work-groups in each dimension within the cluster. + +|`id_type get_group_id() const` +|Returns the id of the calling work-group along each dimension within the cluster. + +|`id_type get_local_id() const`; +|Returns the id of calling work-item along each dimension within the cluster. + +|`range_type get_local_range() const`; +|Returns the number of work-items along each dimension within the cluster. + +|`linear_id_type get_local_linear_range() const`; +|Returns a linearized version of the `range_type` returned by `get_local_range` + +|`linear_id_type get_group_linear_range() const`; +|Returns a linearized version of the `range_type` returned by `get_group_range` + +|`bool leader() const`; +|Returns true for exactly one work-item in the cluster, if the calling work-item +is the leader of the cluster group. The leader is guaranteed to be the work-item +for which `get_local_linear_id` return 0. +|=== + + +== Accessing another work-group's local memory + +Work-groups within the cluster have the ability to access another work-group's +local memory. Typically addresses which reside in the local memory of a +work-group can only be accessed by the work-items of that work-group. +Therefore, to access another work-group's local memory, the address needs to be +mapped such that the address in another work-group is accessible within the +calling work-item. Further, to access another work-group's local memory, +all the work-groups within the cluster must exist and the work-groups should +not cease to exist before all the memory operations are completed. This can be +ensured by synchronizing all the work-items within the cluster before and after +the local memory operations, using `group_barrier`. + +A member function of the `cluster_group` class; +`map_cluster_local_pointer` will perform the mapping and return a pointer +which can then be dereferenced by the calling work-item. 
+ + +[%header,cols="10,5"] +|=== +|Method +|Description + +|T* map_cluster_local_pointer(T* addr, size_t group_id) +|Accepts the equivalent address to the memory location relative to the calling +work-item which is to be mapped from the local memory of the work-group, as +specified by `group_id`, denoting the linear group id within the cluster +|=== + +Conversely, `get_cluster_group_linear_id_for_local_pointer` will return the +linearized id of the work-group a mapped local memory address belongs to. + +[%header,cols="10,5"] +|=== +|Method +|Description + +|size_t get_cluster_group_linear_id_for_local_pointer(T* addr) +|Accepts a pointer pointing to a valid local memory space, and the returns the +linearized id of the work-group in the cluster that address belongs to. +|=== + + +== Cluster Memory Fence Scope and Barrier + +Work-items in a work-group can access a local memory address from another +work-group in the cluster-group, which has been mapped as described above. To +facilitate this, a new memory scope is introduced to the `memory_scope` class; +`ext_codeplay_cuda_cluster_group` which indicates a memory ordering +constraint that applies to all work-items in the same cluster-group. This memory +scope can be used with `atomic_ref` and other SYCL APIs that use +`memory_scope`. + +[source, c++] +---- +namespace sycl { + + enum class memory_scope { + ... + ext_codeplay_cuda_cluster_group, + ... + }; + + namespace ext::codeplay::experimental::cuda { + inline constexpr auto memory_scope_cluster_group + = memory_scope::ext_codeplay_cuda_cluster_group; + } // namespace ext::codeplay::experimental::cuda +} // namespace sycl +---- + + +To coordinate all work-items in the cluster group, `sycl::group_barrier` can be +used, accepting the `cluster_group` class. + + +== Example + +This section adds a representative example of how to launch a kernel with +the cluster-range specified and accessing various id's within the kernel - + +[source,c++] +---- +sycl::event launch_kernel_with_cluster() { + namespace syclcp = sycl::ext::codeplay::experimental; + namespace syclex = sycl::ext::oneapi::experimental; + + sycl::queue q; + + sycl::nd_range<3> ndRange({4096, 4096, 32}, {32, 32, 1}); + syclex::properties props(syclcp::cuda::cluster_size({4, 4, 1})); + syclex::launch_config config(ndRange, props); + + return syclex::submit_with_event(q, [&](sycl::handler& cgh){ + syclex::nd_launch(cgh, config, [=](sycl::nd_item<3> it) { + auto cg = it.ext_codeplay_cuda_get_cluster_group(); + auto cgId = cg.get_group_id(); + ... + }); + }) +} +---- + + +== Known Issues + +. Forward Progress Guarantees ++ +-- +*UNRESOLVED* This Specification does not discuss the forward progress guarantees of the + cluster_group. +-- + +. Differentiating between decorated and generic address spaces ++ +-- +*UNRESOLVED* The functions `map_cluster_local_pointer` and +`get_cluster_group_linear_id_for_local_pointer` do not differentiate between +generic and local memory address spaces, which might not be the most efficient. 
+-- + + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Authors|Changes +|4|2024-06-26|Atharva Dubey, Jack Kirk|Added device query and aspects, + review comments and additional cluster group member functions +|2|2024-05-09|Atharva Dubey|Using enqueue functions to launch with properties +|1|2024-04-29|Atharva Dubey|Initial public working draft +|======================================== \ No newline at end of file diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_barrier.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_barrier.asciidoc index 2c3df707f0dfc..2e91bb675ac84 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_barrier.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_barrier.asciidoc @@ -129,16 +129,16 @@ namespace sycl::ext::oneapi::experimental { public: using arrival_token = __unspecified__; - static constexpr ptrdiff_t max() noexcept; + static constexpr std::ptrdiff_t max() noexcept; - constexpr explicit barrier(ptrdiff_t expected, + constexpr explicit barrier(std::ptrdiff_t expected, CompletionFunction f = CompletionFunction()); ~barrier(); barrier(const barrier&) = delete; barrier& operator=(const barrier&) = delete; - [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); + [[nodiscard]] arrival_token arrive(std::ptrdiff_t update = 1); void wait(arrival_token&& arrival) const; void arrive_and_wait(); @@ -150,14 +150,14 @@ namespace sycl::ext::oneapi::experimental { [source,c++] ---- -static constexpr ptrdiff_t max() noexcept; +static constexpr std::ptrdiff_t max() noexcept; ---- _Returns_: The maximum number of threads of execution that can be synchronized by any `barrier` with the specified `Scope` and `CompletionFunction`. [source,c++] ---- -constexpr explicit barrier(ptrdiff_t expected, CompletionFunction f = CompletionFunction()); +constexpr explicit barrier(std::ptrdiff_t expected, CompletionFunction f = CompletionFunction()); ---- _Preconditions_: If `Scope` is `memory_scope::work_group`, the calling thread of execution must be a work-item belonging to the work-group that will use the @@ -184,7 +184,7 @@ concurrently introduces a data race. [source,c++] ---- -[[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); +[[nodiscard]] arrival_token arrive(std::ptrdiff_t update = 1); ---- _Effects_: The calling thread of execution arrives at the barrier and decreases the expected count by `update`. @@ -240,6 +240,7 @@ extension. [source,c++] ---- +namespace syclex = sycl::ext::oneapi::experimental; using work_group_barrier = syclex::barrier; q.parallel_for(..., [=](sycl::nd_item it) { @@ -264,6 +265,7 @@ initialized on the device that will use the barrier. [source,c++] ---- +namespace syclex = sycl::ext::oneapi::experimental; using device_barrier = syclex::barrier; // Allocate memory for the barrier @@ -306,6 +308,7 @@ accessible by the host. 
[source,c++] ---- +namespace syclex = sycl::ext::oneapi::experimental; using system_barrier = syclex::barrier; // Allocate memory for the barrier diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md index 127df2d17cac9..6dd8708afeb62 100644 --- a/sycl/doc/syclcompat/README.md +++ b/sycl/doc/syclcompat/README.md @@ -42,7 +42,14 @@ Specifically, this library depends on the following SYCL extensions: ../extensions/supported/sycl_ext_oneapi_assert.asciidoc) * [sycl_ext_oneapi_enqueue_barrier]( ../extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc) -* [sycl_ext_oneapi_usm_device_read_only](../extensions/supported/sycl_ext_oneapi_usm_device_read_only.asciidoc) +* [sycl_ext_oneapi_usm_device_read_only]( + ../extensions/supported/sycl_ext_oneapi_usm_device_read_only.asciidoc) +* [sycl_ext_oneapi_properties]( + ../extensions/experimental/sycl_ext_oneapi_properties.asciidoc) +* [sycl_ext_oneapi_enqueue_functions]( + ../extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc) +* [sycl_ext_oneapi_kernel_properties]( + ../extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc) If available, the following extensions extend SYCLcompat functionality: @@ -206,44 +213,6 @@ These translate any kernel dimensions from one convention to the other. An example of an equivalent SYCL call for a 3D kernel using `compat` is `syclcompat::global_id::x() == get_global_id(2)`. -### Local Memory - -When using `compat` functions, there are two distinct interfaces to allocate -device local memory. The first interface uses the _sycl_ext_oneapi_local_memory_ -extension to leverage local memory defined at compile time. -_sycl_ext_oneapi_local_memory_ is accessed through the following wrapper: - -``` c++ -namespace syclcompat { - -template auto *local_mem(); - -} // syclcompat -``` - -`syclcompat::local_mem()` can be used as illustrated in the example -below. - -```c++ -// Sample kernel -using namespace syclcompat; -template -void local_mem_2d(int *d_A) { - // Local memory extension wrapper, size defined at compile-time - auto As = local_mem(); - int id_x = local_id::x(); - int id_y = local_id::y(); - As[id_y][id_x] = id_x * BLOCK_SIZE + id_y; - wg_barrier(); - int val = As[BLOCK_SIZE - id_y - 1][BLOCK_SIZE - id_x - 1]; - d_A[global_id::y() * BLOCK_SIZE + global_id::x()] = val; -} -``` - -The second interface allows users to allocate device local memory at runtime. -SYCLcompat provides this functionality through its kernel launch interface, -`launch`, defined in the following section. - ### launch SYCLcompat provides a kernel `launch` interface which accepts a function that @@ -254,7 +223,7 @@ device _function_ with the use of an `auto F` template parameter, and a variadic `Args` for the function's arguments. Various overloads for `launch` exist to permit the user to launch on a -specific `queue`, or to define dynamically sized device local memory. +specific `queue`, or to describe the range as either `nd_range` or `dim3, dim3`. ``` c++ namespace syclcompat { @@ -273,22 +242,6 @@ template sycl::event launch(const dim3 &grid, const dim3 &threads, sycl::queue q, Args... args); -template -sycl::event launch(const sycl::nd_range &range, size_t mem_size, - sycl::queue q, Args... args); - -template -sycl::event launch(const sycl::nd_range &range, size_t mem_size, - Args... args); - -template -sycl::event launch(const dim3 &grid, const dim3 &threads, - size_t mem_size, sycl::queue q, Args... 
args); - -template -sycl::event launch(const dim3 &grid, const dim3 &threads, - size_t mem_size, Args... args); - } // syclcompat ``` @@ -313,67 +266,156 @@ auto range = sycl::nd_range<3>{blocksPerGrid * threadsPerBlock, syclcompat::launch(range, d_A, d_B, d_C, n); ``` -For dynamic local memory allocation, `launch` injects a pointer to a -local `char *` accessor of `mem_size` as the last argument of the kernel -function. For example, the previous function named `vectorAdd` can be modified -with the following signature, which adds a `char *` pointer to access local -memory inside the kernel: +Note that since `syclcompat::launch` accepts a device function, the kernel +lambda is constructed by SYCLcompat internally. This means that, for +example, `sycl::local_accessor`s cannot be declared. Instead, users wishing to +use local memory should launch with a `launch_policy` object as described below. -``` c++ -void vectorAdd(const float *A, const float *B, float *C, int n, - char *local_mem); +#### launch_policy + +In addition to the simple `syclcompat::launch` interface described above, +SYCLcompat provides a more flexible (`experimental`) interface to `launch` a +kernel with a given `launch_policy`. By constructing and passing a +`launch_policy`, users can pass `sycl::ext::oneapi::experimental::properties` +associated with the kernel or launch, as well as request **local memory** for +the kernel. + +In order to disambiguate the variadic constructor of `launch_policy`, the +following wrapper structs are defined. The `kernel_properties` and +`launch_properties` wrappers can be constructed *either* with a variadc set of +properties, or with an existing `sycl_exp::properties` object. + +```cpp +namespace syclcompat::experimental { +namespace sycl_exp = sycl::ext::oneapi::experimental; + +// Wrapper for kernel sycl_exp::properties +template struct kernel_properties { + using Props = Properties; + template + kernel_properties(Props... properties); + template + kernel_properties(sycl_exp::properties properties) + Properties props; +}; + +// Wrapper for launch sycl_exp::properties +template struct launch_properties { + using Props = Properties; + template + launch_properties(Props... properties); + template + launch_properties(sycl_exp::properties properties) + Properties props; +}; + +// Wrapper for local memory size +struct local_mem_size { + local_mem_size(size_t size = 0); + size_t size; +}; + +} //namespace syclcompat::experimental ``` -Then, `vectorAdd` can be launched like this: +The constructors of `launch_policy` are variadic, accepting any form of range +(`nd_range`, `range`, `dim3`, `dim3, dim3`), followed by zero or more of +`local_memory_size`, `kernel_properties`, and `launch_properties`: ``` c++ -syclcompat::launch(blocksPerGrid, threadsPerBlock, mem_size, d_A, - d_B, d_C, n); +namespace syclcompat::experimental { +namespace sycl_exp = sycl::ext::oneapi::experimental; + +// launch_policy is constructed by the user & passed to `compat_exp::launch` +template +class launch_policy { +public: + using KPropsT = KProps; + using LPropsT = LProps; + using RangeT = Range; + static constexpr bool HasLocalMem = LocalMem; + + template + launch_policy(Range range, Ts... ts); + + template + launch_policy(dim3 global_range, Ts... ts); + + template + launch_policy(dim3 global_range, dim3 local_range, Ts... 
ts); + + KProps get_kernel_properties(); + LProps get_launch_properties(); + size_t get_local_mem_size(); + Range get_range(); +}; +} //namespace syclcompat::experimental ``` -or this: +The `launch` overloads accepting a `launch_policy` are: + +```cpp +namespace syclcompat::experimental { + +template +sycl::event launch(LaunchPolicy launch_policy, sycl::queue q, Args... args); + +template +sycl::event launch(LaunchPolicy launch_policy, Args... args); +} //namespace syclcompat::experimental -``` c++ -auto range = sycl::nd_range<3>{globalSize, localSize}; -syclcompat::launch(range, mem_size, d_A, d_B, d_C, n); ``` -This `launch` interface allows users to define an internal memory pool, or -scratchpad, that can then be reinterpreted as the datatype required by the user -within the kernel function. +For local memory, `launch` injects a `char *` pointer to the beginning +of a local accessor of the requested `local_mem_size` as the last argument of +the kernel function. This `char *` can then be reinterpreted as the datatype +required by the user within the kernel function. -To launch a kernel with a specified sub-group size, overloads similar to above -`launch` functions are present in the `syclcompat::experimental` namespace, -which accept SubgroupSize as a template parameter and can be called as -`launch` +For example, the previous function named `vectorAdd` can be modified +with the following signature, which adds a `char *` pointer to access local +memory inside the kernel: -```cpp +``` c++ +void vectorAdd(const float *A, const float *B, float *C, int n, + char *local_mem); +``` + +Then, the new `vectorAdd` can be launched like this: + +``` c++ +using syclcompat::experimental; +launch_policy policy{blocksPerGrid, threadsPerBlock, + local_mem_size(nbytes)}; +launch(policy, d_A, d_B, d_C, n); +``` -template -sycl::event launch(sycl::nd_range<3> launch_range, std::size_t local_memory_size, - sycl::queue queue, Args... args); +To request a different cache/local memory split on supported hardware: -template -sycl::event launch(sycl::nd_range launch_range, std::size_t local_memory_size, - Args... args); +```c++ +using syclcompat::experimental; +namespace sycl_intel_exp = sycl::ext::intel::experimental; -template -sycl::event launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim, - std::size_t local_memory_size, Args... args); +sycl_intel_exp::cache_config cache_config{ + sycl_intel_exp::large_slm}; +kernel_properties kernel_props{cache_config}; +launch_policy policy{blocksPerGrid, threadsPerBlock, + local_mem_size(nbytes), kernel_props}; +launch(policy, d_A, d_B, d_C, n); +``` -template -sycl::event launch(sycl::nd_range<3> launch_range, sycl::queue queue, - Args... args); +To request a certain cluster dimension on supported hardware: -template -sycl::event launch(sycl::nd_range launch_range, - Args... args); +```c++ +using syclcompat::experimental; +namespace sycl_exp = sycl::ext::oneapi::experimental; -template -sycl::event launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim, - Args... 
args); +sycl_exp::cuda::cluster_size cluster_dims(cluster_range); +launch_policy policy{blocksPerGrid, threadsPerBlock, + local_mem_size(nbytes), + launch_properties{cluster_dims}}; +launch(policy, d_A, d_B, d_C, n); ``` ### Utilities diff --git a/sycl/include/sycl/bit_cast.hpp b/sycl/include/sycl/bit_cast.hpp index 09148847eefe3..1f6573e54de6b 100644 --- a/sycl/include/sycl/bit_cast.hpp +++ b/sycl/include/sycl/bit_cast.hpp @@ -61,7 +61,7 @@ constexpr static_assert(std::is_trivially_default_constructible::value, "To must be trivially default constructible"); To to; - sycl::detail::memcpy(&to, &from, sizeof(To)); + sycl::detail::memcpy_no_adl(&to, &from, sizeof(To)); return to; #endif } diff --git a/sycl/include/sycl/detail/group_sort_impl.hpp b/sycl/include/sycl/detail/group_sort_impl.hpp index b33466b960c54..b3f72ddde09f6 100644 --- a/sycl/include/sycl/detail/group_sort_impl.hpp +++ b/sycl/include/sycl/detail/group_sort_impl.hpp @@ -495,28 +495,28 @@ struct ScratchMemory { operator T() const { T value{0}; - detail::memcpy(&value, MPtr, sizeof(T)); + detail::memcpy_no_adl(&value, MPtr, sizeof(T)); return value; } T operator++(int) noexcept { T value{0}; - detail::memcpy(&value, MPtr, sizeof(T)); + detail::memcpy_no_adl(&value, MPtr, sizeof(T)); T value_before = value++; - detail::memcpy(MPtr, &value, sizeof(T)); + detail::memcpy_no_adl(MPtr, &value, sizeof(T)); return value_before; } T operator++() noexcept { T value{0}; - detail::memcpy(&value, MPtr, sizeof(T)); + detail::memcpy_no_adl(&value, MPtr, sizeof(T)); ++value; - detail::memcpy(MPtr, &value, sizeof(T)); + detail::memcpy_no_adl(MPtr, &value, sizeof(T)); return value; } ReferenceObj &operator=(const T &value) noexcept { - detail::memcpy(MPtr, &value, sizeof(T)); + detail::memcpy_no_adl(MPtr, &value, sizeof(T)); return *this; } @@ -531,7 +531,7 @@ struct ScratchMemory { } void copy(const ReferenceObj &value) noexcept { - detail::memcpy(MPtr, value.MPtr, sizeof(T)); + detail::memcpy_no_adl(MPtr, value.MPtr, sizeof(T)); } private: diff --git a/sycl/include/sycl/detail/memcpy.hpp b/sycl/include/sycl/detail/memcpy.hpp index 9e2eac2b30b7c..b35d03a553385 100644 --- a/sycl/include/sycl/detail/memcpy.hpp +++ b/sycl/include/sycl/detail/memcpy.hpp @@ -13,7 +13,13 @@ namespace sycl { inline namespace _V1 { namespace detail { -inline void memcpy(void *Dst, const void *Src, size_t Size) { +// Using "memcpy_no_adl" function name instead of "memcpy" to prevent +// ambiguity with libc's memcpy. 
Even though they are in a different +// namespace, due to ADL, compiler may lookup "memcpy" symbol in the +// sycl::detail namespace, like in the following code: +// sycl::vec a, b; +// memcpy(&a, &b, sizeof(sycl::vec)); +inline void memcpy_no_adl(void *Dst, const void *Src, size_t Size) { #ifdef __SYCL_DEVICE_ONLY__ __builtin_memcpy(Dst, Src, Size); #else diff --git a/sycl/include/sycl/detail/spirv.hpp b/sycl/include/sycl/detail/spirv.hpp index 76d4c81a93fe8..90436366a20ea 100644 --- a/sycl/include/sycl/detail/spirv.hpp +++ b/sycl/include/sycl/detail/spirv.hpp @@ -397,9 +397,9 @@ EnableIfGenericBroadcast GroupBroadcast(Group g, T x, IdT local_id) { char *ResultBytes = reinterpret_cast(&Result); auto BroadcastBytes = [=](size_t Offset, size_t Size) { uint64_t BroadcastX, BroadcastResult; - detail::memcpy(&BroadcastX, XBytes + Offset, Size); + detail::memcpy_no_adl(&BroadcastX, XBytes + Offset, Size); BroadcastResult = GroupBroadcast(g, BroadcastX, local_id); - detail::memcpy(ResultBytes + Offset, &BroadcastResult, Size); + detail::memcpy_no_adl(ResultBytes + Offset, &BroadcastResult, Size); }; GenericCall(BroadcastBytes); return Result; @@ -449,9 +449,9 @@ EnableIfGenericBroadcast GroupBroadcast(Group g, T x, char *ResultBytes = reinterpret_cast(&Result); auto BroadcastBytes = [=](size_t Offset, size_t Size) { uint64_t BroadcastX, BroadcastResult; - detail::memcpy(&BroadcastX, XBytes + Offset, Size); + detail::memcpy_no_adl(&BroadcastX, XBytes + Offset, Size); BroadcastResult = GroupBroadcast(g, BroadcastX, local_id); - detail::memcpy(ResultBytes + Offset, &BroadcastResult, Size); + detail::memcpy_no_adl(ResultBytes + Offset, &BroadcastResult, Size); }; GenericCall(BroadcastBytes); return Result; @@ -1104,9 +1104,9 @@ EnableIfGenericShuffle Shuffle(GroupT g, T x, id<1> local_id) { char *ResultBytes = reinterpret_cast(&Result); auto ShuffleBytes = [=](size_t Offset, size_t Size) { ShuffleChunkT ShuffleX, ShuffleResult; - detail::memcpy(&ShuffleX, XBytes + Offset, Size); + detail::memcpy_no_adl(&ShuffleX, XBytes + Offset, Size); ShuffleResult = Shuffle(g, ShuffleX, local_id); - detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size); + detail::memcpy_no_adl(ResultBytes + Offset, &ShuffleResult, Size); }; GenericCall(ShuffleBytes); return Result; @@ -1119,9 +1119,9 @@ EnableIfGenericShuffle ShuffleXor(GroupT g, T x, id<1> local_id) { char *ResultBytes = reinterpret_cast(&Result); auto ShuffleBytes = [=](size_t Offset, size_t Size) { ShuffleChunkT ShuffleX, ShuffleResult; - detail::memcpy(&ShuffleX, XBytes + Offset, Size); + detail::memcpy_no_adl(&ShuffleX, XBytes + Offset, Size); ShuffleResult = ShuffleXor(g, ShuffleX, local_id); - detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size); + detail::memcpy_no_adl(ResultBytes + Offset, &ShuffleResult, Size); }; GenericCall(ShuffleBytes); return Result; @@ -1134,9 +1134,9 @@ EnableIfGenericShuffle ShuffleDown(GroupT g, T x, uint32_t delta) { char *ResultBytes = reinterpret_cast(&Result); auto ShuffleBytes = [=](size_t Offset, size_t Size) { ShuffleChunkT ShuffleX, ShuffleResult; - detail::memcpy(&ShuffleX, XBytes + Offset, Size); + detail::memcpy_no_adl(&ShuffleX, XBytes + Offset, Size); ShuffleResult = ShuffleDown(g, ShuffleX, delta); - detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size); + detail::memcpy_no_adl(ResultBytes + Offset, &ShuffleResult, Size); }; GenericCall(ShuffleBytes); return Result; @@ -1149,9 +1149,9 @@ EnableIfGenericShuffle ShuffleUp(GroupT g, T x, uint32_t delta) { char *ResultBytes = reinterpret_cast(&Result); 
auto ShuffleBytes = [=](size_t Offset, size_t Size) { ShuffleChunkT ShuffleX, ShuffleResult; - detail::memcpy(&ShuffleX, XBytes + Offset, Size); + detail::memcpy_no_adl(&ShuffleX, XBytes + Offset, Size); ShuffleResult = ShuffleUp(g, ShuffleX, delta); - detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size); + detail::memcpy_no_adl(ResultBytes + Offset, &ShuffleResult, Size); }; GenericCall(ShuffleBytes); return Result; diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index 385afb05c3ce4..b3b614fae58ce 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -233,14 +233,14 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_images_2d_usm__ 0 #endif -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_interop_memory_import__ -//__SYCL_ASPECT(ext_oneapi_interop_memory_import, 46) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_interop_memory_import__ 0 +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_external_memory_import__ +//__SYCL_ASPECT(ext_oneapi_external_memory_import, 46) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_external_memory_import__ 0 #endif -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_interop_semaphore_import__ -//__SYCL_ASPECT(ext_oneapi_interop_semaphore_import, 48) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_interop_semaphore_import__ 0 +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_external_semaphore_import__ +//__SYCL_ASPECT(ext_oneapi_external_semaphore_import, 48) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_external_semaphore_import__ 0 #endif #ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_mipmap__ @@ -615,14 +615,14 @@ #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_images_2d_usm__ 0 #endif -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_interop_memory_import__ -//__SYCL_ASPECT(ext_oneapi_interop_memory_import, 46) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_interop_memory_import__ 0 +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_external_memory_import__ +//__SYCL_ASPECT(ext_oneapi_external_memory_import, 46) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_external_memory_import__ 0 #endif -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_interop_semaphore_import__ -//__SYCL_ASPECT(ext_oneapi_interop_semaphore_import, 48) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_interop_semaphore_import__ 0 +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_external_semaphore_import__ +//__SYCL_ASPECT(ext_oneapi_external_semaphore_import, 48) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_external_semaphore_import__ 0 #endif #ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_mipmap__ diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 447e66ea50e98..dcd707aaa0be3 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -12,7 +12,7 @@ #include // for __SYCL_EXPORT #include // for device #include // for image_desc... -#include // for interop_me... +#include // for external_m... #include // for image_mem_... #include // for bindless_i... #include // for image_chan... 
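The `memcpy` → `memcpy_no_adl` rename above exists purely so that unqualified `memcpy` calls in user code cannot pick up the internal SYCL helper through argument-dependent lookup. The sketch below (not part of the patch; variable names are illustrative) shows the situation the comment in `memcpy.hpp` describes and the usual user-side fix of qualifying the call.

```c++
#include <cstring>
#include <sycl/sycl.hpp>

int main() {
  sycl::vec<float, 4> a{1.f, 2.f, 3.f, 4.f}, b{};
  // An unqualified memcpy(&b, &a, ...) lets ADL consider overloads reachable
  // through SYCL's namespaces alongside libc's ::memcpy. Qualifying the call
  // removes the ambiguity, and renaming the internal helper to memcpy_no_adl
  // keeps the SYCL headers out of that overload set entirely.
  std::memcpy(&b, &a, sizeof(sycl::vec<float, 4>));
  return static_cast<int>(b[0]);
}
```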
@@ -124,150 +124,134 @@ get_mip_level_mem_handle(const image_mem_handle mipMem, unsigned int level, const sycl::queue &syclQueue); /** - * @brief Import external memory taking an external memory handle (the type - * of which is dependent on the OS & external API) and return an - * interop memory handle + * @brief Import external memory taking an external memory descriptor (the + * type of which is dependent on the OS & external API) and return an + * imported external memory object * - * @tparam ExternalMemHandleType Handle type describing external memory handle - * @param externalMem External memory descriptor - * @param syclDevice The device in which we create our interop memory - * @param syclContext The context in which we create our interop memory - * handle - * @return Interop memory handle to the external memory + * @tparam ResourceType Resource type differentiating external resource types + * @param externalMemDesc External memory descriptor + * @param syclDevice The device in which we create our external memory + * @param syclContext The context in which we create our external memory + * @return Imported opaque external memory */ -template -__SYCL_EXPORT interop_mem_handle import_external_memory( - external_mem_descriptor externalMem, +template +__SYCL_EXPORT external_mem import_external_memory( + external_mem_descriptor externalMemDesc, const sycl::device &syclDevice, const sycl::context &syclContext); /** - * @brief Import external memory taking an external memory handle (the type - * of which is dependent on the OS & external API) and return an - * interop memory handle + * @brief Import external memory taking an external memory descriptor (the + * type of which is dependent on the OS & external API) and return an + * imported external memory object * - * @tparam ExternalMemHandleType Handle type describing external memory handle - * @param externalMem External memory descriptor - * @param syclQueue The queue in which we create our interop memory - * handle - * @return Interop memory handle to the external memory + * @tparam ResourceType Resource type differentiating external resource types + * @param externalMemDesc External memory descriptor + * @param syclQueue The queue in which we create our external memory + * @return Imported opaque external memory */ -template -__SYCL_EXPORT interop_mem_handle import_external_memory( - external_mem_descriptor externalMem, - const sycl::queue &syclQueue); +template +__SYCL_EXPORT external_mem +import_external_memory(external_mem_descriptor externalMemDesc, + const sycl::queue &syclQueue); /** - * @brief Maps an interop memory handle to an image memory handle (which may + * @brief Maps an external memory object to an image memory handle (which may * have a device optimized memory layout) * - * @param memHandle Interop memory handle + * @param extMem External memory object * @param desc The image descriptor - * @param syclDevice The device in which we create our image memory handle + * @param syclDevice The device in which we create our image memory handle * @param syclContext The conext in which we create our image memory handle * @return Memory handle to externally allocated memory on the device */ __SYCL_EXPORT -image_mem_handle map_external_image_memory(interop_mem_handle memHandle, +image_mem_handle map_external_image_memory(external_mem extMem, const image_descriptor &desc, const sycl::device &syclDevice, const sycl::context &syclContext); /** - * @brief Maps an interop memory handle to an image memory handle (which may + * @brief 
Maps an external memory handle to an image memory handle (which may * have a device optimized memory layout) * - * @param memHandle Interop memory handle + * @param extMem External memory object * @param desc The image descriptor * @param syclQueue The queue in which we create our image memory handle * @return Memory handle to externally allocated memory on the device */ __SYCL_EXPORT -image_mem_handle map_external_image_memory(interop_mem_handle memHandle, +image_mem_handle map_external_image_memory(external_mem extMem, const image_descriptor &desc, const sycl::queue &syclQueue); /** - * @brief Import external semaphore taking an external semaphore handle (the - * type of which is dependent on the OS & external API) + * @brief Import external semaphore taking an external semaphore descriptor + * (the type of which is dependent on the OS & external API) * - * @tparam ExternalSemaphoreHandleType Handle type describing external - * semaphore handle + * @tparam ResourceType Resource type differentiating external resource types * @param externalSemaphoreDesc External semaphore descriptor - * @param syclDevice The device in which we create our interop semaphore - * handle - * @param syclContext The context in which we create our interop semaphore - * handle - * @return Interop semaphore handle to the external semaphore + * @param syclDevice The device in which we create our external semaphore + * @param syclContext The context in which we create our external semaphore + * @return Imported opaque external semaphore */ -template -__SYCL_EXPORT interop_semaphore_handle import_external_semaphore( - external_semaphore_descriptor - externalSemaphoreDesc, +template +__SYCL_EXPORT external_semaphore import_external_semaphore( + external_semaphore_descriptor externalSemaphoreDesc, const sycl::device &syclDevice, const sycl::context &syclContext); /** - * @brief Import external semaphore taking an external semaphore handle (the - * type of which is dependent on the OS & external API) + * @brief Import external semaphore taking an external semaphore descriptor + * (the type of which is dependent on the OS & external API) * - * @tparam ExternalSemaphoreHandleType Handle type describing external - * semaphore handle + * @tparam ResourceType Resource type differentiating external resource types * @param externalSemaphoreDesc External semaphore descriptor - * @param syclQueue The queue in which we create our interop semaphore - * handle - * @return Interop semaphore handle to the external semaphore + * @param syclQueue The queue in which we create our external semaphore + * @return Imported opaque external semaphore */ -template -__SYCL_EXPORT interop_semaphore_handle import_external_semaphore( - external_semaphore_descriptor - externalSemaphoreDesc, +template +__SYCL_EXPORT external_semaphore import_external_semaphore( + external_semaphore_descriptor externalSemaphoreDesc, const sycl::queue &syclQueue); /** * @brief Release the external semaphore * - * @param semaphoreHandle The interop semaphore handle to destroy - * @param syclDevice The device in which the interop semaphore handle was - * created - * @param syclContext The context in which the interop semaphore handle was - * created + * @param extSemaphore The external semaphore to destroy + * @param syclDevice The device in which the external semaphore was created + * @param syclContext The context in which the external semaphore was + * created */ -__SYCL_EXPORT void -release_external_semaphore(interop_semaphore_handle semaphoreHandle, - const 
sycl::device &syclDevice, - const sycl::context &syclContext); +__SYCL_EXPORT void release_external_semaphore(external_semaphore extSemaphore, + const sycl::device &syclDevice, + const sycl::context &syclContext); /** * @brief Release the external semaphore * - * @param semaphoreHandle The interop semaphore handle to destroy - * @param syclQueue The queue in which the interop semaphore handle was - * created + * @param extSemaphore The external semaphore to destroy + * @param syclQueue The queue in which the external semaphore was created */ -__SYCL_EXPORT void -release_external_semaphore(interop_semaphore_handle semaphoreHandle, - const sycl::queue &syclQueue); +__SYCL_EXPORT void release_external_semaphore(external_semaphore extSemaphore, + const sycl::queue &syclQueue); /** * @brief Release external memory * - * @param interopHandle The interop memory handle to release - * @param syclDevice The device in which the interop memory handle was - * created - * @param syclContext The context in which the interop memory handle was - * created + * @param externalMem The external memory to release + * @param syclDevice The device in which the external memory was created + * @param syclContext The context in which the external memory was created */ -__SYCL_EXPORT void release_external_memory(interop_mem_handle interopHandle, +__SYCL_EXPORT void release_external_memory(external_mem externalMem, const sycl::device &syclDevice, const sycl::context &syclContext); /** * @brief Release external memory * - * @param interopHandle The interop memory handle to release - * @param syclQueue The queue in which the interop memory handle was - * created + * @param externalMem The external memory to release + * @param syclQueue The queue in which the external memory was created */ -__SYCL_EXPORT void release_external_memory(interop_mem_handle interopHandle, +__SYCL_EXPORT void release_external_memory(external_mem externalMem, const sycl::queue &syclQueue); /** @@ -1642,7 +1626,7 @@ inline event queue::ext_oneapi_copy( } inline event queue::ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -1654,7 +1638,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( } inline event queue::ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -1666,7 +1650,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( } inline event queue::ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, uint64_t WaitValue, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -1677,7 +1661,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( } inline event queue::ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, uint64_t WaitValue, event DepEvent, const detail::code_location &CodeLoc) { 
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -1689,7 +1673,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( } inline event queue::ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, uint64_t WaitValue, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); @@ -1702,7 +1686,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( } inline event queue::ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -1713,7 +1697,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( } inline event queue::ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -1725,7 +1709,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( } inline event queue::ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -1737,7 +1721,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( } inline event queue::ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, uint64_t SignalValue, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -1748,7 +1732,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( } inline event queue::ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, uint64_t SignalValue, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); @@ -1761,7 +1745,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( } inline event queue::ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, uint64_t SignalValue, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp index a9307c069e15e..a0d6eef50c5f6 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp @@ -30,15 +30,15 @@ enum class external_semaphore_handle_type { win32_nt_dx12_fence = 2, }; -/// Opaque interop memory handle type -struct interop_mem_handle { - using raw_handle_type = ur_exp_interop_mem_handle_t; +/// Opaque external memory handle type +struct external_mem 
{ + using raw_handle_type = ur_exp_external_mem_handle_t; raw_handle_type raw_handle; }; -/// Opaque interop semaphore handle type -struct interop_semaphore_handle { - using raw_handle_type = ur_exp_interop_semaphore_handle_t; +/// Imported opaque external semaphore +struct external_semaphore { + using raw_handle_type = ur_exp_external_semaphore_handle_t; raw_handle_type raw_handle; external_semaphore_handle_type handle_type; }; diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp index 368d4ae98e35e..ed513ae3d2098 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp @@ -26,7 +26,7 @@ namespace detail { template uint32_t to_uint32_t(sycl::marray x, size_t start) { uint32_t res; - sycl::detail::memcpy(&res, &x[start], sizeof(uint32_t)); + sycl::detail::memcpy_no_adl(&res, &x[start], sizeof(uint32_t)); return res; } } // namespace detail @@ -112,7 +112,7 @@ sycl::marray fabs(sycl::marray x) { (__SYCL_CUDA_ARCH__ >= 800) for (size_t i = 0; i < N / 2; i++) { auto partial_res = __clc_fabs(detail::to_uint32_t(x, i * 2)); - sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); + sycl::detail::memcpy_no_adl(&res[i * 2], &partial_res, sizeof(uint32_t)); } if (N % 2) { @@ -188,7 +188,7 @@ sycl::marray fmin(sycl::marray x, for (size_t i = 0; i < N / 2; i++) { auto partial_res = __clc_fmin(detail::to_uint32_t(x, i * 2), detail::to_uint32_t(y, i * 2)); - sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); + sycl::detail::memcpy_no_adl(&res[i * 2], &partial_res, sizeof(uint32_t)); } if (N % 2) { @@ -270,7 +270,7 @@ sycl::marray fmax(sycl::marray x, for (size_t i = 0; i < N / 2; i++) { auto partial_res = __clc_fmax(detail::to_uint32_t(x, i * 2), detail::to_uint32_t(y, i * 2)); - sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); + sycl::detail::memcpy_no_adl(&res[i * 2], &partial_res, sizeof(uint32_t)); } if (N % 2) { @@ -340,7 +340,7 @@ sycl::marray fma(sycl::marray x, auto partial_res = __clc_fma(detail::to_uint32_t(x, i * 2), detail::to_uint32_t(y, i * 2), detail::to_uint32_t(z, i * 2)); - sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); + sycl::detail::memcpy_no_adl(&res[i * 2], &partial_res, sizeof(uint32_t)); } if (N % 2) { diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 6ba75098bd534..facc486ca2f84 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -126,7 +126,7 @@ inline __SYCL_ALWAYS_INLINE #else auto partial_res = sycl::tanh(sycl::detail::to_vec2(x, i * 2)); #endif - sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(vec)); + sycl::detail::memcpy_no_adl(&res[i * 2], &partial_res, sizeof(vec)); } if (N % 2) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) @@ -167,7 +167,7 @@ exp2(sycl::marray x) __NOEXC { #else auto partial_res = sycl::exp2(sycl::detail::to_vec2(x, i * 2)); #endif - sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(vec)); + sycl::detail::memcpy_no_adl(&res[i * 2], &partial_res, sizeof(vec)); } if (N % 2) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) diff --git a/sycl/include/sycl/ext/oneapi/sub_group_mask.hpp b/sycl/include/sycl/ext/oneapi/sub_group_mask.hpp index 1394f1f77e139..7a3bef52110db 100644 --- a/sycl/include/sycl/ext/oneapi/sub_group_mask.hpp +++ 
b/sycl/include/sycl/ext/oneapi/sub_group_mask.hpp @@ -111,7 +111,7 @@ struct sub_group_mask { size_t RemainingBytes = sizeof(Bits) - BytesCopied; size_t BytesToCopy = RemainingBytes < sizeof(T) ? RemainingBytes : sizeof(T); - sycl::detail::memcpy(reinterpret_cast(&Bits) + BytesCopied, + sycl::detail::memcpy_no_adl(reinterpret_cast(&Bits) + BytesCopied, &val[I], BytesToCopy); BytesCopied += BytesToCopy; } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 9d4038a318b78..6f2e9f9fc19b7 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3303,45 +3303,45 @@ class __SYCL_EXPORT handler { /// Submit a non-blocking device-side wait on an external // semaphore to the queue. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore requires an explicit value to wait upon. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object void ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle); + sycl::ext::oneapi::experimental::external_semaphore extSemaphore); /// Submit a non-blocking device-side wait on an external // semaphore to the queue. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore does not support waiting on an explicitly passed value. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param WaitValue is the value that this semaphore will wait upon, until it /// allows any further commands to execute on the queue. void ext_oneapi_wait_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, uint64_t WaitValue); /// Instruct the queue to signal the external semaphore once all previous /// commands submitted to the queue have completed execution. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore requires an explicit value to signal. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object void ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle); + sycl::ext::oneapi::experimental::external_semaphore extSemaphore); /// Instruct the queue to set the state of the external semaphore to /// \p SignalValue once all previous commands submitted to the queue have /// completed execution. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore does not support signalling an explicitly passed value. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object. /// \param SignalValue is the value that this semaphore signal, once all /// prior opeartions on the queue complete. 
void ext_oneapi_signal_external_semaphore( - ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, uint64_t SignalValue); private: diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index c1917cf1d7d9d..87d8c8643d422 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -39,8 +39,8 @@ __SYCL_ASPECT(ext_oneapi_bindless_images, 42) __SYCL_ASPECT(ext_oneapi_bindless_images_shared_usm, 43) __SYCL_ASPECT(ext_oneapi_bindless_images_1d_usm, 44) __SYCL_ASPECT(ext_oneapi_bindless_images_2d_usm, 45) -__SYCL_ASPECT(ext_oneapi_interop_memory_import, 46) -__SYCL_ASPECT(ext_oneapi_interop_semaphore_import, 48) +__SYCL_ASPECT(ext_oneapi_external_memory_import, 46) +__SYCL_ASPECT(ext_oneapi_external_semaphore_import, 48) __SYCL_ASPECT(ext_oneapi_mipmap, 50) __SYCL_ASPECT(ext_oneapi_mipmap_anisotropy, 51) __SYCL_ASPECT(ext_oneapi_mipmap_level_reference, 52) diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 706ca59ea854b..fbab1e5ca9148 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -1840,169 +1840,169 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue with a non-blocking wait on an external semaphore. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore requires an explicit value to wait upon. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle. + /// \param extSemaphore is an opaque external semaphore object. /// \return an event representing the wait operation. event ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { - CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle); + CGH.ext_oneapi_wait_external_semaphore(extSemaphore); }, CodeLoc); } /// Instruct the queue with a non-blocking wait on an external semaphore. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore requires an explicit value to wait upon. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the wait operation. event ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue with a non-blocking wait on an external semaphore. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore requires an explicit value to wait upon. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle. 
+ /// \param extSemaphore is an opaque external semaphore object. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the wait operation. event ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue with a non-blocking wait on an external semaphore. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore does not support waiting on an explicitly passed value. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param WaitValue is the value that this semaphore will wait upon, until it /// allows any further commands to execute on the queue. /// \return an event representing the wait operation. event ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, uint64_t WaitValue, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue with a non-blocking wait on an external semaphore. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore does not support waiting on an explicitly passed value. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param WaitValue is the value that this semaphore will wait upon, until it /// allows any further commands to execute on the queue. /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the wait operation. event ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, uint64_t WaitValue, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue with a non-blocking wait on an external semaphore. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore does not support waiting on an explicitly passed value. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param WaitValue is the value that this semaphore will wait upon, until it /// allows any further commands to execute on the queue. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the wait operation. 
event ext_oneapi_wait_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, uint64_t WaitValue, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue to signal the external semaphore once all previous /// commands have completed execution. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore requires an explicit value to signal. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \return an event representing the signal operation. event ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue to signal the external semaphore once all previous /// commands have completed execution. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore requires an explicit value to signal. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the signal operation. event ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue to signal the external semaphore once all previous /// commands have completed execution. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore requires an explicit value to signal. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the signal operation. event ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue to signal the external semaphore once all previous /// commands have completed execution. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore does not support signalling an explicitly passed value. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param SignalValue is the value that this semaphore signal, once all /// prior opeartions on the queue complete. /// \return an event representing the signal operation. 
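As a usage-level illustration of the rename (not part of the patch): the queue-level wait/signal entry points now take the `external_semaphore` object directly in place of `interop_semaphore_handle`. The sketch below assumes the semaphore was imported earlier via `import_external_semaphore`; the kernel and buffer are illustrative.

```c++
#include <sycl/ext/oneapi/bindless_images.hpp>
#include <sycl/sycl.hpp>

namespace syclexp = sycl::ext::oneapi::experimental;

// Run a kernel bracketed by an external semaphore: wait for the external API
// to signal it, do the work, then signal it back once the work has completed.
sycl::event run_between_semaphores(sycl::queue &q,
                                   syclexp::external_semaphore sem,
                                   float *data, size_t n) {
  q.ext_oneapi_wait_external_semaphore(sem);
  q.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> i) { data[i] *= 2.0f; });
  return q.ext_oneapi_signal_external_semaphore(sem);
}
```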
event ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, uint64_t SignalValue, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue to signal the external semaphore once all previous /// commands have completed execution. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore does not support signalling an explicitly passed value. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param SignalValue is the value that this semaphore signal, once all /// prior opeartions on the queue complete. /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the signal operation. event ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore extSemaphore, uint64_t SignalValue, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue to signal the external semaphore once all previous /// commands have completed execution. - /// An exception is thrown if \p SemaphoreHandle is incomplete, or if the + /// An exception is thrown if \p extSemaphore is incomplete, or if the /// type of semaphore does not support signalling an explicitly passed value. /// - /// \param SemaphoreHandle is an opaque external interop semaphore handle + /// \param extSemaphore is an opaque external semaphore object /// \param SignalValue is the value that this semaphore signal, once all /// prior opeartions on the queue complete. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the signal operation. event ext_oneapi_signal_external_semaphore( - sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, + sycl::ext::oneapi::experimental::external_semaphore SemaphoreHandle, uint64_t SignalValue, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); diff --git a/sycl/include/syclcompat/launch.hpp b/sycl/include/syclcompat/launch.hpp index 503f29ff8b91f..eb5d774bc12d3 100644 --- a/sycl/include/syclcompat/launch.hpp +++ b/sycl/include/syclcompat/launch.hpp @@ -31,6 +31,7 @@ #include #include +#include namespace syclcompat { @@ -67,26 +68,6 @@ launch(const sycl::nd_range<3> &range, sycl::queue q, Args... args) { range, [=](sycl::nd_item<3>) { [[clang::always_inline]] F(args...); }); } -template -sycl::event launch(const sycl::nd_range<3> &range, size_t mem_size, - sycl::queue q, Args... 
args) { - static_assert(detail::getArgumentCount(F) == sizeof...(args) + 1, - "Wrong number of arguments to SYCL kernel"); - - using F_t = decltype(F); - using f_return_t = typename std::invoke_result_t; - static_assert(std::is_same::value, - "SYCL kernels should return void"); - - return q.submit([&](sycl::handler &cgh) { - auto local_acc = sycl::local_accessor(mem_size, cgh); - cgh.parallel_for(range, [=](sycl::nd_item<3>) { - auto local_mem = local_acc.get_pointer(); - [[clang::always_inline]] F(args..., local_mem); - }); - }); -} - } // namespace detail template @@ -137,87 +118,47 @@ launch(const dim3 &grid, const dim3 &threads, Args... args) { return launch(grid, threads, get_default_queue(), args...); } -/// Launches a kernel with the templated F param and arguments on a -/// device specified by the given nd_range and SYCL queue. -/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, -/// Args... args). -/// @tparam Dim nd_range dimension number. -/// @tparam Args Types of the arguments to be passed to the kernel. -/// @param range Nd_range specifying the work group and global sizes for the -/// kernel. -/// @param q The SYCL queue on which to execute the kernel. -/// @param mem_size The size, in number of bytes, of the local -/// memory to be allocated for kernel. -/// @param args The arguments to be passed to the kernel. -/// @return A SYCL event object that can be used to synchronize with the -/// kernel's execution. -template -sycl::event launch(const sycl::nd_range &range, size_t mem_size, - sycl::queue q, Args... args) { - return detail::launch(detail::transform_nd_range(range), mem_size, q, - args...); +} // namespace syclcompat + +namespace syclcompat::experimental { + +namespace detail { + +template +sycl::event launch(LaunchPolicy launch_policy, sycl::queue q, Args... args) { + static_assert(syclcompat::args_compatible, + "Mismatch between device function signature and supplied " + "arguments. Have you correctly handled local memory/char*?"); + + sycl_exp::launch_config config(launch_policy.get_range(), + launch_policy.get_launch_properties()); + + return sycl_exp::submit_with_event(q, [&](sycl::handler &cgh) { + auto KernelFunctor = build_kernel_functor(cgh, launch_policy, args...); + if constexpr (syclcompat::detail::is_range_v< + typename LaunchPolicy::RangeT>) { + parallel_for(cgh, config, KernelFunctor); + } else { + static_assert( + syclcompat::detail::is_nd_range_v); + nd_launch(cgh, config, KernelFunctor); + } + }); } -/// Launches a kernel with the templated F param and arguments on a -/// device specified by the given nd_range using theSYCL default queue. -/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, -/// Args... args). -/// @tparam Dim nd_range dimension number. -/// @tparam Args Types of the arguments to be passed to the kernel. -/// @param range Nd_range specifying the work group and global sizes for the -/// kernel. -/// @param mem_size The size, in number of bytes, of the local -/// memory to be allocated for kernel. -/// @param args The arguments to be passed to the kernel. -/// @return A SYCL event object that can be used to synchronize with the -/// kernel's execution. -template -sycl::event launch(const sycl::nd_range &range, size_t mem_size, - Args... 
args) { - return launch(range, mem_size, get_default_queue(), args...); } -/// Launches a kernel with the templated F param and arguments on a -/// device with a user-specified grid and block dimensions following the -/// standard of other programming models using a user-defined SYCL queue. -/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, -/// Args... args). -/// @tparam Dim nd_range dimension number. -/// @tparam Args Types of the arguments to be passed to the kernel. -/// @param grid Grid dimensions represented with an (x, y, z) iteration space. -/// @param threads Block dimensions represented with an (x, y, z) iteration -/// space. -/// @param mem_size The size, in number of bytes, of the local -/// memory to be allocated for kernel. -/// @param args The arguments to be passed to the kernel. -/// @return A SYCL event object that can be used to synchronize with the -/// kernel's execution. -template -sycl::event launch(const dim3 &grid, const dim3 &threads, size_t mem_size, - sycl::queue q, Args... args) { - return launch(sycl::nd_range<3>{grid * threads, threads}, mem_size, q, - args...); + +template +sycl::event launch(LaunchPolicy launch_policy, sycl::queue q, Args... args) { + static_assert(detail::is_launch_policy_v); + return detail::launch(launch_policy, q, args...); } -/// Launches a kernel with the templated F param and arguments on a -/// device with a user-specified grid and block dimensions following the -/// standard of other programming models using the default SYCL queue. -/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, -/// Args... args). -/// @tparam Dim nd_range dimension number. -/// @tparam Args Types of the arguments to be passed to the kernel. -/// @param grid Grid dimensions represented with an (x, y, z) iteration space. -/// @param threads Block dimensions represented with an (x, y, z) iteration -/// space. -/// @param mem_size The size, in number of bytes, of the -/// local memory to be allocated. -/// @param args The arguments to be passed to the kernel. -/// @return A SYCL event object that can be used to synchronize with the -/// kernel's execution. -template -sycl::event launch(const dim3 &grid, const dim3 &threads, size_t mem_size, - Args... args) { - return launch(grid, threads, mem_size, get_default_queue(), args...); +template +sycl::event launch(LaunchPolicy launch_policy, Args... args) { + static_assert(detail::is_launch_policy_v); + return launch(launch_policy, get_default_queue(), args...); } -} // namespace syclcompat +} // namespace syclcompat::experimental diff --git a/sycl/include/syclcompat/launch_experimental.hpp b/sycl/include/syclcompat/launch_experimental.hpp deleted file mode 100644 index 3074c8c20371e..0000000000000 --- a/sycl/include/syclcompat/launch_experimental.hpp +++ /dev/null @@ -1,105 +0,0 @@ -/*************************************************************************** - * - * Copyright (C) Codeplay Software Ltd. - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM - * Exceptions. See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * SYCLcompat - * - * launch_experimental.hpp - * - * Description: - * Launch Overloads with accepting required subgroup size - **************************************************************************/ - -#pragma once - -#include -#include -#include - -namespace syclcompat { -namespace experimental { - -//================================================================================================// -// Overloads using Local Memory // -//================================================================================================// - -template -std::enable_if_t, sycl::event> -launch(sycl::nd_range<3> launch_range, std::size_t local_memory_size, - sycl::queue queue, Args... args) { - return queue.submit([&](sycl::handler &cgh) { - sycl::local_accessor loc(local_memory_size, cgh); - cgh.parallel_for( - launch_range, - [=](sycl::nd_item<3> it) [[sycl::reqd_sub_group_size(SubgroupSize)]] { - [[clang::always_inline]] F( - args..., loc.get_multi_ptr()); - }); - }); -} - -template -std::enable_if_t, sycl::event> -launch(sycl::nd_range launch_range, std::size_t local_memory_size, - Args... args) { - return launch( - ::syclcompat::detail::transform_nd_range(launch_range), local_memory_size, - ::syclcompat::get_default_queue(), args...); -} - -template -std::enable_if_t, sycl::event> -launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim, - std::size_t local_memory_size, Args... args) { - return launch( - ::syclcompat::detail::transform_nd_range(sycl::nd_range( - sycl::range<3>(grid_dim * block_dim), sycl::range<3>(block_dim))), - local_memory_size, ::syclcompat::get_default_queue(), args...); -} - -//================================================================================================// -// Overloads not using Local Memory // -//================================================================================================// - -template -std::enable_if_t, sycl::event> -launch(sycl::nd_range<3> launch_range, sycl::queue queue, Args... args) { - return queue.submit([&](sycl::handler &cgh) { - cgh.parallel_for(launch_range, - [=](sycl::nd_item<3> it) - [[sycl::reqd_sub_group_size(SubgroupSize)]] { - [[clang::always_inline]] F(args...); - }); - }); -} - -template -std::enable_if_t, sycl::event> -launch(sycl::nd_range launch_range, Args... args) { - return launch( - ::syclcompat::detail::transform_nd_range(launch_range), - ::syclcompat::get_default_queue(), args...); -} - -template -std::enable_if_t, sycl::event> -launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim, - Args... args) { - return launch( - ::syclcompat::detail::transform_nd_range(sycl::nd_range( - sycl::range<3>(grid_dim * block_dim), sycl::range<3>(block_dim))), - ::syclcompat::get_default_queue(), args...); -} - -} // namespace experimental -} // namespace syclcompat diff --git a/sycl/include/syclcompat/launch_policy.hpp b/sycl/include/syclcompat/launch_policy.hpp new file mode 100644 index 0000000000000..1c5f6ed3e97d6 --- /dev/null +++ b/sycl/include/syclcompat/launch_policy.hpp @@ -0,0 +1,254 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCL compatibility extension + * + * launch.hpp + * + * Description: + * launch functionality for the SYCL compatibility extension + **************************************************************************/ + +#pragma once + +#include "sycl/ext/oneapi/experimental/enqueue_functions.hpp" +#include "sycl/ext/oneapi/properties/properties.hpp" +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace syclcompat { +namespace experimental { + +namespace sycl_exp = sycl::ext::oneapi::experimental; + +// Wrapper for kernel sycl_exp::properties +template struct kernel_properties { + static_assert(sycl_exp::is_property_list_v); + using Props = Properties; + + template + kernel_properties(Props... properties) : props{properties...} {} + + template + kernel_properties(sycl_exp::properties properties) + : props{properties} {} + + Properties props; +}; + +template ::value, void>> +kernel_properties(Props... props) + -> kernel_properties; + +template +kernel_properties(sycl_exp::properties props) + -> kernel_properties>; + +// Wrapper for launch sycl_exp::properties +template struct launch_properties { + static_assert(sycl_exp::is_property_list_v); + using Props = Properties; + + template + launch_properties(Props... properties) : props{properties...} {} + + template + launch_properties(sycl_exp::properties properties) + : props{properties} {} + + Properties props; +}; + +template ::value, void>> +launch_properties(Props... props) + -> launch_properties; + +template +launch_properties(sycl_exp::properties props) + -> launch_properties>; + +// Wrapper for local memory size +struct local_mem_size { + local_mem_size(size_t size = 0) : size{size} {}; + size_t size; +}; + +// launch_policy is constructed by the user & passed to `compat_exp::launch` +template +class launch_policy { + static_assert(sycl_exp::is_property_list_v); + static_assert(sycl_exp::is_property_list_v); + static_assert(syclcompat::detail::is_range_or_nd_range_v); + static_assert(syclcompat::detail::is_nd_range_v || !LocalMem, + "sycl::range kernel launches are incompatible with local " + "memory usage!"); + +public: + using KPropsT = KProps; + using LPropsT = LProps; + using RangeT = Range; + static constexpr bool HasLocalMem = LocalMem; + +private: + launch_policy() = default; + + template + launch_policy(Ts... ts) + : _kernel_properties{detail::property_getter< + kernel_properties, kernel_properties, std::tuple>()( + std::tuple(ts...))}, + _launch_properties{detail::property_getter< + launch_properties, launch_properties, std::tuple>()( + std::tuple(ts...))}, + _local_mem_size{ + detail::local_mem_getter>()( + std::tuple(ts...))} { + check_variadic_args(ts...); + } + + template void check_variadic_args(Ts...) { + static_assert( + std::conjunction_v, + detail::is_launch_properties, + detail::is_local_mem_size>...>, + "Received an unexpected argument to ctor. Did you forget to wrap " + "in " + "compat::kernel_properties, launch_properties, local_mem_size?"); + } + +public: + template + launch_policy(Range range, Ts... ts) : launch_policy(ts...) 
{ + _range = range; + check_variadic_args(ts...); + } + + template + launch_policy(dim3 global_range, Ts... ts) : launch_policy(ts...) { + _range = Range{global_range}; + check_variadic_args(ts...); + } + + template + launch_policy(dim3 global_range, dim3 local_range, Ts... ts) + : launch_policy(ts...) { + _range = Range{global_range * local_range, local_range}; + check_variadic_args(ts...); + } + + KProps get_kernel_properties() { return _kernel_properties.props; } + LProps get_launch_properties() { return _launch_properties.props; } + size_t get_local_mem_size() { return _local_mem_size.size; } + Range get_range() { return _range; } + +private: + Range _range; + kernel_properties _kernel_properties; + launch_properties _launch_properties; + local_mem_size _local_mem_size; +}; + +// Deduction guides for launch_policy +template +launch_policy(Range, Ts...) -> launch_policy< + Range, detail::properties_or_empty, + detail::properties_or_empty, + detail::has_type>::value>; + +template +launch_policy(sycl::range, sycl::range, Ts...) -> launch_policy< + sycl::nd_range, detail::properties_or_empty, + detail::properties_or_empty, + detail::has_type>::value>; + +template +launch_policy(dim3, Ts...) -> launch_policy< + sycl::range<3>, detail::properties_or_empty, + detail::properties_or_empty, + detail::has_type>::value>; + +template +launch_policy(dim3, dim3, Ts...) -> launch_policy< + sycl::nd_range<3>, detail::properties_or_empty, + detail::properties_or_empty, + detail::has_type>::value>; + +namespace detail { + +template +struct KernelFunctor { + KernelFunctor(KProps kernel_props, Args... args) + : _kernel_properties{kernel_props}, + _argument_tuple(std::make_tuple(args...)) {} + + KernelFunctor(KProps kernel_props, sycl::local_accessor local_acc, + Args... args) + : _kernel_properties{kernel_props}, _local_acc{local_acc}, + _argument_tuple(std::make_tuple(args...)) {} + + auto get(sycl_exp::properties_tag) { return _kernel_properties; } + + __syclcompat_inline__ void + operator()(syclcompat::detail::range_to_item_t) const { + if constexpr (HasLocalMem) { + char *local_mem_ptr = static_cast( + _local_acc.template get_multi_ptr().get()); + std::apply( + [lmem_ptr = local_mem_ptr](auto &&...args) { F(args..., lmem_ptr); }, + _argument_tuple); + } else { + std::apply([](auto &&...args) { F(args...); }, _argument_tuple); + } + } + + KProps _kernel_properties; + std::tuple _argument_tuple; + std::conditional_t, std::monostate> + _local_acc; // monostate for empty type +}; + +//==================================================================== +// This helper function avoids 2 nested `if constexpr` in detail::launch +template +auto build_kernel_functor(sycl::handler &cgh, LaunchPolicy launch_policy, + Args... 
args) + -> KernelFunctor { + if constexpr (LaunchPolicy::HasLocalMem) { + sycl::local_accessor local_memory( + launch_policy.get_local_mem_size(), cgh); + return KernelFunctor( + launch_policy.get_kernel_properties(), local_memory, args...); + } else { + return KernelFunctor( + launch_policy.get_kernel_properties(), args...); + } +} + +} // namespace detail +} // namespace experimental +} // namespace syclcompat diff --git a/sycl/include/syclcompat/syclcompat.hpp b/sycl/include/syclcompat/syclcompat.hpp index 401b5681d40dd..8c5f693794948 100644 --- a/sycl/include/syclcompat/syclcompat.hpp +++ b/sycl/include/syclcompat/syclcompat.hpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/sycl/include/syclcompat/traits.hpp b/sycl/include/syclcompat/traits.hpp index f992c67bae8ca..2f389ccf79484 100644 --- a/sycl/include/syclcompat/traits.hpp +++ b/sycl/include/syclcompat/traits.hpp @@ -23,6 +23,10 @@ #pragma once #include +#include +#include +#include +#include #include namespace syclcompat { @@ -41,4 +45,209 @@ template struct arith { }; template using arith_t = typename arith::type; +// Traits to check device function signature matches args (with or without local +// mem) +template +struct device_fn_invocable : std::is_invocable {}; + +template +struct device_fn_lmem_invocable + : std::is_invocable {}; + +template +constexpr inline bool args_compatible = + std::conditional_t, + device_fn_invocable>::value; + +namespace detail { + +// Trait for identifying sycl::range and sycl::nd_range. +template struct is_range : std::false_type {}; +template struct is_range> : std::true_type {}; + +template constexpr bool is_range_v = is_range::value; + +template struct is_nd_range : std::false_type {}; +template struct is_nd_range> : std::true_type {}; + +template constexpr bool is_nd_range_v = is_nd_range::value; + +template +constexpr bool is_range_or_nd_range_v = + std::disjunction_v, is_nd_range>; + +// Trait range_to_item_t to convert nd_range -> nd_item, range -> item +template struct range_to_item_map; +template struct range_to_item_map> { + using ItemT = sycl::nd_item; +}; +template struct range_to_item_map> { + using ItemT = sycl::item; +}; + +template +using range_to_item_t = typename range_to_item_map::ItemT; + +} // namespace detail + +// Forward decls +namespace experimental { + +template struct kernel_properties; +template struct launch_properties; +struct local_mem_size; + +template +class launch_policy; +} // namespace experimental + +namespace experimental::detail { + +// Helper for tuple_template_index +template
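With the `SubgroupSize`-templated overloads from `launch_experimental.hpp` removed, the same effect can be expressed through `kernel_properties` on a `launch_policy`. Below is a hedged sketch, assuming the standard `sub_group_size` compile-time kernel property applies through the policy; `vectorAdd`, the pointers, and the grid/block sizes are illustrative.

```c++
#include <syclcompat/syclcompat.hpp>

// Illustrative device function matching the vectorAdd signature used in the
// documentation above.
inline void vectorAdd(const float *A, const float *B, float *C, int n) {
  size_t i = syclcompat::global_id::x();
  if (i < static_cast<size_t>(n))
    C[i] = A[i] + B[i];
}

void launch_with_required_subgroup_size(const float *d_A, const float *d_B,
                                        float *d_C, int n) {
  namespace compat_exp = syclcompat::experimental;
  namespace sycl_exp = sycl::ext::oneapi::experimental;

  syclcompat::dim3 blocksPerGrid(32), threadsPerBlock(128);

  // kernel_properties carries compile-time kernel properties such as the
  // required sub-group size; launch_policy bundles them with the launch shape.
  compat_exp::launch_policy policy{
      blocksPerGrid, threadsPerBlock,
      compat_exp::kernel_properties{sycl_exp::sub_group_size<32>}};

  compat_exp::launch<vectorAdd>(policy, d_A, d_B, d_C, n);
}
```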