Commits
132 commits
1d903f5
Changed VERSION to 2.5.0.dev0
ptrendx May 17, 2025
2645eae
[Pytorch] NVIDIA-DL-Framework-Inspect support – part 3 – tests (#1612)
pggPL May 19, 2025
7be4339
Fix README render for uploading package to PyPI (#1798)
ksivaman May 19, 2025
730fd11
Enhance recipe compatibility (#1724)
negvet May 19, 2025
3baaf3f
Fix split_overlap_ag `aggregate=True` chunk offset calculation (#1768)
guyueh1 May 20, 2025
201de5f
Use an empty torch tensor to indicate no fp8 information in extra_sta…
pstjohn May 20, 2025
3e50d53
[Pytorch] NVIDIA-DL-Framework-Inspect support – part 4 – documentatio…
pggPL May 20, 2025
d35afe1
[PyTorch] Add docstring for CP load balancing (#1802)
cyanguwa May 20, 2025
cd11e00
Add missing docs for C API (#1803)
ksivaman May 21, 2025
097afc0
fix model parallel encoder to be properly sharded params (#1794)
sudhakarsingh27 May 21, 2025
9c436d5
[PyTorch] Fix saved_tensors access in Ops Fuser (#1807)
pggPL May 22, 2025
0cd1cd8
[JAX] Fix incorrectly skipped test_quantize_dbias tests (#1808)
jberchtold-nvidia May 22, 2025
6262280
Remove `comm_gemm_overlap` doc (#1815)
ksivaman May 22, 2025
00328ac
Build support for cuda 13 (#1809)
ksivaman May 22, 2025
b17f3f4
[JAX] Make primitive names more granular for better disabling granula…
jberchtold-nvidia May 22, 2025
1669b3f
Add docs for missing FP8 recipes. (#1816)
ksivaman May 22, 2025
e4c051f
[PyTorch] Activation ops support fusing backward pass with quantize (…
timmoon10 May 22, 2025
fe9a786
Fix test.sh scripts to test pure-JAX implementations (#1805)
jberchtold-nvidia May 23, 2025
cd37379
Fix the failing test cases in the CI (#1806)
ptrendx May 23, 2025
9627b07
Updated README - Added conda installation (#1826)
sbhavani May 27, 2025
30e3081
Fix multi-framework runtime lib loading (#1825)
ksivaman May 28, 2025
4732ed7
[JAX] Update jax_scaled_masked_softmax to match TE kernel implementat…
jberchtold-nvidia May 28, 2025
355c4e4
[JAX] FP8 GEMM via dot_general + direct quant (#1819)
phu0ngng May 28, 2025
c9e8e30
[JAX] Removes unneccessary reshapes for FP8 GEMM (#1820)
phu0ngng May 29, 2025
41909dc
[PyTorch] Linear op avoids saving input tensor if weight grad is not …
timmoon10 May 29, 2025
4292653
Avoid memory allocations and deallocations when creating NVTETensor (…
ptrendx May 29, 2025
855fa65
[JAX] Support SWA in CP Ring Attn THD striped sharding (#1810)
huanghua1994 May 29, 2025
204add8
Avoid searching unnecessary dirs for shared libs (#1801)
timmoon10 May 30, 2025
d5d7833
Quantizer update when recipe was changed (#1814)
negvet May 30, 2025
c6a9e26
[PyTorch][Jax] Add warning for missing SOs if both frameworks are ins…
ksivaman May 31, 2025
62f5c9e
[JAX] Use 1x quantization + jax transpose for performance for tensor-…
jberchtold-nvidia Jun 2, 2025
c141711
Minor improvements to runtime error checks during library loading (#1…
ksivaman Jun 2, 2025
f3d77f6
[JAX] Fix NVTETensor leak in attention.cpp (#1841)
jberchtold-nvidia Jun 3, 2025
8b3ba9d
Bump cuDNN FE (#1842)
ksivaman Jun 3, 2025
75fe560
Update list of authorized CI users (#1840)
mk-61 Jun 3, 2025
151a0af
[PyTorch] Miscellaneous fixes for attention (#1780)
cyanguwa Jun 3, 2025
97e493f
Remove deprecated global option for debug build (#1848)
ksivaman Jun 4, 2025
12af02f
Fix `NVTE_FRAMEWORK=all` installation (#1850)
ksivaman Jun 5, 2025
f64d145
[JAX] Fix 1x quantize kernel availability check on hopper (#1845)
jberchtold-nvidia Jun 5, 2025
557f0cb
Use versioned flavor of get driver entrypoint function (#1835)
ptrendx Jun 5, 2025
6123d7e
[JAX] Fix OTYPE for FP8 GEMM (#1838)
phu0ngng Jun 5, 2025
9985b02
[PyTorch] FP8 Subchannel Recipe With FP8 Gather And Configurable Scal…
zhongbozhu Jun 6, 2025
7948779
[JAX] GroupedQuantizer and GroupedScaledTensor (#1666)
phu0ngng Jun 6, 2025
05f3b57
[Common] Missing CUDA driver deallocations in Userbuffers (#1812)
denera Jun 6, 2025
beffb29
[PyTorch] Get `skip_fp8_weight_update` only in CUDA Graph Capturing (…
yaox12 Jun 7, 2025
fab7157
Fix all framework build from PR 1666 (#1857)
ksivaman Jun 7, 2025
f519e6e
FP8 Param support for offloading (#1823)
sanandaraj5597 Jun 9, 2025
fc18520
Use public API instead of removed private function in `te_llama.py` (…
janekb04 Jun 9, 2025
ddcda1f
Manage dependencies and add missing `einops` req (#1859)
ksivaman Jun 9, 2025
031c6cf
Python 3.12+ support (#1862)
ksivaman Jun 10, 2025
faee0e8
Support Context Parallel for Multi Latent Attention (MLA) (#1729)
yuzhongw-nvidia Jun 10, 2025
aedd7e1
pyproject.toml (#1852)
ksivaman Jun 10, 2025
0efc7da
[PyTorch] Fix backward compatibility for checkpoint loading (#1868)
ksivaman Jun 12, 2025
c293d3a
[PyTorch] Fix typo in GrouppedLinear (#1867)
pggPL Jun 12, 2025
5d01ef2
[JAX] GroupedDense v.2 without dynamic shape (#1721)
phu0ngng Jun 12, 2025
4d4f1ed
Cpu reload double buffer (#1695)
sanandaraj5597 Jun 12, 2025
c3b7c2a
Revert "[JAX] GroupedDense v.2 without dynamic shape" (#1874)
phu0ngng Jun 12, 2025
c9d7f3f
[JAX] GroupedDense v.2 without dynamic shape (#1875)
phu0ngng Jun 12, 2025
40a30a5
[PyTorch] Support L2Normalization basic op -> use for qk_norm (#1864)
negvet Jun 12, 2025
227961e
[JAX] Distinguish the reasons why fp8 / mxfp8 is not supported in uni…
huanghua1994 Jun 12, 2025
ecaf3e2
Fixes for JIT-able grouped_gemm (#1872)
phu0ngng Jun 12, 2025
d90ced7
Add support for overlapping wgrad NCCL AG with dgrad GEMM (#1849)
djns99 Jun 13, 2025
8d4bdbc
Optimize `/ops/fuser.py` by moving computation from `forward` to `__i…
janekb04 Jun 13, 2025
655512c
[PyTorch] Inference mode disables initializing quantized weights with…
timmoon10 Jun 13, 2025
e963e4a
[PyTorch] Add support for FP8 current scaling in operation-based API …
timmoon10 Jun 13, 2025
7b94bd9
[common] Added support of FP4 data type (#1779)
Oleg-Goncharov Jun 13, 2025
71c76b6
Add support for head_dim > 128 (#1797)
cyanguwa Jun 13, 2025
1ddfa0c
[JAX] Add support for Fused Attn MLA head_dim_qk != head_dim_v (#1851)
KshitijLakhani Jun 13, 2025
a69692a
Changed VERSION to 2.6.0.dev0
ptrendx Jun 13, 2025
01a504c
[JAX] Grouped GEMM & Dense support MXFP8 and handle empty matrices (#…
huanghua1994 Jun 16, 2025
8ce49c0
[Pytorch] Bugfix in te fusion ce implementation (#1879)
BestJuly Jun 16, 2025
ba8c923
Fix test case that assumes char is signed (#1881)
timmoon10 Jun 16, 2025
ae572af
[JAX] Fixes for L0_jax_distributed_unittest (#1884)
phu0ngng Jun 17, 2025
3a298e6
[JAX] TensorUsage + FP8 GEMM with all layouts handling on BW (#1844)
phu0ngng Jun 18, 2025
766e3b7
[PyTorch] Use FP16 tols for distributed tests with TF32 compute (#1831)
timmoon10 Jun 19, 2025
7db72db
Fix cppunittest test.sh for editable installs (#1869)
jberchtold-nvidia Jun 25, 2025
c30e961
[PyTorch][MoE] Reduce CPU Overhead By Fuse Torch Empty Calls (#1793)
zhongbozhu Jun 26, 2025
23cf4ff
[PyTorch|common] Optimize unpadding kernel for FP8 (#1866)
xiaoxi-wangfj Jun 26, 2025
964c2ed
[PyTorch Debug] Fix the issue with PP (#1894)
pggPL Jun 26, 2025
1d1d323
[PyTorch Debug] Fixed the empty tensor bug in statistics computation …
pggPL Jun 26, 2025
0587ecf
Optimize reshaping tensors in the `te.ops.Sequential` implementation …
janekb04 Jun 26, 2025
5b16807
[JAX] Use keyword args for jit in_shardings and out_shardings (#1898)
jberchtold-nvidia Jun 26, 2025
cc0cb35
[PyTorch] Skip KV cache for sm89 and cuDNN < 9.12 (#1895)
cyanguwa Jun 26, 2025
9d173c9
Fix MLA CP Bugs (#1896)
yuzhongw-nvidia Jun 28, 2025
447de6d
Fix layernorm output shape in LayernormLinear (#1906)
guyueh1 Jul 1, 2025
21b780c
Enable use of internal tensors in Sequential (#1900)
janekb04 Jul 1, 2025
6f4310d
Added MCore FSDP support for TE (#1890)
sanandaraj5597 Jul 1, 2025
1ae1d22
[PyTorch Debug] Skip some of debug tests if FP8 is not available. (#1…
pggPL Jul 4, 2025
d26cc3a
Add test for `LayerNormMLP` implementation using `te.ops.Sequential` …
janekb04 Jul 8, 2025
9166d4d
Call `pre_(first_)forward` only when global state changes (#1917)
janekb04 Jul 8, 2025
9d031fb
[JAX BUILD] Fixes for JAX 0.7.0 (#1936)
phu0ngng Jul 8, 2025
2f25d12
[PyTorch] Fix setting `align_size` when FP8 is not initialized (#1926)
yaox12 Jul 9, 2025
637facc
[PyTorch] Tests for loading previously-generated checkpoints (#1899)
timmoon10 Jul 9, 2025
3c4dfff
[JAX] Fix grouped GEMM error on CUDA 12.9.1 & later (#1925)
huanghua1994 Jul 9, 2025
96ee717
[PyTorch][MoE] MXFP8 Support to Reduce CPU Overhead By Fuse Torch Emp…
zhongbozhu Jul 9, 2025
4c7095c
Fixed cpu overhead when doing DS cast (#1941)
sanandaraj5597 Jul 9, 2025
1dd8f62
[PyTorch debug] Run test_sanity with debug tools enabled. (#1908)
pggPL Jul 10, 2025
6489189
Optimize CUDA Graph memory, FP8 wrapper, and uneven PP support (#1234)
buptzyb Jul 10, 2025
62acae0
[PyTorch][MoE] Kernels fusions for the MoE router (#1883)
Autumn1998 Jul 10, 2025
31fc29a
[PyTorch] Make `MXFP8Tensor` unpickling function backward compatible …
timmoon10 Jul 11, 2025
0a7e9fe
[JAX] Capped HuggingFace datasets version for TE/JAX encoder examples…
denera Jul 11, 2025
11fecc4
[JAX] Update distributed LayerNormMLP test tolerance for L40 (#1901)
jberchtold-nvidia Jul 11, 2025
ac76d55
[JAX] Fixes for the grouped_gemm with MXFP8 (#1945)
phu0ngng Jul 11, 2025
37da2d3
Add backward fusions of dbias+quantize and dbias+dactivation+quantize…
janekb04 Jul 12, 2025
dc97cc9
[PyTorch] Optimize the performance of permute fusion kernels (#1927)
hxbai Jul 14, 2025
397c4be
[PyTorch] Fix bugs in router fusion (#1944)
Autumn1998 Jul 14, 2025
214e2a4
[JAX] GEMM custom op (#1855)
denera Jul 14, 2025
1c702b4
Run-time checks for CUDA and cuBLAS versions (#1938)
timmoon10 Jul 14, 2025
e7251f9
[JAX] Resolve test conflict in JAX helper tests (#1916)
emmanuel-ferdman Jul 15, 2025
6c52679
Bump up FA to 2.8.1 (#1949)
vcherepanov-nv Jul 16, 2025
c0c12e2
[JAX] Support Flax sharding constraints (#1933)
jberchtold-nvidia Jul 16, 2025
0a1499f
[Pytorch] Dynamo ONNX export support (#1497)
pggPL Jul 16, 2025
bda2993
Handle dtypes more carefully in multi-tensor Adam (#1888)
timmoon10 Jul 16, 2025
fa91ed7
mxfp8 (for all gemm layouts) is not supported on 120+ arch yet (#1939)
sudhakarsingh27 Jul 17, 2025
07afda9
[PyTorch] Add save_original_input in Linear/GroupedLinear to save mem…
hxbai Jul 17, 2025
ed75c2b
[JAX] Tighten Encoder Test tolerances (#1955)
phu0ngng Jul 17, 2025
5350f27
[JAX] Remove unneccessary MXFP8 scale_inv padding (#1954)
phu0ngng Jul 17, 2025
f8933bb
[Common] Optimize KV cache related kernels (#1914)
cyanguwa Jul 17, 2025
657c965
Update cudnn-frontend to 1.13.0 (#1960)
cyanguwa Jul 18, 2025
2d4644b
[JAX] Set `precision=HIGHEST` for the ref_grouped_gemm impl in the un…
phu0ngng Jul 18, 2025
86c5097
[Test] Enable cuDNN Norm tests in the CPP suite (#1957)
phu0ngng Jul 18, 2025
ca7407e
[JAX] Update tolerance of distributed layernorm MLP for FP8 (#1971)
jberchtold-nvidia Jul 19, 2025
b109ff3
[ROCm] merge NV upstream commit ca7407e onto ROCm TE commit 6bbd03c a…
wangye805 Nov 18, 2025
4d3ca4d
[ROCm] resolve the ifu conflicts in common dir
wangye805 Oct 23, 2025
5ce0afd
[ROCm] resolve the conflicts in jax extension
wangye805 Oct 30, 2025
c9c9126
[ROCm] resolve the conflicts in pytorch extension
wangye805 Oct 30, 2025
9730903
[ROCm] resolve the conflicts in setup/build/init
wangye805 Oct 30, 2025
51bdbb8
[ROCm] resolve the conflicts in cpp tests
wangye805 Nov 2, 2025
ba59f81
[ROCm] resolve pytorch pytest conflicts
wangye805 Nov 2, 2025
5842c24
[ROCm] resolve the conflicts in TE jax pytest
wangye805 Nov 6, 2025
c3a9517
[ROCm] fix the example conflict and address reviewer comments
wangye805 Nov 21, 2025
aaceb5a
[ROCm] merge dev to commit 653b5b4
wangye805 Nov 21, 2025
418 changes: 156 additions & 262 deletions transformer_engine/common/CMakeLists.txt

Large diffs are not rendered by default.

42 changes: 0 additions & 42 deletions transformer_engine/common/__init__.py
@@ -6,14 +6,6 @@

"""FW agnostic user-end APIs"""

<<<<<<< HEAD
import functools
import sys
import glob
import sysconfig
import subprocess
=======
>>>>>>> ca7407e
import ctypes
import functools
import glob
@@ -30,7 +22,6 @@

import transformer_engine


_logger = logging.getLogger(__name__)


@@ -119,40 +110,9 @@ def _get_shared_object_file(library: str) -> Path:
if so_path is not None:
return so_path

<<<<<<< HEAD
# Case 1: Typical user workflow: Both locations are the same, return any result.
if te_install_dir == site_packages_dir:
if so_path_in_install_dir is not None:
return so_path_in_install_dir
raise FileNotFoundError(f"Could not find shared object file for Transformer Engine {library} lib.")

# Case 2: ERR! Both locations are different but returned a valid result.
# NOTE: Unlike for source installations, pip does not wipe out artifacts from
# editable builds. In case developers are executing inside a TE directory via
# an inplace build, and then move to a regular build, the local shared object
# file will be incorrectly picked up without the following logic.
if so_path_in_install_dir is not None and so_path_in_default_dir is not None:
raise RuntimeError(
f"Found multiple shared object files: {so_path_in_install_dir} and"
f" {so_path_in_default_dir}. Remove local shared objects installed"
f" here {so_path_in_install_dir} or change the working directory to"
"execute from outside TE."
)

# Case 3: Typical dev workflow: Editable install
if so_path_in_install_dir is not None:
return so_path_in_install_dir

# Case 4: Executing from inside a TE directory without an inplace build available.
if so_path_in_default_dir is not None:
return so_path_in_default_dir

raise FileNotFoundError(f"Could not find shared object file for Transformer Engine {library} lib.")
=======
raise FileNotFoundError(
f"Could not find shared object file for Transformer Engine {library} lib."
)
>>>>>>> ca7407e


@functools.lru_cache(maxsize=None)
@@ -178,7 +138,6 @@ def load_framework_extension(framework: str) -> None:
# If the framework extension pip package is installed, it means that TE is installed via
# PyPI. For this case we need to make sure that the metapackage, the core lib, and framework
# extension are all installed via PyPI and have matching version.
'''
if _is_pip_package_installed(module_name):
assert _is_pip_package_installed(
"transformer_engine"
@@ -197,7 +156,6 @@ def load_framework_extension(framework: str) -> None:
f" v{version(f'transformer-engine-{te_cuda_vers}')}. Install transformer-engine using "
f"'pip3 install transformer-engine[{extra_dep_name}]==VERSION'"
)
'''

# If the core package is installed via PyPI, log if
# the framework extension is not found from PyPI.
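The HEAD-side block removed above documents four lookup cases for the core shared object (regular install, conflicting copies left over from a stale in-place build, editable install, and execution from inside a TE checkout). A minimal Python sketch of that decision logic, with the hypothetical arguments `so_path_in_install_dir` and `so_path_in_default_dir` standing in for the two search results the real function computes earlier:

```python
# Sketch only: mirrors the four cases described in the removed HEAD block above.
# All arguments are assumed to come from an earlier search step in the real code.
from pathlib import Path
from typing import Optional


def _resolve_shared_object(
    library: str,
    te_install_dir: Path,
    site_packages_dir: Path,
    so_path_in_install_dir: Optional[Path],
    so_path_in_default_dir: Optional[Path],
) -> Path:
    # Case 1: typical user install -- both locations are the same.
    if te_install_dir == site_packages_dir:
        if so_path_in_install_dir is not None:
            return so_path_in_install_dir
        raise FileNotFoundError(
            f"Could not find shared object file for Transformer Engine {library} lib."
        )
    # Case 2: conflicting copies -- a stale editable/inplace build would shadow
    # the regular install, so fail loudly instead of guessing.
    if so_path_in_install_dir is not None and so_path_in_default_dir is not None:
        raise RuntimeError(
            f"Found multiple shared object files: {so_path_in_install_dir} and"
            f" {so_path_in_default_dir}. Remove the local shared objects or run"
            " from outside the TE source directory."
        )
    # Case 3: typical developer workflow -- editable install.
    if so_path_in_install_dir is not None:
        return so_path_in_install_dir
    # Case 4: running from inside a TE checkout without an inplace build.
    if so_path_in_default_dir is not None:
        return so_path_in_default_dir
    raise FileNotFoundError(
        f"Could not find shared object file for Transformer Engine {library} lib."
    )
```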
30 changes: 9 additions & 21 deletions transformer_engine/common/common.h
@@ -11,12 +11,11 @@

#ifndef __HIP_PLATFORM_AMD__
#include <cudaTypedefs.h>
<<<<<<< HEAD
#endif //#ifndef __HIP_PLATFORM_AMD__
=======
#define FP4_TYPE_SUPPORTED (CUDA_VERSION >= 12080)
#else
#define FP4_TYPE_SUPPORTED false
#endif //#ifndef __HIP_PLATFORM_AMD__

>>>>>>> ca7407e
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#include <cuda_fp8.h>
@@ -301,19 +300,16 @@ using fp8e4m3 = __nv_fp8_e4m3;
using fp8e5m2 = __nv_fp8_e5m2;
#if CUDA_VERSION >= 12080
using fp8e8m0 = __nv_fp8_e8m0;
<<<<<<< HEAD
#endif // CUDA_VERSION >= 12080
#if FP4_TYPE_SUPPORTED
using fp4e2m1 = __nv_fp4_e2m1;
#endif //FP4_TYPE_SUPPORTED
#else
using bf16 = hip_bfloat16;
using fp8e4m3 = te_hip_fp8_e4m3;
using fp8e5m2 = te_hip_fp8_e5m2;
#endif //__HIP_PLATFORM_AMD__
=======
#endif
#if FP4_TYPE_SUPPORTED
using fp4e2m1 = __nv_fp4_e2m1;
#endif
>>>>>>> ca7407e

using e8m0_t = uint8_t;

namespace detail {
@@ -341,15 +337,11 @@ TRANSFORMER_ENGINE_TYPE_NAME(__nv_fp8_e4m3)
TRANSFORMER_ENGINE_TYPE_NAME(__nv_fp8_e5m2)
#if CUDA_VERSION >= 12080
TRANSFORMER_ENGINE_TYPE_NAME(__nv_fp8_e8m0)
<<<<<<< HEAD
#endif // CUDA_VERSION >= 12080
#endif // #ifdef __HIP_PLATFORM_AMD__
=======
#endif
#if FP4_TYPE_SUPPORTED
TRANSFORMER_ENGINE_TYPE_NAME(__nv_fp4_e2m1)
#endif
>>>>>>> ca7407e
#endif // #ifdef __HIP_PLATFORM_AMD__
#undef TRANSFORMER_ENGINE_TYPE_NAME

template <typename T>
@@ -741,12 +733,8 @@ CUtensorMapDataType get_CUtensorMapDataType(DType dtype);
void create_2D_tensor_map(CUtensorMap &tensorMap, const SimpleTensor &tensor,
const uint64_t globalY, const uint64_t globalX, const uint32_t shmemY,
const uint32_t shmemX, const uint32_t stride_elems,
<<<<<<< HEAD
const uint32_t offset_elems, const size_t type_size);
#endif //#ifndef __HIP_PLATFORM_AMD__
=======
const uint32_t offset_elems, const size_t type_num_bits);
>>>>>>> ca7407e
#endif //#ifndef __HIP_PLATFORM_AMD__

bool is_supported_by_CC_100();

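The common.h hunks above replace the HEAD-only `#endif` with an `FP4_TYPE_SUPPORTED` switch. A minimal sketch of that guard pattern, under the assumption that `cuda_fp4.h` supplies `__nv_fp4_e2m1` on CUDA 12.8+ (illustrative only, not the verbatim resolved header):

```cpp
// Sketch of the FP4 guard from the common.h hunks: FP4 e2m1 support is
// compiled in only for CUDA toolkits >= 12.8 and is forced off for HIP builds.
#ifndef __HIP_PLATFORM_AMD__
#include <cudaTypedefs.h>  // brings in cuda.h and hence CUDA_VERSION
#define FP4_TYPE_SUPPORTED (CUDA_VERSION >= 12080)
#else
#define FP4_TYPE_SUPPORTED false
#endif  // __HIP_PLATFORM_AMD__

#if FP4_TYPE_SUPPORTED
#include <cuda_fp4.h>            // assumed available with CUDA >= 12.8
using fp4e2m1 = __nv_fp4_e2m1;   // matches the alias added in common.h
#endif
```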