From b9a811aacd9de96e1749dd36c707aff499dec970 Mon Sep 17 00:00:00 2001
From: Zarko Asen <asenzz@gmail.com>
Date: Sun, 31 Aug 2025 12:30:37 +0200
Subject: [PATCH 01/27] Port of gesv_rbt_async() from the Bitbucket repo, unit
 tests need to be updated.

---
 CMakeLists.txt            | 879 +++++++++++++++++++-------------------
 include/magma_auxiliary.h |  32 ++
 include/magma_z.h         |  65 +++
 include/magmablas_z.h     |  16 +
 interface_cuda/alloc.cpp  | 105 ++++-
 magmablas/ztrsm.cu        | 326 ++++++++++++++
 magmablas/ztrtri_diag.cu  | 102 +++++
 src/zgerbt_gpu.cpp        |  64 +++
 src/zgerfs_nopiv_gpu.cpp  | 190 ++++++++
 src/zgesv_nopiv_gpu.cpp   |  35 ++
 src/zgesv_rbt.cpp         | 198 +++++++++
 src/zgetrf_nopiv_gpu.cpp  | 150 +++++++
 src/zgetrs_nopiv_gpu.cpp  |  60 +++
 13 files changed, 1784 insertions(+), 438 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc61af74d..ea852439f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,16 +1,23 @@
-cmake_minimum_required( VERSION 3.18 )
+cmake_minimum_required(VERSION 3.18)
 
 # ----------------------------------------
 # to disable Fortran, set this to "off"
 # see also -DADD_ below
-option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON )
+option(USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON)
 
 if (USE_FORTRAN)
-    project( MAGMA C CXX Fortran )
-else()
-    project( MAGMA C CXX )
-endif()
-
+    project(MAGMA C CXX Fortran)
+else ()
+    project(MAGMA C CXX)
+endif ()
+
+FIND_PROGRAM(PROGRAM_CCACHE ccache)
+IF (PROGRAM_CCACHE)
+    SET(CMAKE_CXX_COMPILER_LAUNCHER ${PROGRAM_CCACHE})
+    SET(CMAKE_C_COMPILER_LAUNCHER ${PROGRAM_CCACHE})
+    SET(CMAKE_CUDA_COMPILER_LAUNCHER ${PROGRAM_CCACHE})
+    SET(CMAKE_OPENCL_COMPILER_LAUNCHER ${PROGRAM_CCACHE})
+ENDIF ()
 
 # ----------------------------------------
 # to show compile commands, set this here or use 'make VERBOSE=1'
@@ -18,17 +25,17 @@ endif()
 
 # ----------------------------------------
 # MAGMA requires one backend to be enabled
-option(MAGMA_ENABLE_CUDA     "Enable the CUDA backend"  OFF)
-option(MAGMA_ENABLE_HIP      "Enable the HIP  backend"  OFF)
+option(MAGMA_ENABLE_CUDA "Enable the CUDA backend" OFF)
+option(MAGMA_ENABLE_HIP "Enable the HIP  backend" OFF)
 
 # check if one backend has been enabled
 if (NOT MAGMA_ENABLE_CUDA AND
-    NOT MAGMA_ENABLE_HIP
-    )
-  message(STATUS "MAGMA requires one enabled backend!")
-  message(STATUS "Building CUDA backend")
-  set( MAGMA_ENABLE_CUDA ON )
-endif()
+        NOT MAGMA_ENABLE_HIP
+)
+    message(STATUS "MAGMA requires one enabled backend!")
+    message(STATUS "Building CUDA backend")
+    set(MAGMA_ENABLE_CUDA ON)
+endif ()
 
 # ----------------------------------------
 # don't regenerate files during make.
@@ -40,25 +47,25 @@ set(CMAKE_SUPPRESS_REGENERATION on)
 # ----------------------------------------
 # force an out-of-source build, to not overwrite the existing Makefiles
 # (out-of-source is cleaner, too)
-string( COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" MAGMA_COMPILE_INPLACE )
+string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" MAGMA_COMPILE_INPLACE)
 if (MAGMA_COMPILE_INPLACE)
-    message( FATAL_ERROR "Compiling MAGMA with CMake requires an out-of-source build. To proceed:
+    message(FATAL_ERROR "Compiling MAGMA with CMake requires an out-of-source build. To proceed:
     rm -rf CMakeCache.txt CMakeFiles/   # delete files in ${CMAKE_SOURCE_DIR}
     mkdir build
     cd build
     cmake ..
-    make" )
-endif()
+    make")
+endif ()
 
 
 # ----------------------------------------
 # prefer shared libraries
-option( BUILD_SHARED_LIBS "If on, build shared libraries, otherwise build static libraries" ON )
+option(BUILD_SHARED_LIBS "If on, build shared libraries, otherwise build static libraries" ON)
 
 # prefer /usr/local/magma, instead of /usr/local.
 if (UNIX AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
     set(CMAKE_INSTALL_PREFIX "/usr/local/magma" CACHE PATH "..." FORCE)
-endif()
+endif ()
 
 # ----------------------------------------
 # use C++14 and C99
@@ -70,92 +77,96 @@ CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X)
 CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC)
 if (COMPILER_SUPPORTS_CXX14)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
-elseif(COMPILER_SUPPORTS_CXX0X)
+elseif (COMPILER_SUPPORTS_CXX0X)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
-else()
+else ()
     message( WARNING "The compiler ${CMAKE_CXX_COMPILER} doesn't support the -std=c++14 flag. Some code may not compile.")
-endif()
+endif ()
 
 CHECK_C_COMPILER_FLAG("-std=c99" COMPILER_SUPPORTS_C99)
 if (COMPILER_SUPPORTS_C99)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
-else()
-    message( WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.")
-endif()
+else ()
+    message(WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.")
+endif ()
 
 
 # ----------------------------------------
 # check Fortran name mangling
 if (USE_FORTRAN)
-    include( FortranCInterface )
-    FortranCInterface_HEADER( ${CMAKE_SOURCE_DIR}/include/magma_mangling_cmake.h MACRO_NAMESPACE MAGMA_ )
-else()
+    include(FortranCInterface)
+    FortranCInterface_HEADER(${CMAKE_SOURCE_DIR}/include/magma_mangling_cmake.h MACRO_NAMESPACE MAGMA_)
+else ()
     # set one of -DADD_, -DUPCASE, or -DNOCHANGE. See README.
-    message( STATUS "Building without Fortran compiler" )
-    set( FORTRAN_CONVENTION "-DADD_" CACHE STRING "Fortran calling convention, one of -DADD_, -DNOCHANGE, -DUPCASE" )
-    set_property( CACHE FORTRAN_CONVENTION PROPERTY STRINGS -DADD_ -DNOCHANGE -DUPCASE )
-    message( STATUS "    Using ${FORTRAN_CONVENTION} for Fortran calling convention" )
-    set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   ${FORTRAN_CONVENTION}" )
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FORTRAN_CONVENTION}" )
+    message(STATUS "Building without Fortran compiler")
+    set(FORTRAN_CONVENTION "-DADD_" CACHE STRING "Fortran calling convention, one of -DADD_, -DNOCHANGE, -DUPCASE")
+    set_property(CACHE FORTRAN_CONVENTION PROPERTY STRINGS -DADD_ -DNOCHANGE -DUPCASE)
+    message(STATUS "    Using ${FORTRAN_CONVENTION} for Fortran calling convention")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}   ${FORTRAN_CONVENTION}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FORTRAN_CONVENTION}")
     # see also NVCC_FLAGS below
-endif()
+endif ()
 
 
 # ----------------------------------------
 # locate OpenMP
-find_package( OpenMP )
+find_package(OpenMP)
 if (OPENMP_FOUND)
-    message( STATUS "Found OpenMP" )
-    message( STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}" )
-    message( STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" )
-    set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
-endif()
+    message(STATUS "Found OpenMP")
+    message(STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}")
+    message(STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif ()
 
 # ----------------------------------------
 # locate CUDA libraries
 if (MAGMA_ENABLE_CUDA)
-    enable_language( CUDA )
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+        set(CMAKE_CUDA_COMPILER_WORKS ON) # Fix for intel compiler and CUDA
+    endif ()
+    enable_language(CUDA)
 
-    set( CUDA_NAMES
-         "one or more of "
+    set(CUDA_NAMES
+            "one or more of "
          "Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, "
-         "or valid sm_XY or sm_XYZ" )
-    set( GPU_TARGET "" CACHE STRING
-         "CUDA architectures to compile for, overrides CMAKE_CUDA_ARCHITECTURES; ${CUDA_NAMES}" )
-    find_package( CUDAToolkit )
+            "or valid sm_XY or sm_XYZ")
+    set(GPU_TARGET "" CACHE STRING
+            "CUDA architectures to compile for, overrides CMAKE_CUDA_ARCHITECTURES; ${CUDA_NAMES}")
+    find_package(CUDAToolkit)
     if (CUDAToolkit_FOUND)
-        message( STATUS "Found CUDA ${CUDA_VERSION}" )
-        message( STATUS "    CUDA_CUDART_LIBRARY: CUDA::cudart" )
+        message(STATUS "Found CUDA ${CUDA_VERSION}")
+        message(STATUS "    CUDA_CUDART_LIBRARY: CUDA::cudart")
         #message( STATUS "    CUDA_CUBLAS_LIBRARIES: CUDA::cublas" )
 
-        include_directories( ${CUDAToolkit_INCLUDE_DIRS} )
+        include_directories(${CUDAToolkit_INCLUDE_DIRS})
+        link_directories(${CUDAToolkit_LIBRARY_DIR})
 
         if (GPU_TARGET)
             # Map names to architectures.
             if (GPU_TARGET MATCHES Fermi)
-                set( GPU_TARGET "${GPU_TARGET} sm_20" )
-            endif()
+                set(GPU_TARGET "${GPU_TARGET} sm_20")
+            endif ()
 
             if (GPU_TARGET MATCHES Kepler)
-                set( GPU_TARGET "${GPU_TARGET} sm_30 sm_35 sm_37" )
-            endif()
+                set(GPU_TARGET "${GPU_TARGET} sm_30 sm_35 sm_37")
+            endif ()
 
             if (GPU_TARGET MATCHES Maxwell)
                 set( GPU_TARGET "${GPU_TARGET} sm_50 sm_52 sm_53" )
-            endif()
+            endif ()
 
             if (GPU_TARGET MATCHES Pascal)
                 set( GPU_TARGET "${GPU_TARGET} sm_60 sm_61 sm_62" )
-            endif()
+            endif ()
 
             if (GPU_TARGET MATCHES Volta)
                 set( GPU_TARGET "${GPU_TARGET} sm_70 sm_72" )
-            endif()
+            endif ()
 
             if (GPU_TARGET MATCHES Turing)
-                set( GPU_TARGET "${GPU_TARGET} sm_75" )
-            endif()
+                set(GPU_TARGET "${GPU_TARGET} sm_75")
+            endif ()
 
             if (GPU_TARGET MATCHES Ampere)
                 set( GPU_TARGET "${GPU_TARGET} sm_80 sm_86 sm_87" )
@@ -163,223 +174,223 @@ if (MAGMA_ENABLE_CUDA)
 
             if (GPU_TARGET MATCHES Ada)
                 set( GPU_TARGET "${GPU_TARGET} sm_89" )
-            endif()
+            endif ()
 
             if (GPU_TARGET MATCHES Hopper)
                 set( GPU_TARGET "${GPU_TARGET} sm_90 sm_90a" )
-            endif()
+            endif ()
 
             # Find all sm_XY and sm_XYZ, then strip off sm_.
             string( REGEX MATCHALL "sm_[0-9][0-9a-z]+" sms "${GPU_TARGET}" )
-            string( REPLACE "sm_" "" __cuda_architectures "${sms}" )
+            string(REPLACE "sm_" "" __cuda_architectures "${sms}")
 
             if (NOT __cuda_architectures)
-                message( FATAL_ERROR
-                         "GPU_TARGET must contain ${CUDA_NAMES}. "
-                         "Was: ${GPU_TARGET}" )
-            endif()
+                message(FATAL_ERROR
+                        "GPU_TARGET must contain ${CUDA_NAMES}. "
+                        "Was: ${GPU_TARGET}")
+            endif ()
 
-            set( CMAKE_CUDA_ARCHITECTURES "${__cuda_architectures}" )
-        endif()
+            set(CMAKE_CUDA_ARCHITECTURES "${__cuda_architectures}")
+        endif ()
 
-        message( STATUS "    Compile for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" )
-        set( MAGMA_CUDA_ARCH "${CMAKE_CUDA_ARCHITECTURES}" )
+        message(STATUS "    Compile for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}")
+        set(MAGMA_CUDA_ARCH "${CMAKE_CUDA_ARCHITECTURES}")
 
         # Find minimum arch in CMAKE_CUDA_ARCHITECTURES, if they're all numeric.
-        set( min_arch 9999 )
-        foreach( arch ${CMAKE_CUDA_ARCHITECTURES} )
+        set(min_arch 9999)
+        foreach (arch ${CMAKE_CUDA_ARCHITECTURES})
             if (arch MATCHES "^([0-9]+)")  # 80-real, 80-virtual, etc. okay
                 if (CMAKE_MATCH_1 LESS min_arch)
-                    set( min_arch "${CMAKE_MATCH_1}" )
-                endif()
-            else()
-                set( min_arch 0 )  # arch like "native", min unknown
+                    set(min_arch "${CMAKE_MATCH_1}")
+                endif ()
+            else ()
+                set(min_arch 0)  # arch like "native", min unknown
                 break()
-            endif()
-        endforeach()
+            endif ()
+        endforeach ()
         # Append zero, so it is comparable to '__CUDA_ARCH__'
-        set( MAGMA_CUDA_ARCH_MIN "${min_arch}0" )
+        set(MAGMA_CUDA_ARCH_MIN "${min_arch}0")
 
-        add_library( magma_nvcc_flags INTERFACE )
+        add_library(magma_nvcc_flags INTERFACE)
         if (COMPILER_SUPPORTS_FPIC)
             target_compile_options(magma_nvcc_flags
-                INTERFACE
-                $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options;-fPIC,${FORTRAN_CONVENTION}>
+                    INTERFACE
+                    $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options;-fPIC,${FORTRAN_CONVENTION}>
             )
-        else()
+        else ()
             # No Position Independent Code on Windows.
             # Compiler will complain if you add that flag.
             target_compile_options(magma_nvcc_flags
-                INTERFACE
-                $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options;${FORTRAN_CONVENTION}>
+                    INTERFACE
+                    $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options;${FORTRAN_CONVENTION}>
             )
-        endif()
+        endif ()
 
-        set( MAGMA_HAVE_CUDA "1" )
+        set(MAGMA_HAVE_CUDA "1")
 
-        message( STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MAGMA_CUDA_ARCH_MIN}" )
-    else()
-        message( STATUS "Could not find CUDA" )
-    endif()
-endif()
+        message(STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MAGMA_CUDA_ARCH_MIN}")
+    else ()
+        message(STATUS "Could not find CUDA")
+    endif ()
+endif ()
 
 # ----------------------------------------
 # locate HIP/ROCm libraries
 if (MAGMA_ENABLE_HIP)
-  set( GPU_TARGET "gfx900" CACHE STRING "HIP architectures to compile for" )
-  list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/lib/cmake/hip)
-  find_package( HIP )
-  if (HIP_FOUND)
-    message( STATUS "Found HIP ${HIP_VERSION}" )
-    message( STATUS "    HIP_INCLUDE_DIRS:   ${HIP_INCLUDE_DIRS}"   )
-    message( STATUS "GPU_TARGET:  ${GPU_TARGET}"   )
-
-    include_directories( ${HIP_INCLUDE_DIRS} )
-
-    set(HIP_SEPARABLE_COMPILATION ON)
-
-    if (GPU_TARGET MATCHES kaveri)
-      set( GPU_TARGET ${GPU_TARGET} gfx700 )
-    endif()
-
-    if (GPU_TARGET MATCHES hawaii)
-      set( GPU_TARGET ${GPU_TARGET} gfx701 )
-    endif()
-
-    if (GPU_TARGET MATCHES kabini)
-      set( GPU_TARGET ${GPU_TARGET} gfx703 )
-    endif()
-
-    if (GPU_TARGET MATCHES mullins)
-      set( GPU_TARGET ${GPU_TARGET} gfx703 )
-    endif()
-
-    if (GPU_TARGET MATCHES bonaire)
-      set( GPU_TARGET ${GPU_TARGET} gfx704 )
-    endif()
-
-    if (GPU_TARGET MATCHES carrizo)
-      set( GPU_TARGET ${GPU_TARGET} gfx801 )
-    endif()
-
-    if (GPU_TARGET MATCHES iceland)
-      set( GPU_TARGET ${GPU_TARGET} gfx802 )
-    endif()
-
-    if (GPU_TARGET MATCHES tonga)
-      set( GPU_TARGET ${GPU_TARGET} gfx802 )
-    endif()
-
-    if (GPU_TARGET MATCHES fiji)
-      set( GPU_TARGET ${GPU_TARGET} gfx803 )
-    endif()
-
-    if (GPU_TARGET MATCHES polaris10)
-      set( GPU_TARGET ${GPU_TARGET} gfx803 )
-    endif()
-
-    if (GPU_TARGET MATCHES tongapro)
-      set( GPU_TARGET ${GPU_TARGET} gfx805 )
-    endif()
-
-    if (GPU_TARGET MATCHES stoney)
-      set( GPU_TARGET ${GPU_TARGET} gfx810 )
-    endif()
-
-    set( DEVCCFLAGS  "" )
-    set(VALID_GFXS "700;701;702;703;704;705;801;802;803;805;810;900;902;904;906;908;909;90c;1010;1011;1012;1030;1031;1032;1033")
-    foreach( GFX ${VALID_GFXS} )
-      if ( GPU_TARGET MATCHES gfx${GFX} )
-        set( DEVCCFLAGS ${DEVCCFLAGS} --offload-arch=gfx${GFX} )
-      endif()
-    endforeach()
-
-    set( DEVCCFLAGS ${DEVCCFLAGS} -fPIC ${FORTRAN_CONVENTION} )
-    set(MAGMA_HAVE_HIP "1")
-    message( STATUS "Define -DMAGMA_HAVE_HIP" )
-
-    set_property(TARGET hip::device APPEND PROPERTY COMPATIBLE_INTERFACE_BOOL INTERFACE_HIP_DEVICE_COMPILE)
-    set_property(TARGET hip::device PROPERTY INTERFACE_HIP_DEVICE_COMPILE ON)
-    set(GPU_ARCH_FLAGS ${DEVCCFLAGS})
-
-    #add_compile_options(${GPU_ARCH_FLAGS})
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__" )
-    option(ROCM_CORE "Location of the rocm-core package")
-    execute_process(COMMAND "${CMAKE_SOURCE_DIR}/tools/get-rocm-version.sh" "${ROCM_CORE}" OUTPUT_VARIABLE ROCM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
-    message(STATUS "ROCM_VERSION=${ROCM_VERSION}")
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DROCM_VERSION=${ROCM_VERSION}" )
-  else()
-    message( STATUS "Could not find HIP" )
-  endif()
-endif()
+    set(GPU_TARGET "gfx900" CACHE STRING "HIP architectures to compile for")
+    list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/lib/cmake/hip)
+    find_package(HIP)
+    if (HIP_FOUND)
+        message(STATUS "Found HIP ${HIP_VERSION}")
+        message(STATUS "    HIP_INCLUDE_DIRS:   ${HIP_INCLUDE_DIRS}")
+        message(STATUS "GPU_TARGET:  ${GPU_TARGET}")
+
+        include_directories(${HIP_INCLUDE_DIRS})
+
+        set(HIP_SEPARABLE_COMPILATION ON)
+
+        if (GPU_TARGET MATCHES kaveri)
+            set(GPU_TARGET ${GPU_TARGET} gfx700)
+        endif ()
+
+        if (GPU_TARGET MATCHES hawaii)
+            set(GPU_TARGET ${GPU_TARGET} gfx701)
+        endif ()
+
+        if (GPU_TARGET MATCHES kabini)
+            set(GPU_TARGET ${GPU_TARGET} gfx703)
+        endif ()
+
+        if (GPU_TARGET MATCHES mullins)
+            set(GPU_TARGET ${GPU_TARGET} gfx703)
+        endif ()
+
+        if (GPU_TARGET MATCHES bonaire)
+            set(GPU_TARGET ${GPU_TARGET} gfx704)
+        endif ()
+
+        if (GPU_TARGET MATCHES carrizo)
+            set(GPU_TARGET ${GPU_TARGET} gfx801)
+        endif ()
+
+        if (GPU_TARGET MATCHES iceland)
+            set(GPU_TARGET ${GPU_TARGET} gfx802)
+        endif ()
+
+        if (GPU_TARGET MATCHES tonga)
+            set(GPU_TARGET ${GPU_TARGET} gfx802)
+        endif ()
+
+        if (GPU_TARGET MATCHES fiji)
+            set(GPU_TARGET ${GPU_TARGET} gfx803)
+        endif ()
+
+        if (GPU_TARGET MATCHES polaris10)
+            set(GPU_TARGET ${GPU_TARGET} gfx803)
+        endif ()
+
+        if (GPU_TARGET MATCHES tongapro)
+            set(GPU_TARGET ${GPU_TARGET} gfx805)
+        endif ()
+
+        if (GPU_TARGET MATCHES stoney)
+            set(GPU_TARGET ${GPU_TARGET} gfx810)
+        endif ()
+
+        set(DEVCCFLAGS "")
+        set(VALID_GFXS "700;701;702;703;704;705;801;802;803;805;810;900;902;904;906;908;909;90c;1010;1011;1012;1030;1031;1032;1033")
+        foreach (GFX ${VALID_GFXS})
+            if (GPU_TARGET MATCHES gfx${GFX})
+                set(DEVCCFLAGS ${DEVCCFLAGS} --offload-arch=gfx${GFX})
+            endif ()
+        endforeach ()
+
+        set(DEVCCFLAGS ${DEVCCFLAGS} -fPIC ${FORTRAN_CONVENTION})
+        set(MAGMA_HAVE_HIP "1")
+        message(STATUS "Define -DMAGMA_HAVE_HIP")
+
+        set_property(TARGET hip::device APPEND PROPERTY COMPATIBLE_INTERFACE_BOOL INTERFACE_HIP_DEVICE_COMPILE)
+        set_property(TARGET hip::device PROPERTY INTERFACE_HIP_DEVICE_COMPILE ON)
+        set(GPU_ARCH_FLAGS ${DEVCCFLAGS})
+
+        #add_compile_options(${GPU_ARCH_FLAGS})
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
+        option(ROCM_CORE "Location of the rocm-core package")
+        execute_process(COMMAND "${CMAKE_SOURCE_DIR}/tools/get-rocm-version.sh" "${ROCM_CORE}" OUTPUT_VARIABLE ROCM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
+        message(STATUS "ROCM_VERSION=${ROCM_VERSION}")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DROCM_VERSION=${ROCM_VERSION}")
+    else ()
+        message(STATUS "Could not find HIP")
+    endif ()
+endif ()
 
 # ----------------------------------------
 # locate LAPACK libraries
 
 set(BLA_VENDOR "" CACHE STRING
-    "Use specified BLAS library. See https://cmake.org/cmake/help/latest/module/FindBLAS.html")
+        "Use specified BLAS library. See https://cmake.org/cmake/help/latest/module/FindBLAS.html")
 
 # List from CMake 3.17, minus some obsolete ones:
 # PhiPACK, Compaq CXML, DEC Alpha DXML, SunPerf, SGI SCSL, SGIMATH,
 # Intel, NAS (Apple veclib).
 # FLAME is BLIS.
 set_property(CACHE BLA_VENDOR PROPERTY STRINGS
-    "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL"
-    "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq"
-    "ACML" "ACML_MP" "ACML_GPU"
-    "Apple"
-    "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp"
-    "Generic")
-
-set( LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" )
+        "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL"
+        "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq"
+        "ACML" "ACML_MP" "ACML_GPU"
+        "Apple"
+        "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp"
+        "Generic")
+
+set(LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search")
 if (LAPACK_LIBRARIES STREQUAL "")
-    message( STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake." )
-    find_package( LAPACK )
+    message(STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake.")
+    find_package(LAPACK)
     # force showing updated LAPACK_LIBRARIES in ccmake / cmake-gui.
-    set( LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE )
-else()
-    message( STATUS "User set LAPACK_LIBRARIES. To change, edit LAPACK_LIBRARIES using ccmake (set to empty to enable search)." )
+    set(LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE)
+else ()
+    message(STATUS "User set LAPACK_LIBRARIES. To change, edit LAPACK_LIBRARIES using ccmake (set to empty to enable search).")
     # Check either -lname syntax or file existence
-    foreach( LIB ${LAPACK_LIBRARIES} )
+    foreach (LIB ${LAPACK_LIBRARIES})
         if (NOT LIB MATCHES "^-l[a-zA-Z0-9_-]+$")
-        if (NOT EXISTS ${LIB})
-            message( WARNING "\n      Warning: file ${LIB} does not exist.\n" )
-        endif()
-        endif()
-    endforeach()
-endif()
+            if (NOT EXISTS ${LIB})
+                message(WARNING "\n      Warning: file ${LIB} does not exist.\n")
+            endif ()
+        endif ()
+    endforeach ()
+endif ()
 
 # If using MKL, add it to includes and define MAGMA_WITH_MKL
 # Initially, this gets MKLROOT from environment, but then the user can edit it.
 if (LAPACK_LIBRARIES MATCHES mkl_core)
-    set( MKLROOT $ENV{MKLROOT} CACHE STRING "MKL installation directory" )
+    set(MKLROOT $ENV{MKLROOT} CACHE STRING "MKL installation directory")
     if (MKLROOT STREQUAL "")
-        message( WARNING "LAPACK_LIBRARIES has MKL, but MKLROOT not set; can't add include directory." )
-    else()
-        message( STATUS "MKLROOT set to ${MKLROOT}. To change, edit MKLROOT using ccmake." )
+        message(WARNING "LAPACK_LIBRARIES has MKL, but MKLROOT not set; can't add include directory.")
+    else ()
+        message(STATUS "MKLROOT set to ${MKLROOT}. To change, edit MKLROOT using ccmake.")
         if (NOT EXISTS ${MKLROOT})
-            message( FATAL_ERROR "MKLROOT ${MKLROOT} directory does not exist." )
-        endif()
-        include_directories( ${MKLROOT}/include )
-        add_definitions( -DMAGMA_WITH_MKL )
-        message( STATUS "Define -DMAGMA_WITH_MKL" )
-    endif()
-endif()
+            message(FATAL_ERROR "MKLROOT ${MKLROOT} directory does not exist.")
+        endif ()
+        include_directories(${MKLROOT}/include)
+        add_definitions(-DMAGMA_WITH_MKL)
+        message(STATUS "Define -DMAGMA_WITH_MKL")
+    endif ()
+endif ()
 
 
 # ----------------------------------------
 # save magma.lib, magma_sparse.lib, etc. in lib/
-set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY lib )
-set( CMAKE_LIBRARY_OUTPUT_DIRECTORY lib )
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY lib)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib)
 
 
 # ----------------------------------------
 # list of sources
 if (MAGMA_ENABLE_CUDA)
-  include( ${CMAKE_SOURCE_DIR}/CMake.src.cuda )
-else()
-  include( ${CMAKE_SOURCE_DIR}/CMake.src.hip )
-endif()
+    include(${CMAKE_SOURCE_DIR}/CMake.src.cuda)
+else ()
+    include(${CMAKE_SOURCE_DIR}/CMake.src.hip)
+endif ()
 
 # ----------------------------------------
 # common flags
@@ -390,20 +401,20 @@ if (WIN32)
     #     -Wall is way too verbose; use -W4
     #     -MP enables parallel builds
     #     -std=c99 is not implemented, so skip that
-    string( REGEX REPLACE " */W3" "" CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}"   )
-    string( REGEX REPLACE " */W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" )
-    set( CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY" )
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY" )
-else()
+    string(REGEX REPLACE " */W3" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+    string(REGEX REPLACE " */W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY")
+else ()
     # Primarily for gcc / nvcc:
     # Ignore unused static functions in headers.
-    set( CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -Wall -Wno-unused-function" )
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function" )
-endif()
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unused-function")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function")
+endif ()
 
 if (CMAKE_HOST_APPLE)
     # Use rpaths, which is on by default in CMake 3.
-    set( CMAKE_MACOSX_RPATH 1 )
+    set(CMAKE_MACOSX_RPATH 1)
 
     # 64-bit veclib (Accelerate) has issues; substitute correct functions from LAPACK.
     # (The issue is single precision functions that return doubles;
@@ -411,48 +422,48 @@ if (CMAKE_HOST_APPLE)
     # but this is not feasible in Fortran.)
     if (LAPACK_LIBRARIES MATCHES "Accelerate")
         if (USE_FORTRAN)
-            message( STATUS "MacOS X: adding blas_fix library" )
-            add_library( blas_fix ${libblas_fix_src} )
-            target_link_libraries( blas_fix
-                ${LAPACK_LIBRARIES}
+            message(STATUS "MacOS X: adding blas_fix library")
+            add_library(blas_fix ${libblas_fix_src})
+            target_link_libraries(blas_fix
+                    ${LAPACK_LIBRARIES}
             )
-            set( blas_fix blas_fix )
-            set( blas_fix_lib -lblas_fix )
-        else()
-            message( WARNING "\n      Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n" )
-        endif()
-    endif()
+            set(blas_fix blas_fix)
+            set(blas_fix_lib -lblas_fix)
+        else ()
+            message(WARNING "\n      Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n")
+        endif ()
+    endif ()
 
-    set( CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY" )
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY" )
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY")
 
     # previously, just compile as 32-bit, but CUDA 6.5 no longer has 32-bit FAT libraries
     ## set( CMAKE_C_FLAGS       "${CMAKE_C_FLAGS} -m32" )
     ## set( CMAKE_CXX_FLAGS     "${CMAKE_CXX_FLAGS} -m32" )
     ## set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -m32" )
     ## set( CUDA_64_BIT_DEVICE_CODE OFF )
-endif()
+endif ()
 
-include_directories( "${CMAKE_BINARY_DIR}/include" )
+include_directories("${CMAKE_BINARY_DIR}/include")
 
-include_directories( include )
-include_directories( control )
+include_directories(include)
+include_directories(control)
 if (MAGMA_ENABLE_CUDA)
-  include_directories( magmablas )  # e.g., shuffle.cuh
-else()
-  include_directories( magmablas_hip )  # e.g., shuffle.cuh
-endif()
+    include_directories(magmablas)  # e.g., shuffle.cuh
+else ()
+    include_directories(magmablas_hip)  # e.g., shuffle.cuh
+endif ()
 
 # Need to check sizeof(void*) after setting flags above;
 # CMAKE_SIZEOF_VOID_P can be wrong.
-include( CheckTypeSize )
-CHECK_TYPE_SIZE( void* SIZEOF_VOID_PTR )
+include(CheckTypeSize)
+CHECK_TYPE_SIZE(void* SIZEOF_VOID_PTR)
 if (USE_FORTRAN)
-    set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Dmagma_devptr_t=\"integer\(kind=${SIZEOF_VOID_PTR}\)\"" )
-endif()
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Dmagma_devptr_t=\"integer\(kind=${SIZEOF_VOID_PTR}\)\"")
+endif ()
 
 # Configure config
-configure_file(${CMAKE_SOURCE_DIR}/include/magma_config.h.in  ${CMAKE_BINARY_DIR}/include/magma_config.h)
+configure_file(${CMAKE_SOURCE_DIR}/include/magma_config.h.in ${CMAKE_BINARY_DIR}/include/magma_config.h)
 
 # ----------------------------------------
 # compile MAGMA library
@@ -462,24 +473,24 @@ if (WIN32)
     # understand that .F90 files should be pre-processed.
 
     # separate Fortran and C/C++/CUDA files
-    foreach( filename ${libmagma_all} )
+    foreach (filename ${libmagma_all})
         if (filename MATCHES "\\.(f)$")  # |f90|F90
-            list( APPEND libmagma_all_f   ${filename} )
+            list(APPEND libmagma_all_f ${filename})
         elseif (filename MATCHES "\\.(c|cu|cpp)$")
-            list( APPEND libmagma_all_cpp ${filename} )
-        endif()
-    endforeach()
+            list(APPEND libmagma_all_cpp ${filename})
+        endif ()
+    endforeach ()
     #message( "libmagma_all_cpp ${libmagma_all_cpp}" )
     #message( "libmagma_all_f   ${libmagma_all_f}"   )
 
     # on Windows, Fortran files aren't compiled if listed here...
-    add_library( magma ${libmagma_all_cpp} )
-    target_link_libraries( magma
-        ${LAPACK_LIBRARIES}
-        CUDA::cudart
-        CUDA::cublas
-        CUDA::cusparse
-        magma_nvcc_flags
+    add_library(magma ${libmagma_all_cpp})
+    target_link_libraries(magma
+            ${LAPACK_LIBRARIES}
+            CUDA::cudart
+            CUDA::cublas
+            CUDA::cusparse
+            magma_nvcc_flags
     )
 
     # no Fortran files at the moment (how to test libmagma_all_f is not empty?),
@@ -494,51 +505,51 @@ if (WIN32)
     ##     CUDA::cusparse
     ## )
     ## make list of Fortran .mod files to install, as below
-else()
+else ()
     # Unix doesn't seem to have a problem with mixing C, CUDA, and Fortran files
     if (MAGMA_ENABLE_CUDA)
-    #message(FATAL_ERROR "${libmagma_all}")
-      add_library( magma ${libmagma_all} )
-      target_link_libraries( magma
-        ${blas_fix}
-        ${LAPACK_LIBRARIES}
-        CUDA::cudart
-        CUDA::cublas
-        CUDA::cusparse
-        magma_nvcc_flags
+        #message(FATAL_ERROR "${libmagma_all}")
+        add_library(magma ${libmagma_all})
+        target_link_libraries(magma
+                ${blas_fix}
+                ${LAPACK_LIBRARIES}
+                CUDA::cudart
+                CUDA::cublas
+                CUDA::cusparse
+                magma_nvcc_flags
         )
-    else()
-      find_package( hipBLAS )
-      if (hipBLAS_FOUND)
-        message( STATUS "Found rocBLAS ${rocBLAS_VERSION}" )
-      endif()
-      find_package( hipSPARSE )
-      if (hipSPARSE_FOUND)
-        message( STATUS "Found rocSPARSE ${rocSPARSE_VERSION}" )
-      endif()
-      add_library( magma ${libmagma_all} )
-      target_link_libraries( magma
-        hip::host
-        ${blas_fix}
-        ${LAPACK_LIBRARIES}
+    else ()
+        find_package(hipBLAS)
+        if (hipBLAS_FOUND)
+            message(STATUS "Found rocBLAS ${rocBLAS_VERSION}")
+        endif ()
+        find_package(hipSPARSE)
+        if (hipSPARSE_FOUND)
+            message(STATUS "Found rocSPARSE ${rocSPARSE_VERSION}")
+        endif ()
+        add_library(magma ${libmagma_all})
+        target_link_libraries(magma
+                hip::host
+                ${blas_fix}
+                ${LAPACK_LIBRARIES}
         hip::device
-        roc::hipblas
-        roc::hipsparse
+                roc::hipblas
+                roc::hipsparse
         )
-    endif()
+    endif ()
 
     if (USE_FORTRAN)
         # make list of Fortran .mod files to install
-        foreach( filename ${libmagma_all} )
+        foreach (filename ${libmagma_all})
             if (filename MATCHES "\\.(f90|F90)$")
                 # mod files seem to wind up in root build directory
-                get_filename_component( fmod ${filename} NAME_WE )
-                list( APPEND modules "${CMAKE_BINARY_DIR}/${fmod}.mod" )
-            endif()
-        endforeach()
-    endif()
-endif()
-add_custom_target( lib DEPENDS magma )
+                get_filename_component(fmod ${filename} NAME_WE)
+                list(APPEND modules "${CMAKE_BINARY_DIR}/${fmod}.mod")
+            endif ()
+        endforeach ()
+    endif ()
+endif ()
+add_custom_target(lib DEPENDS magma)
 
 
 # ----------------------------------------
@@ -546,35 +557,35 @@ add_custom_target( lib DEPENDS magma )
 # If use fortran, compile only Fortran files, not magma_[sdcz]_no_fortran.cpp
 # else,           compile only C++     files, not Fortran files
 if (USE_FORTRAN)
-    foreach( filename ${liblapacktest_all} )
+    foreach (filename ${liblapacktest_all})
         if (filename MATCHES "\\.(f|f90|F90)$")
-            list( APPEND liblapacktest_all_f ${filename} )
-        endif()
-    endforeach()
-    add_library( lapacktest ${liblapacktest_all_f} )
-else()
+            list(APPEND liblapacktest_all_f ${filename})
+        endif ()
+    endforeach ()
+    add_library(lapacktest ${liblapacktest_all_f})
+else ()
     # alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp
-    foreach( filename ${liblapacktest_all} )
+    foreach (filename ${liblapacktest_all})
         if (filename MATCHES "\\.(c|cu|cpp)$")
-            list( APPEND liblapacktest_all_cpp ${filename} )
-        endif()
-    endforeach()
-    add_library( lapacktest ${liblapacktest_all_cpp} )
-endif()
-target_link_libraries( lapacktest
-    ${blas_fix}
-    ${LAPACK_LIBRARIES}
+            list(APPEND liblapacktest_all_cpp ${filename})
+        endif ()
+    endforeach ()
+    add_library(lapacktest ${liblapacktest_all_cpp})
+endif ()
+target_link_libraries(lapacktest
+        ${blas_fix}
+        ${LAPACK_LIBRARIES}
 )
 
 
 # ----------------------------------------
 # compile tester library
-add_library( tester ${libtest_all} )
-target_link_libraries( tester
-    magma
-    lapacktest
-    ${blas_fix}
-    ${LAPACK_LIBRARIES}
+add_library(tester ${libtest_all})
+target_link_libraries(tester
+        magma
+        lapacktest
+        ${blas_fix}
+        ${LAPACK_LIBRARIES}
 )
 
 
@@ -583,38 +594,38 @@ target_link_libraries( tester
 
 # sparse doesn't have Fortran at the moment, so no need for above shenanigans
 if (MAGMA_ENABLE_CUDA)
-  include_directories( sparse/include )
-  include_directories( sparse/control )
-else()
-  include_directories( sparse_hip/include )
-  include_directories( sparse_hip/control )
-endif()
-include_directories( testing )
+    include_directories(sparse/include)
+    include_directories(sparse/control)
+else ()
+    include_directories(sparse_hip/include)
+    include_directories(sparse_hip/control)
+endif ()
+include_directories(testing)
 
 if (MAGMA_ENABLE_CUDA)
-  add_library( magma_sparse ${libsparse_all} )
+    add_library(magma_sparse ${libsparse_all})
   set_property(TARGET magma_sparse PROPERTY CUDA_STANDARD 14)
-  target_link_libraries( magma_sparse
-    magma
-    ${blas_fix}
-    ${LAPACK_LIBRARIES}
-    CUDA::cudart
-    CUDA::cublas
-    CUDA::cusparse
-    magma_nvcc_flags
+    target_link_libraries(magma_sparse
+            magma
+            ${blas_fix}
+            ${LAPACK_LIBRARIES}
+            CUDA::cudart
+            CUDA::cublas
+            CUDA::cusparse
+            magma_nvcc_flags
     )
-else()
-  add_library( magma_sparse ${libsparse_all} )
-  target_link_libraries( magma_sparse
-    magma
-    ${blas_fix}
-    ${LAPACK_LIBRARIES}
-    hip::device
-    roc::hipblas
-    roc::hipsparse
+else ()
+    add_library(magma_sparse ${libsparse_all})
+    target_link_libraries(magma_sparse
+            magma
+            ${blas_fix}
+            ${LAPACK_LIBRARIES}
+            hip::device
+            roc::hipblas
+            roc::hipsparse
     )
-endif()
-add_custom_target( sparse-lib DEPENDS magma_sparse )
+endif ()
+add_custom_target(sparse-lib DEPENDS magma_sparse)
 
 
 # ----------------------------------------
@@ -622,119 +633,119 @@ add_custom_target( sparse-lib DEPENDS magma_sparse )
 
 # save testers to testing/
 # save tester lib files to testing_lib/ to avoid cluttering lib/
-set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing )
-set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib )
-set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib )
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY testing)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib)
 
 # skip Fortran testers, which require an extra file from CUDA
-foreach( filename ${testing_all} )
+foreach (filename ${testing_all})
     if (filename MATCHES "\\.(c|cu|cpp)$")
-        list( APPEND testing_all_cpp ${filename} )
-    endif()
-endforeach()
-foreach( TEST ${testing_all_cpp} )
-    string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
-    string( REGEX REPLACE "testing/" "" EXE ${EXE} )
+        list(APPEND testing_all_cpp ${filename})
+    endif ()
+endforeach ()
+foreach (TEST ${testing_all_cpp})
+    string(REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST})
+    string(REGEX REPLACE "testing/" "" EXE ${EXE})
     #message( "${TEST} --> ${EXE}" )
-    add_executable( ${EXE} ${TEST} )
-    target_link_libraries( ${EXE} tester lapacktest magma )
-    list( APPEND testing ${EXE} )
-endforeach()
-add_custom_target( testing DEPENDS ${testing} )
+    add_executable(${EXE} ${TEST})
+    target_link_libraries(${EXE} tester lapacktest magma)
+    list(APPEND testing ${EXE})
+endforeach ()
+add_custom_target(testing DEPENDS ${testing})
 
 
 # ----------------------------------------
 # compile each sparse tester
 
 if (MAGMA_ENABLE_CUDA)
-  set(SPARSE_TEST_DIR "sparse/testing")
-else()
-  set(SPARSE_TEST_DIR "sparse_hip/testing")
-endif()
+    set(SPARSE_TEST_DIR "sparse/testing")
+else ()
+    set(SPARSE_TEST_DIR "sparse_hip/testing")
+endif ()
 
 
-set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" )
-cmake_policy( SET CMP0037 OLD)
-foreach( TEST ${sparse_testing_all} )
-    string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} )
-    string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} )
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}")
+cmake_policy(SET CMP0037 OLD)
+foreach (TEST ${sparse_testing_all})
+    string(REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST})
+    string(REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE})
     #message( "${TEST} --> ${EXE}" )
-    add_executable( ${EXE} ${TEST} )
-    target_link_libraries( ${EXE} magma_sparse magma )
-    list( APPEND sparse-testing ${EXE} )
-endforeach()
-add_custom_target( sparse-testing DEPENDS ${sparse-testing} )
+    add_executable(${EXE} ${TEST})
+    target_link_libraries(${EXE} magma_sparse magma)
+    list(APPEND sparse-testing ${EXE})
+endforeach ()
+add_custom_target(sparse-testing DEPENDS ${sparse-testing})
 
 
 # ----------------------------------------
 # what to install
-install( TARGETS magma magma_sparse ${blas_fix}
-         RUNTIME DESTINATION bin
-         LIBRARY DESTINATION lib
-         ARCHIVE DESTINATION lib )
+install(TARGETS magma magma_sparse ${blas_fix}
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
 if (MAGMA_ENABLE_CUDA)
-  file( GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" )
-else()
-  file( GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" )
-endif()
+    file(GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h")
+else ()
+    file(GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h")
+endif ()
 if (USE_FORTRAN)
-    install( FILES ${headers} ${modules}
-             DESTINATION include )
-else()
-    install( FILES ${headers} DESTINATION include )
-endif()
+    install(FILES ${headers} ${modules}
+            DESTINATION include)
+else ()
+    install(FILES ${headers} DESTINATION include)
+endif ()
 
 # ----------------------------------------
 # pkg-config
 get_target_property(MAGMA_INCLUDE magma INCLUDE_DIRECTORIES)
-foreach(dir ${MAGMA_INCLUDE})
+foreach (dir ${MAGMA_INCLUDE})
     string(APPEND INCLUDE_COMPILER_STRING "-I${dir} ")
-endforeach()
-set( MAGMA_INCLUDE "${INCLUDE_COMPILER_STRING}" )
-set( pkgconfig lib/pkgconfig/magma.pc )
-message( STATUS "pkgconfig ${pkgconfig}" )
-set( INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" )
-set( CFLAGS "${CMAKE_C_FLAGS}" )
-set( CXXFLAGS "${CMAKE_CXX_FLAGS}" )
+endforeach ()
+set(MAGMA_INCLUDE "${INCLUDE_COMPILER_STRING}")
+set(pkgconfig lib/pkgconfig/magma.pc)
+message(STATUS "pkgconfig ${pkgconfig}")
+set(INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
+set(CFLAGS "${CMAKE_C_FLAGS}")
+set(CXXFLAGS "${CMAKE_CXX_FLAGS}")
 # CMake finds the Accelerate directory; we want -framework Accelerate for linking.
-string( REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}" )
+string(REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}")
 if (MAGMA_ENABLE_CUDA)
-  string( REPLACE ";" " " LIBS
-    "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse")
-else()
-  string( REPLACE ";" " " LIBS
-     "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}" )
-#    "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" )
-endif()
-set( MAGMA_REQUIRED "" )
-configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY )
-install( FILES "${CMAKE_BINARY_DIR}/${pkgconfig}"
-         DESTINATION lib/pkgconfig )
+    string(REPLACE ";" " " LIBS
+            "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse")
+else ()
+    string(REPLACE ";" " " LIBS
+            "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}")
+    #    "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" )
+endif ()
+set(MAGMA_REQUIRED "")
+configure_file("${pkgconfig}.in" "${pkgconfig}" @ONLY)
+install(FILES "${CMAKE_BINARY_DIR}/${pkgconfig}"
+        DESTINATION lib/pkgconfig)
 
 # ----------------------------------------
-get_directory_property( compile_definitions COMPILE_DEFINITIONS )
+get_directory_property(compile_definitions COMPILE_DEFINITIONS)
 
-message( STATUS "Flags" )
-message( STATUS "    CMAKE_INSTALL_PREFIX:  ${CMAKE_INSTALL_PREFIX}" )
-message( STATUS "    CFLAGS:                ${CMAKE_C_FLAGS}" )
-message( STATUS "    CXXFLAGS:              ${CMAKE_CXX_FLAGS}" )
+message(STATUS "Flags")
+message(STATUS "    CMAKE_INSTALL_PREFIX:  ${CMAKE_INSTALL_PREFIX}")
+message(STATUS "    CFLAGS:                ${CMAKE_C_FLAGS}")
+message(STATUS "    CXXFLAGS:              ${CMAKE_CXX_FLAGS}")
 if (MAGMA_ENABLE_CUDA)
-  message( STATUS "    NVCCFLAGS:             ${CMAKE_CUDA_FLAGS}" )
-else()
-  message( STATUS "    DEVCCFLAGS:            ${DEVCCFLAGS}" )
-endif()
-message( STATUS "    FFLAGS:                ${CMAKE_Fortran_FLAGS}" )
-message( STATUS "    LIBS:                  ${LIBS}" )
-message( STATUS "    blas_fix:              ${blas_fix}  (MacOS Accelerate only)" )
-message( STATUS "    LAPACK_LIBRARIES:      ${LAPACK_LIBRARIES}"      )
-message( STATUS "    INCLUDE_DIRECTORIES:   ${MAGMA_INCLUDE}"   )
+    message(STATUS "    NVCCFLAGS:             ${CMAKE_CUDA_FLAGS}")
+else ()
+    message(STATUS "    DEVCCFLAGS:            ${DEVCCFLAGS}")
+endif ()
+message(STATUS "    FFLAGS:                ${CMAKE_Fortran_FLAGS}")
+message(STATUS "    LIBS:                  ${LIBS}")
+message(STATUS "    blas_fix:              ${blas_fix}  (MacOS Accelerate only)")
+message(STATUS "    LAPACK_LIBRARIES:      ${LAPACK_LIBRARIES}")
+message(STATUS "    INCLUDE_DIRECTORIES:   ${MAGMA_INCLUDE}")
 if (MAGMA_ENABLE_CUDA)
-  message( STATUS "    CUDA_CUDART_LIBRARY:   CUDA::cudart"   )
-  message( STATUS "    CUDA_CUBLAS_LIBRARIES: CUDA::cublas" )
-  message( STATUS "    CUDA_cusparse_LIBRARY: CUDA::cusparse" )
-else()
-  message( STATUS "    HIP_LIBRARY:   hip::device"   )
-  message( STATUS "    HIP_BLAS_LIBRARIES: roc::hipblas" )
-  message( STATUS "    HIP_sparse_LIBRARY: roc::hipsparse" )
-endif()
-message( STATUS "    Fortran modules:       ${modules}" )
+    message(STATUS "    CUDA_CUDART_LIBRARY:   CUDA::cudart")
+    message(STATUS "    CUDA_CUBLAS_LIBRARIES: CUDA::cublas")
+    message(STATUS "    CUDA_cusparse_LIBRARY: CUDA::cusparse")
+else ()
+    message(STATUS "    HIP_LIBRARY:   hip::device")
+    message(STATUS "    HIP_BLAS_LIBRARIES: roc::hipblas")
+    message(STATUS "    HIP_sparse_LIBRARY: roc::hipsparse")
+endif ()
+message(STATUS "    Fortran modules:       ${modules}")
diff --git a/include/magma_auxiliary.h b/include/magma_auxiliary.h
index c4f06d55d..2c7fd075c 100644
--- a/include/magma_auxiliary.h
+++ b/include/magma_auxiliary.h
@@ -81,6 +81,9 @@ magma_int_t magma_get_smlsize_divideconquer();
 magma_int_t
 magma_malloc( magma_ptr *ptr_ptr, size_t bytes );
 
+magma_int_t
+magma_malloc_async( magma_ptr* ptrPtr, size_t size, magma_queue_t queue);
+
 magma_int_t
 magma_malloc_cpu( void **ptr_ptr, size_t bytes );
 
@@ -93,6 +96,9 @@ magma_free_cpu( void *ptr );
 #define magma_free( ptr ) \
         magma_free_internal( ptr, __func__, __FILE__, __LINE__ )
 
+#define magma_free_async( ptr, queue ) \
+        magma_free_internal_async( ptr, __func__, __FILE__, __LINE__, queue )
+
 #define magma_free_pinned( ptr ) \
         magma_free_pinned_internal( ptr, __func__, __FILE__, __LINE__ )
 
@@ -101,6 +107,11 @@ magma_free_internal(
     magma_ptr ptr,
     const char* func, const char* file, int line );
 
+magma_int_t
+magma_free_internal_async(
+    magma_ptr ptr,
+    const char* func, const char* file, int line, magma_queue_t queue );
+
 magma_int_t
 magma_free_pinned_internal(
     void *ptr,
@@ -128,24 +139,45 @@ magma_memset_async(void * ptr, int value, size_t count, magma_queue_t queue);
 /// Type-safe version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes.
 static inline magma_int_t magma_imalloc( magmaInt_ptr           *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_int_t)        ); }
 
+/// Type-safe version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes.
+static inline magma_int_t magma_imalloc_async( magmaInt_ptr           *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_int_t), queue ); }
+
 /// Type-safe version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes.
 static inline magma_int_t magma_index_malloc( magmaIndex_ptr    *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_index_t)      ); }
 
+/// Type-safe version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes.
+static inline magma_int_t magma_index_malloc_async( magmaIndex_ptr    *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_index_t), queue ); }
+
 /// Type-safe version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes.
 static inline magma_int_t magma_uindex_malloc( magmaUIndex_ptr    *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_uindex_t)      ); }
 
+/// Type-safe version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes.
+static inline magma_int_t magma_uindex_malloc_async( magmaUIndex_ptr    *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_uindex_t), queue); }
+
 /// Type-safe version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes.
 static inline magma_int_t magma_smalloc( magmaFloat_ptr         *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(float)              ); }
 
+/// Type-safe version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes.
+static inline magma_int_t magma_smalloc_async( magmaFloat_ptr         *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(float), queue); }
+
 /// Type-safe version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes.
 static inline magma_int_t magma_dmalloc( magmaDouble_ptr        *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(double)             ); }
 
+/// Type-safe version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes.
+static inline magma_int_t magma_dmalloc_async( magmaDouble_ptr        *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(double), queue); }
+
 /// Type-safe version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes.
 static inline magma_int_t magma_cmalloc( magmaFloatComplex_ptr  *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magmaFloatComplex)  ); }
 
+/// Type-safe version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes.
+static inline magma_int_t magma_cmalloc_async( magmaFloatComplex_ptr  *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magmaFloatComplex), queue ); }
+
 /// Type-safe version of magma_malloc(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes.
 static inline magma_int_t magma_zmalloc( magmaDoubleComplex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magmaDoubleComplex) ); }
 
+/// Type-safe version of magma_malloc_async(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes.
+static inline magma_int_t magma_zmalloc_async( magmaDoubleComplex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magmaDoubleComplex), queue ); }
+
 /// @}
 
 
diff --git a/include/magma_z.h b/include/magma_z.h
index f66ba76c1..d8d0091c9 100644
--- a/include/magma_z.h
+++ b/include/magma_z.h
@@ -505,6 +505,16 @@ magma_zgerbt_gpu(
     magmaDoubleComplex *U, magmaDoubleComplex *V,
     magma_int_t *info);
 
+// CUDA MAGMA only
+magma_int_t
+magma_zgerbt_gpu_async(
+        const magma_bool_t gen, const magma_int_t n, const magma_int_t nrhs,
+        magmaDoubleComplex_ptr const dA, magma_int_t const ldda,
+        magmaDoubleComplex_ptr const dB, magma_int_t const lddb,
+        magmaDoubleComplex_ptr const dU, magmaDoubleComplex_ptr const dV,
+        magma_int_t *info,
+        magma_queue_t queue);
+
 // CUDA MAGMA only
 magma_int_t
 magma_zgerfs_nopiv_gpu(
@@ -516,6 +526,20 @@ magma_zgerfs_nopiv_gpu(
     magma_int_t *iter,
     magma_int_t *info);
 
+// CUDA MAGMA only
+magma_int_t
+magma_zgerfs_nopiv_gpu_async(
+        magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
+        magmaDoubleComplex_ptr dA, magma_int_t ldda,
+        magmaDoubleComplex_ptr dB, magma_int_t lddb,
+        magmaDoubleComplex_ptr dX, magma_int_t lddx,
+        magmaDoubleComplex_ptr dworkd, magmaDoubleComplex_ptr dAF,
+        magma_int_t *iter,
+        magma_int_t *info,
+        magma_int_t iter_max,
+        double bwdmax,
+        magma_queue_t queue);
+
 magma_int_t
 magma_zgesdd(
     magma_vec_t jobz, magma_int_t m, magma_int_t n,
@@ -553,6 +577,13 @@ magma_zgesv_nopiv_gpu(
     magmaDoubleComplex_ptr dB, magma_int_t lddb,
     magma_int_t *info);
 
+magma_int_t
+magma_zgesv_nopiv_gpu_async(
+        magma_int_t n, magma_int_t nrhs,
+        magmaDoubleComplex_ptr dA, magma_int_t ldda,
+        magmaDoubleComplex_ptr dB, magma_int_t lddb,
+        magma_int_t *info, magma_queue_t queue );
+
 // CUDA MAGMA only
 magma_int_t
 magma_zgesv_rbt(
@@ -561,6 +592,26 @@ magma_zgesv_rbt(
     magmaDoubleComplex *B, magma_int_t ldb,
     magma_int_t *info);
 
+// CUDA MAGMA only
+magma_int_t
+magma_zgesv_rbt_async(
+        const magma_bool_t refine, const magma_int_t n, const magma_int_t nrhs,
+        const magmaDoubleComplex *const dA, const magma_int_t lda,
+        magmaDoubleComplex *const dB, const magma_int_t ldb,
+        magma_int_t *info,
+        const magma_int_t iter_max, const double bwdmax,
+        magma_queue_t queue );
+
+// CUDA MAGMA only
+magma_int_t
+magma_zgesv_rbt_refine_async(
+        const magma_int_t n, const magma_int_t nrhs,
+        const magmaDoubleComplex *const dA_, const magma_int_t lda,
+        magmaDoubleComplex *const dB_, const magma_int_t ldb,
+        magma_int_t *info,
+        const magma_int_t iter_max, const double bwdmax,
+        magma_queue_t queue);
+
 magma_int_t
 magma_zgesvd(
     magma_vec_t jobu, magma_vec_t jobvt, magma_int_t m, magma_int_t n,
@@ -704,6 +755,13 @@ magma_zgetrf_nopiv_gpu(
     magmaDoubleComplex_ptr dA, magma_int_t ldda,
     magma_int_t *info);
 
+magma_int_t
+magma_zgetrf_nopiv_gpu_async(
+        magma_int_t m, magma_int_t n,
+        magmaDoubleComplex_ptr dA, magma_int_t ldda,
+        magma_int_t *info,
+        magma_queue_t queue);
+
 magma_int_t
 magma_zgetri_gpu(
     magma_int_t n,
@@ -749,6 +807,13 @@ magma_zgetrs_nopiv_gpu(
     magmaDoubleComplex_ptr dB, magma_int_t lddb,
     magma_int_t *info);
 
+magma_int_t
+magma_zgetrs_nopiv_gpu_async(
+        magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
+        magmaDoubleComplex_ptr dA, magma_int_t ldda,
+        magmaDoubleComplex_ptr dB, magma_int_t lddb,
+        magma_int_t *info, magma_queue_t queue);
+
 // ------------------------------------------------------------ zhe routines
 magma_int_t
 magma_zheevd(
diff --git a/include/magmablas_z.h b/include/magmablas_z.h
index 8a10d4622..d8e0bf029 100644
--- a/include/magmablas_z.h
+++ b/include/magmablas_z.h
@@ -496,6 +496,13 @@ magmablas_ztrtri_diag(
     magmaDoubleComplex_ptr d_dinvA,
     magma_queue_t queue );
 
+void
+magmablas_ztrtri_diag_async(
+    magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
+    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
+    magmaDoubleComplex_ptr d_dinvA,
+    magma_queue_t queue );
+
   /*
    * to cleanup (alphabetical order)
    */
@@ -757,6 +764,15 @@ magmablas_ztrsm(
     magmaDoubleComplex_ptr       dB, magma_int_t lddb,
     magma_queue_t queue );
 
+void
+magmablas_ztrsm_async(
+    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
+    magma_int_t m, magma_int_t n,
+    magmaDoubleComplex alpha,
+    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
+    magmaDoubleComplex_ptr       dB, magma_int_t lddb,
+    magma_queue_t queue );
+
 void
 magmablas_ztrsm_outofplace(
     magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
diff --git a/interface_cuda/alloc.cpp b/interface_cuda/alloc.cpp
index d7e178baa..74f9bdbd1 100644
--- a/interface_cuda/alloc.cpp
+++ b/interface_cuda/alloc.cpp
@@ -23,6 +23,7 @@
 #include "error.h"
 
 //#ifdef MAGMA_HAVE_CUDA
+#define NOPINNED_ALLOC // Fix for multi GPU systems running jemalloc, where cudaHostAlloc and cudaHostRegister kill parallelism across multiple threads using multiple GPUs
 
 
 #ifdef DEBUG_MEMORY
@@ -77,6 +78,53 @@ magma_malloc( magma_ptr* ptrPtr, size_t size )
 }
 
 
+/***************************************************************************//**
+    Allocates memory on the GPU. CUDA imposes a synchronization.
+    Use magma_free() to free this memory.
+
+    @param[out]
+    ptrPtr  On output, set to the pointer that was allocated.
+            NULL on failure.
+
+    @param[in]
+    size    Size in bytes to allocate. If size = 0, allocates some minimal size.
+
+     @param[in]
+    queue    Magma queue whose CUDA stream is used to put the cudaMalloc on.
+
+    @return MAGMA_SUCCESS
+    @return MAGMA_ERR_DEVICE_ALLOC on failure
+
+    Type-safe versions avoid the need for a (void**) cast and explicit sizeof.
+    @see magma_smalloc_q
+    @see magma_dmalloc_q
+    @see magma_cmalloc_q
+    @see magma_zmalloc_q
+    @see magma_imalloc_q
+    @see magma_index_malloc_q
+
+    @ingroup magma_malloc
+*******************************************************************************/
+extern "C" magma_int_t
+magma_malloc_async( magma_ptr* ptrPtr, size_t size, magma_queue_t queue)
+{
+    // malloc and free sometimes don't work for size=0, so allocate some minimal size
+    if ( size == 0 )
+        size = sizeof(magmaDoubleComplex);
+    if ( cudaSuccess != cudaMallocAsync( ptrPtr, size, queue->cuda_stream() )) {
+        return MAGMA_ERR_DEVICE_ALLOC;
+    }
+
+    #ifdef DEBUG_MEMORY
+    g_pointers_mutex.lock();
+    g_pointers_dev[ *ptrPtr ] = size;
+    g_pointers_mutex.unlock();
+    #endif
+
+    return MAGMA_SUCCESS;
+}
+
+
 /***************************************************************************//**
     @fn magma_free( ptr )
 
@@ -114,6 +162,45 @@ magma_free_internal( magma_ptr ptr,
 }
 
 
+/***************************************************************************//**
+    @fn magma_free( ptr )
+
+    Frees GPU memory previously allocated by magma_malloc().
+
+    @param[in]
+    ptr     Pointer to free.
+    @param[in]
+    queue   MAGMA queue to use the CYDA stream to free on.
+
+    @return MAGMA_SUCCESS
+    @return MAGMA_ERR_INVALID_PTR on failure
+
+    @ingroup magma_malloc
+*******************************************************************************/
+extern "C" magma_int_t
+magma_free_internal_async( magma_ptr ptr,
+    const char* func, const char* file, int line, magma_queue_t queue )
+{
+    #ifdef DEBUG_MEMORY
+    g_pointers_mutex.lock();
+    if ( ptr != NULL && g_pointers_dev.count( ptr ) == 0 ) {
+        fprintf( stderr, "magma_free( %p ) that wasn't allocated with magma_malloc.\n", ptr );
+    }
+    else {
+        g_pointers_dev.erase( ptr );
+    }
+    g_pointers_mutex.unlock();
+    #endif
+
+    cudaError_t err = cudaFreeAsync( ptr, queue->cuda_stream() );
+    check_xerror( err, func, file, line );
+    if ( err != cudaSuccess ) {
+        return MAGMA_ERR_INVALID_PTR;
+    }
+    return MAGMA_SUCCESS;
+}
+
+
 /***************************************************************************//**
     Allocate size bytes on CPU.
     The purpose of using this instead of malloc is to properly align arrays
@@ -162,10 +249,10 @@ magma_malloc_cpu( void** ptrPtr, size_t size )
     }
 #endif
 #else
-    *ptrPtr = malloc( size );
-    if ( *ptrPtr == NULL ) {
-        return MAGMA_ERR_HOST_ALLOC;
-    }
+        *ptrPtr = malloc( size );
+        if ( *ptrPtr == NULL ) {
+            return MAGMA_ERR_HOST_ALLOC;
+        }
 #endif
 
     #ifdef DEBUG_MEMORY
@@ -242,10 +329,15 @@ magma_free_cpu( void* ptr )
 extern "C" magma_int_t
 magma_malloc_pinned( void** ptrPtr, size_t size )
 {
+    #ifdef NOPINNED_ALLOC
+    return magma_malloc_cpu( ptrPtr, size );
+    #endif
+
     // malloc and free sometimes don't work for size=0, so allocate some minimal size
     // (for pinned memory, the error is detected in free)
     if ( size == 0 )
         size = sizeof(magmaDoubleComplex);
+
     if ( cudaSuccess != cudaHostAlloc( ptrPtr, size, cudaHostAllocPortable )) {
         return MAGMA_ERR_HOST_ALLOC;
     }
@@ -277,6 +369,10 @@ extern "C" magma_int_t
 magma_free_pinned_internal( void* ptr,
     const char* func, const char* file, int line )
 {
+    #ifdef NOPINNED_ALLOC
+    return magma_free_cpu( ptr );
+    #endif
+
     #ifdef DEBUG_MEMORY
     g_pointers_mutex.lock();
     if ( ptr != NULL && g_pointers_pin.count( ptr ) == 0 ) {
@@ -293,6 +389,7 @@ magma_free_pinned_internal( void* ptr,
     if ( cudaSuccess != err ) {
         return MAGMA_ERR_INVALID_PTR;
     }
+
     return MAGMA_SUCCESS;
 }
 
diff --git a/magmablas/ztrsm.cu b/magmablas/ztrsm.cu
index ff63b4471..81cc626a6 100644
--- a/magmablas/ztrsm.cu
+++ b/magmablas/ztrsm.cu
@@ -380,6 +380,244 @@ void magmablas_ztrsm_outofplace(
     }
 }
 
+extern "C"
+void magmablas_ztrsm_outofplace_async(
+    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
+    magma_int_t m, magma_int_t n,
+    magmaDoubleComplex alpha,
+    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
+    magmaDoubleComplex_ptr       dB, magma_int_t lddb,
+    magmaDoubleComplex_ptr       dX, magma_int_t lddx,
+    magma_int_t flag,
+    magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length,
+    magma_queue_t queue )
+{
+    #define dA(i_, j_) (dA + (i_) + (j_)*ldda)
+    #define dB(i_, j_) (dB + (i_) + (j_)*lddb)
+    #define dX(i_, j_) (dX + (i_) + (j_)*lddx)
+    #define d_dinvA(i_) (d_dinvA + (i_)*NB)
+
+    const magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
+    const magmaDoubleComplex c_one     = MAGMA_Z_ONE;
+    const magmaDoubleComplex c_zero    = MAGMA_Z_ZERO;
+
+    magma_int_t i, jb;
+    magma_int_t nrowA = (side == MagmaLeft ? m : n);
+
+    magma_int_t min_dinvA_length;
+    if ( side == MagmaLeft ) {
+        min_dinvA_length = magma_roundup( m, NB )*NB;
+    }
+    else {
+        min_dinvA_length = magma_roundup( n, NB )*NB;
+    }
+
+    magma_int_t info = 0;
+    if ( side != MagmaLeft && side != MagmaRight ) {
+        info = -1;
+    } else if ( uplo != MagmaUpper && uplo != MagmaLower ) {
+        info = -2;
+    } else if ( transA != MagmaNoTrans && transA != MagmaTrans && transA != MagmaConjTrans ) {
+        info = -3;
+    } else if ( diag != MagmaUnit && diag != MagmaNonUnit ) {
+        info = -4;
+    } else if (m < 0) {
+        info = -5;
+    } else if (n < 0) {
+        info = -6;
+    } else if (dA == NULL) {
+        info = -8;
+    } else if (ldda < max(1,nrowA)) {
+        info = -9;
+    } else if (dB == NULL) {
+        info = -10;
+    } else if (lddb < max(1,m)) {
+        info = -11;
+    } else if (dX == NULL) {
+        info = -12;
+    } else if (lddx < max(1,m)) {
+        info = -13;
+    } else if (d_dinvA == NULL) {
+        info = -15;
+    } else if (dinvA_length < min_dinvA_length) {
+        info = -16;
+    }
+
+    if (info != 0) {
+        magma_xerbla( __func__, -(info) );
+        return;
+    }
+
+    // quick return if possible.
+    if (m == 0 || n == 0)
+        return;
+
+    if (side == MagmaLeft) {
+        // invert diagonal blocks
+        if (flag)
+            magmablas_ztrtri_diag_async( uplo, diag, m, dA, ldda, d_dinvA, queue );
+
+        if (transA == MagmaNoTrans) {
+            if (uplo == MagmaLower) {
+                // left, lower no-transpose
+                // handle first block separately with alpha
+                jb = min(NB, m);
+                magma_zgemm( MagmaNoTrans, MagmaNoTrans, jb, n, jb, alpha, d_dinvA(0), NB, dB, lddb, c_zero, dX, lddx, queue );
+                if (NB < m) {
+                    magma_zgemm( MagmaNoTrans, MagmaNoTrans, m-NB, n, NB, c_neg_one, dA(NB,0), ldda, dX, lddx, alpha, dB(NB,0), lddb, queue );
+
+                    // remaining blocks
+                    for( i=NB; i < m; i += NB ) {
+                        jb = min(m-i, NB);
+                        magma_zgemm( MagmaNoTrans, MagmaNoTrans, jb, n, jb, c_one, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue );
+                        if (i+NB >= m)
+                            break;
+                        magma_zgemm( MagmaNoTrans, MagmaNoTrans, m-i-NB, n, NB, c_neg_one, dA(i+NB,i), ldda, dX(i,0), lddx, c_one, dB(i+NB,0), lddb, queue );
+                    }
+                }
+            }
+            else {
+                // left, upper no-transpose
+                // handle first block separately with alpha
+                jb = (m % NB == 0) ? NB : (m % NB);
+                i = m-jb;
+                magma_zgemm( MagmaNoTrans, MagmaNoTrans, jb, n, jb, alpha, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue );
+                if (i-NB >= 0) {
+                    magma_zgemm( MagmaNoTrans, MagmaNoTrans, i, n, jb, c_neg_one, dA(0,i), ldda, dX(i,0), lddx, alpha, dB, lddb, queue );
+
+                    // remaining blocks
+                    for( i=m-jb-NB; i >= 0; i -= NB ) {
+                        magma_zgemm( MagmaNoTrans, MagmaNoTrans, NB, n, NB, c_one, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue );
+                        if (i-NB < 0)
+                            break;
+                        magma_zgemm( MagmaNoTrans, MagmaNoTrans, i, n, NB, c_neg_one, dA(0,i), ldda, dX(i,0), lddx, c_one, dB, lddb, queue );
+                    }
+                }
+            }
+        }
+        else {  // transA == MagmaTrans || transA == MagmaConjTrans
+            if (uplo == MagmaLower) {
+                // left, lower transpose
+                // handle first block separately with alpha
+                jb = (m % NB == 0) ? NB : (m % NB);
+                i = m-jb;
+                magma_zgemm( transA, MagmaNoTrans, jb, n, jb, alpha, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue );
+                if (i-NB >= 0) {
+                    magma_zgemm( transA, MagmaNoTrans, i, n, jb, c_neg_one, dA(i,0), ldda, dX(i,0), lddx, alpha, dB, lddb, queue );
+
+                    // remaining blocks
+                    for( i=m-jb-NB; i >= 0; i -= NB ) {
+                        magma_zgemm( transA, MagmaNoTrans, NB, n, NB, c_one, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue );
+                        if (i-NB < 0)
+                            break;
+                        magma_zgemm( transA, MagmaNoTrans, i, n, NB, c_neg_one, dA(i,0), ldda, dX(i,0), lddx, c_one, dB, lddb, queue );
+                    }
+                }
+            }
+            else {
+                // left, upper transpose
+                // handle first block separately with alpha
+                jb = min(NB, m);
+                magma_zgemm( transA, MagmaNoTrans, jb, n, jb, alpha, d_dinvA(0), NB, dB, lddb, c_zero, dX, lddx, queue );
+                if (NB < m) {
+                    magma_zgemm( transA, MagmaNoTrans, m-NB, n, NB, c_neg_one, dA(0,NB), ldda, dX, lddx, alpha, dB(NB,0), lddb, queue );
+
+                    // remaining blocks
+                    for( i=NB; i < m; i += NB ) {
+                        jb = min(m-i, NB);
+                        magma_zgemm( transA, MagmaNoTrans, jb, n, jb, c_one, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue );
+                        if (i+NB >= m)
+                            break;
+                        magma_zgemm( transA, MagmaNoTrans, m-i-NB, n, NB, c_neg_one, dA(i,i+NB), ldda, dX(i,0), lddx, c_one, dB(i+NB,0), lddb, queue );
+                    }
+                }
+            }
+        }
+    }
+    else {  // side == MagmaRight
+        // invert diagonal blocks
+        if (flag)
+            magmablas_ztrtri_diag_async( uplo, diag, n, dA, ldda, d_dinvA, queue );
+
+        if (transA == MagmaNoTrans) {
+            if (uplo == MagmaLower) {
+                // right, lower no-transpose
+                // handle first block separately with alpha
+                jb = (n % NB == 0) ? NB : (n % NB);
+                i = n-jb;
+                magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, jb, jb, alpha, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue );
+                if (i-NB >= 0) {
+                    magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, i, jb, c_neg_one, dX(0,i), lddx, dA(i,0), ldda, alpha, dB, lddb, queue );
+
+                    // remaining blocks
+                    for( i=n-jb-NB; i >= 0; i -= NB ) {
+                        magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, NB, NB, c_one, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue );
+                        if (i-NB < 0)
+                            break;
+                        magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, i, NB, c_neg_one, dX(0,i), lddx, dA(i,0), ldda, c_one, dB, lddb, queue );
+                    }
+                }
+            }
+            else {
+                // right, upper no-transpose
+                // handle first block separately with alpha
+                jb = min(NB, n);
+                magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, jb, jb, alpha, dB, lddb, d_dinvA(0), NB, c_zero, dX, lddx, queue );
+                if (NB < n) {
+                    magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, n-NB, NB, c_neg_one, dX, lddx, dA(0,NB), ldda, alpha, dB(0,NB), lddb, queue );
+
+                    // remaining blocks
+                    for( i=NB; i < n; i += NB ) {
+                        jb = min(NB, n-i);
+                        magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, jb, jb, c_one, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue );
+                        if (i+NB >= n)
+                            break;
+                        magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, n-i-NB, NB, c_neg_one, dX(0,i), lddx, dA(i,i+NB), ldda, c_one, dB(0,i+NB), lddb, queue );
+                    }
+                }
+            }
+        }
+        else { // transA == MagmaTrans || transA == MagmaConjTrans
+            if (uplo == MagmaLower) {
+                // right, lower transpose
+                // handle first block separately with alpha
+                jb = min(NB, n);
+                magma_zgemm( MagmaNoTrans, transA, m, jb, jb, alpha, dB, lddb, d_dinvA(0), NB, c_zero, dX, lddx, queue );
+                if (NB < n) {
+                    magma_zgemm( MagmaNoTrans, transA, m, n-NB, NB, c_neg_one, dX, lddx, dA(NB,0), ldda, alpha, dB(0,NB), lddb, queue );
+
+                    // remaining blocks
+                    for( i=NB; i < n; i += NB ) {
+                        jb = min(NB, n-i);
+                        magma_zgemm( MagmaNoTrans, transA, m, jb, jb, c_one, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue );
+                        if (i+NB >= n)
+                            break;
+                        magma_zgemm( MagmaNoTrans, transA, m, n-i-NB, NB, c_neg_one, dX(0,i), lddx, dA(NB+i,i), ldda, c_one, dB(0,i+NB), lddb, queue );
+                    }
+                }
+            }
+            else {
+                // right, upper transpose
+                // handle first block separately with alpha
+                jb = (n % NB == 0) ? NB : (n % NB);
+                i = n-jb;
+                magma_zgemm( MagmaNoTrans, transA, m, jb, jb, alpha, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue );
+                if (i-NB >= 0) {
+                    magma_zgemm( MagmaNoTrans, transA, m, i, jb, c_neg_one, dX(0,i), lddx, dA(0,i), ldda, alpha, dB, lddb, queue );
+
+                    // remaining blocks
+                    for( i=n-jb-NB; i >= 0; i -= NB ) {
+                        magma_zgemm( MagmaNoTrans, transA, m, NB, NB, c_one, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue );
+                        if (i-NB < 0)
+                            break;
+                        magma_zgemm( MagmaNoTrans, transA, m, i, NB, c_neg_one, dX(0,i), lddx, dA(0,i), ldda, c_one, dB, lddb, queue );
+                    }
+                }
+            }
+        }
+    }
+}
+
 
 /***************************************************************************//**
     Similar to magmablas_ztrsm_outofplace(), but copies result dX back to dB,
@@ -407,6 +645,25 @@ void magmablas_ztrsm_work(
     magmablas_zlacpy( MagmaFull, m, n, dX, lddx, dB, lddb, queue );
 }
 
+extern "C"
+void magmablas_ztrsm_work_async(
+    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
+    magma_int_t m, magma_int_t n,
+    magmaDoubleComplex alpha,
+    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
+    magmaDoubleComplex_ptr       dB, magma_int_t lddb,
+    magmaDoubleComplex_ptr       dX, magma_int_t lddx,
+    magma_int_t flag,
+    magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length,
+    magma_queue_t queue )
+{
+    magmablas_ztrsm_outofplace_async( side, uplo, transA, diag, m, n, alpha,
+                                  dA, ldda, dB, lddb, dX, lddx, flag,
+                                  d_dinvA, dinvA_length, queue );
+    // copy X to B
+    magmablas_zlacpy( MagmaFull, m, n, dX, lddx, dB, lddb, queue );
+}
+
 
 /***************************************************************************//**
     Similar to magmablas_ztrsm_outofplace(), but allocates dX and d_dinvA
@@ -484,3 +741,72 @@ void magmablas_ztrsm(
     magma_free( d_dinvA );
     magma_free( dX );
 }
+
+extern "C"
+void magmablas_ztrsm_async(
+    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
+    magma_int_t m, magma_int_t n,
+    magmaDoubleComplex alpha,
+    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
+    magmaDoubleComplex_ptr       dB, magma_int_t lddb,
+    magma_queue_t queue )
+{
+    const magma_int_t nrowA = (side == MagmaLeft ? m : n);
+
+    magma_int_t info = 0;
+    if ( side != MagmaLeft && side != MagmaRight ) {
+        info = -1;
+    } else if ( uplo != MagmaUpper && uplo != MagmaLower ) {
+        info = -2;
+    } else if ( transA != MagmaNoTrans && transA != MagmaTrans && transA != MagmaConjTrans ) {
+        info = -3;
+    } else if ( diag != MagmaUnit && diag != MagmaNonUnit ) {
+        info = -4;
+    } else if (m < 0) {
+        info = -5;
+    } else if (n < 0) {
+        info = -6;
+    } else if (dA == NULL) {
+        info = -8;
+    } else if (ldda < max(1,nrowA)) {
+        info = -9;
+    } else if (dB == NULL) {
+        info = -10;
+    } else if (lddb < max(1,m)) {
+        info = -11;
+    }
+
+    if (info != 0) {
+        magma_xerbla( __func__, -(info) );
+        return;
+    }
+
+    magmaDoubleComplex_ptr d_dinvA=NULL, dX=NULL;
+    const magma_int_t lddx = magma_roundup( m, 32 );
+    const magma_int_t size_x = lddx*n;
+    magma_int_t dinvA_length;
+    if ( side == MagmaLeft ) {
+        dinvA_length = magma_roundup( m, NB )*NB;
+    }
+    else {
+        dinvA_length = magma_roundup( n, NB )*NB;
+    }
+
+    magma_zmalloc_async( &d_dinvA, dinvA_length, queue );
+    magma_zmalloc_async( &dX, size_x, queue );
+
+    if ( d_dinvA == NULL || dX == NULL ) {
+        info = MAGMA_ERR_DEVICE_ALLOC;
+        magma_xerbla( __func__, -(info) );
+        // continue to free
+    }
+    else {
+        magmablas_zlaset( MagmaFull, dinvA_length, 1, MAGMA_Z_ZERO, MAGMA_Z_ZERO, d_dinvA, dinvA_length, queue );
+        magmablas_zlaset( MagmaFull, m, n, MAGMA_Z_ZERO, MAGMA_Z_ZERO, dX, lddx, queue );
+        magmablas_ztrsm_work_async( side, uplo, transA, diag, m, n, alpha,
+                                dA, ldda, dB, lddb, dX, lddx, 1, d_dinvA, dinvA_length, queue );
+    }
+
+    magma_free_async( d_dinvA, queue );
+    magma_free_async( dX, queue );
+}
diff --git a/magmablas/ztrtri_diag.cu b/magmablas/ztrtri_diag.cu
index 89f057971..2e75f20e3 100644
--- a/magmablas/ztrtri_diag.cu
+++ b/magmablas/ztrtri_diag.cu
@@ -178,3 +178,105 @@ magmablas_ztrtri_diag(
         }
     }
 }
+
+extern "C" void
+magmablas_ztrtri_diag_async(
+    magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
+    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
+    magmaDoubleComplex_ptr d_dinvA,
+    magma_queue_t queue)
+{
+    magma_int_t info = 0;
+    if (uplo != MagmaLower && uplo != MagmaUpper)
+        info = -1;
+    else if (diag != MagmaNonUnit && diag != MagmaUnit)
+        info = -2;
+    else if (n < 0)
+        info = -3;
+    else if (ldda < n)
+        info = -5;
+
+    if (info != 0) {
+        magma_xerbla( __func__, -(info) );
+        return;  //info
+    }
+
+    const int nblocks = magma_ceildiv( n, IB );
+
+    cudaMemsetAsync( d_dinvA, 0, magma_roundup( n, NB )*NB * sizeof(magmaDoubleComplex), queue->cuda_stream() );
+
+    if ( uplo == MagmaLower ) {
+        // invert diagonal IB x IB inner blocks
+        ztrtri_diag_lower_kernel
+            <<< nblocks, IB, 0, queue->cuda_stream() >>>
+            ( diag, n, dA, ldda, d_dinvA );
+
+        // build up NB x NB blocks (assuming IB=16 here):
+        // use   16 x 16  blocks to build  32 x 32  blocks,  1 x (1 x npages) grid,  4 x 4 threads;
+        // then  32 x 32  blocks to build  64 x 64  blocks,  1 x (2 x npages) grid,  8 x 4 threads;
+        // then  64 x 64  blocks to build 128 x 128 blocks,  1 x (4 x npages) grid, 16 x 4 threads;
+        // then 128 x 128 blocks to build 256 x 256 blocks,  2 x (8 x npages) grid, 16 x 4 threads.
+        for( int jb=IB; jb < NB; jb *= 2 ) {
+            const int kb = jb*2;
+            const int npages = magma_ceildiv( n, kb );
+            const dim3 threads( (jb <= 32 ? jb/4 : 16), 4 );
+            const dim3 grid( jb/(threads.x*threads.y), npages*(jb/16) );  // emulate 3D grid: NX * (NY*npages), for CUDA ARCH 1.x
+
+            //printf( "n %d, jb %d, grid %d x %d (%d x %d)\n", n, jb, grid.x, grid.y, grid.y / npages, npages );
+            switch (jb) {
+                case 16:
+                    triple_zgemm16_part1_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm16_part2_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    break;
+                case 32:
+                    triple_zgemm32_part1_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm32_part2_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    break;
+                case 64:
+                    triple_zgemm64_part1_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm64_part2_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    break;
+                default:
+                    triple_zgemm_above64_part1_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm_above64_part2_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm_above64_part3_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    break;
+            }
+            if ( kb >= n ) break;
+        }
+    }
+    else {
+        ztrtri_diag_upper_kernel
+            <<< nblocks, IB, 0, queue->cuda_stream() >>>
+            ( diag, n, dA, ldda, d_dinvA );
+
+        // update the inverse up to the size of IB
+        for( int jb=IB; jb < NB; jb *= 2 ) {
+            const int kb = jb*2;
+            const int npages = magma_ceildiv( n, kb );
+            const dim3 threads( (jb <= 32 ? jb/4 : 16), 4 );
+            const dim3 grid( jb/(threads.x*threads.y), npages*(jb/16) );  // emulate 3D grid: NX * (NY*npages), for CUDA ARCH 1.x
+
+            switch (jb) {
+                case 16:
+                    triple_zgemm16_part1_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm16_part2_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    break;
+                case 32:
+                    triple_zgemm32_part1_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm32_part2_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    break;
+                case 64:
+                    triple_zgemm64_part1_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm64_part2_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    break;
+                default:
+                    triple_zgemm_above64_part1_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm_above64_part2_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    triple_zgemm_above64_part3_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages );
+                    break;
+            }
+            if ( kb >= n ) break;
+        }
+    }
+}
diff --git a/src/zgerbt_gpu.cpp b/src/zgerbt_gpu.cpp
index 6c0e8f405..436b13ce1 100644
--- a/src/zgerbt_gpu.cpp
+++ b/src/zgerbt_gpu.cpp
@@ -169,3 +169,67 @@ magma_zgerbt_gpu(
 
     return *info;
 }
+
+extern "C" magma_int_t
+magma_zgerbt_gpu_async(
+        const magma_bool_t gen, const magma_int_t n, const magma_int_t nrhs,
+        magmaDoubleComplex_ptr const dA, magma_int_t const ldda,
+        magmaDoubleComplex_ptr const dB, magma_int_t const lddb,
+        magmaDoubleComplex_ptr const dU, magmaDoubleComplex_ptr const dV,
+        magma_int_t *info,
+        magma_queue_t queue)
+{
+#define dB(i_, j_) (dB + (i_) + (j_)*lddb)
+
+    /* Function Body */
+    *info = 0;
+    if ( ! (gen == MagmaTrue) &&
+         ! (gen == MagmaFalse) ) {
+        *info = -1;
+    }
+    else if (n < 0) {
+        *info = -2;
+    } else if (nrhs < 0) {
+        *info = -3;
+    } else if (ldda < max(1,n)) {
+        *info = -5;
+    } else if (lddb < max(1,n)) {
+        *info = -7;
+    }
+    if (*info != 0) {
+        magma_xerbla( __func__, -(*info) );
+        return *info;
+    }
+
+    /* Quick return if possible */
+    if (nrhs == 0 || n == 0)
+        return *info;
+
+    magmaDoubleComplex *U, *V;
+    if (MAGMA_SUCCESS != magma_zmalloc_cpu( &U, 2*n ) ||
+        MAGMA_SUCCESS != magma_zmalloc_cpu( &V, 2*n ))
+    {
+        *info = MAGMA_ERR_HOST_ALLOC;
+        goto cleanup;
+    }
+
+    /* Initialize Butterfly matrix on the CPU */
+    if (gen == MagmaTrue)
+        init_butterfly( 2*n, U, V );
+
+    /* Copy the butterfly to the GPU */
+    magma_zsetvector_async( 2*n, U, 1, dU, 1, queue );
+    magma_zsetvector_async( 2*n, V, 1, dV, 1, queue );
+
+    /* Perform Partial Random Butterfly Transformation on the GPU */
+    magmablas_zprbt( n, dA, ldda, dU, dV, queue );
+
+    /* Compute U^T * b on the GPU*/
+    magmablas_zprbt_mtv(n, nrhs, dU, dB, lddb, queue);
+
+    cleanup:
+    magma_free_cpu( U );
+    magma_free_cpu( V );
+
+    return *info;
+}
diff --git a/src/zgerfs_nopiv_gpu.cpp b/src/zgerfs_nopiv_gpu.cpp
index 2c2f9c04a..3432d67b5 100644
--- a/src/zgerfs_nopiv_gpu.cpp
+++ b/src/zgerfs_nopiv_gpu.cpp
@@ -8,6 +8,7 @@
        @precisions normal z -> s d c
 
 */
+#include <limits>
 #include "magma_internal.h"
 
 #define BWDMAX 1.0
@@ -275,3 +276,192 @@ magma_zgerfs_nopiv_gpu(
     
     return *info;
 }
+
+extern "C" magma_int_t
+magma_zgerfs_nopiv_gpu_async(
+        magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
+        magmaDoubleComplex_ptr dA, magma_int_t ldda,
+        magmaDoubleComplex_ptr dB, magma_int_t lddb,
+        magmaDoubleComplex_ptr dX, magma_int_t lddx,
+        magmaDoubleComplex_ptr dworkd, magmaDoubleComplex_ptr dAF,
+        magma_int_t *iter,
+        magma_int_t *info,
+        magma_int_t iter_max,
+        double bwdmax,
+        magma_queue_t queue)
+{
+#define dB(i,j)     (dB + (i) + (j)*lddb)
+#define dX(i,j)     (dX + (i) + (j)*lddx)
+#define dR(i,j)     (dR + (i) + (j)*lddr)
+
+    /* Constants */
+    const magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
+    const magmaDoubleComplex c_one     = MAGMA_Z_ONE;
+    constexpr magma_int_t ione = 1;
+    const auto n_elem = n * nrhs;
+
+    /* Local variables */
+    magmaDoubleComplex_ptr dR;
+    magmaDoubleComplex Xnrmv, Rnrmv;
+    double Anrm, Xnrm, Rnrm, cte, eps, work[1];
+    double sae, best_sae = std::numeric_limits<double>::infinity();
+    magma_int_t i, j, iiter, lddsa, lddr;
+    magmaDoubleComplex_ptr best_dX = nullptr;
+
+    /* Check arguments */
+    *iter = 0;
+    *info = 0;
+    if ( n < 0 )
+        *info = -1;
+    else if ( nrhs < 0 )
+        *info = -2;
+    else if ( ldda < max(1,n))
+        *info = -4;
+    else if ( lddb < max(1,n))
+        *info = -8;
+    else if ( lddx < max(1,n))
+        *info = -10;
+
+    if (*info != 0) {
+        magma_xerbla( __func__, -(*info) );
+        return *info;
+    }
+
+    if ( n == 0 || nrhs == 0 )
+        return *info;
+
+    lddsa = n;
+    lddr  = n;
+
+    dR  = dworkd;
+
+    eps  = lapackf77_dlamch("Epsilon");
+    if ( bwdmax == 0 ) {
+        Anrm = cte = 0;
+    } else {
+        Anrm = magmablas_zlange(MagmaInfNorm, n, n, dA, ldda, (magmaDouble_ptr) dworkd, n_elem, queue);
+        cte = Anrm * eps * magma_dsqrt((double) n) * bwdmax;
+    }
+
+    // residual dR = dB - dA*dX in double precision
+    magmablas_zlacpy( MagmaFull, n, nrhs, dB, lddb, dR, lddr, queue );
+    if ( nrhs == 1 ) {
+        magma_zgemv( trans, n, n,
+                     c_neg_one, dA, ldda,
+                     dX, 1,
+                     c_one,     dR, 1, queue );
+    }
+    else {
+        magma_zgemm( trans, MagmaNoTrans, n, nrhs, n,
+                     c_neg_one, dA, ldda,
+                     dX, lddx,
+                     c_one,     dR, lddr, queue );
+    }
+
+    if ( bwdmax == 0 ) goto refinement;
+
+    // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange?
+    // TODO implement Xamax for one GPU copy less
+    for( j=0; j < nrhs; j++ ) {
+        i = magma_izamax( n, dX(0,j), 1, queue ) - 1;
+        magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1, queue );
+        Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, work );
+
+        i = magma_izamax( n, dR(0,j), 1, queue ) - 1;
+        magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1, queue );
+        Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, work );
+        //printf("Rnrm : %e, Xnrm*cte : %e\n", Rnrm, Xnrm*cte);
+        if ( Rnrm >  Xnrm*cte ) {
+            goto refinement;
+        }
+    }
+
+    *iter = 0;
+    goto cleanup;
+
+    refinement:
+    magma_zmalloc_async( &best_dX, n_elem, queue );
+    for( iiter=1; iiter < iter_max; ) {
+        *info = 0;
+        // solve dAF*dX = dR
+        // it's okay that dR is used for both dB input and dX output.
+        magma_zgetrs_nopiv_gpu_async( trans, n, nrhs, dAF, lddsa, dR, lddr, info, queue );
+        if (*info != 0) {
+            *iter = -3;
+            goto fallback;
+        }
+
+        // Add correction and setup residual
+        // dX += dR  --and--
+        // dR = dB
+        // This saves going through dR a second time (if done with one more kernel).
+        // -- not really: first time is read, second time is write.
+        for( j=0; j < nrhs; ++j ) {
+            magmablas_zaxpycp( n, dR(0,j), dX(0,j), dB(0,j), queue );
+        }
+
+        // residual dR = dB - dA*dX in double precision
+        if ( nrhs == 1 ) {
+            magma_zgemv( trans, n, n,
+                         c_neg_one, dA, ldda,
+                         dX, 1,
+                         c_one,     dR, 1, queue );
+        }
+        else {
+            magma_zgemm( trans, MagmaNoTrans, n, nrhs, n,
+                         c_neg_one, dA, ldda,
+                         dX, lddx,
+                         c_one,     dR, lddr, queue );
+        }
+
+        /*  Sum of absolute error is compared between each iteration
+         *  and the solution with best residuals copied on the side. */
+        sae = magma_dzasum(n_elem, dR, 1, queue);
+        if (sae < best_sae) {
+            best_sae = sae;
+            magma_zcopymatrix_async(n, nrhs, dX, n, best_dX, n, queue);
+        }
+        if (bwdmax == 0) goto L20;
+
+        /*  Check whether the nrhs normwise backward errors satisfy the
+         *  stopping criterion. If yes, set ITER=IITER > 0 and return. */
+        for( j=0; j < nrhs; ++j ) {
+            i = magma_izamax( n, dX(0,j), 1, queue ) - 1;
+            magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1, queue );
+            Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, work );
+
+            i = magma_izamax( n, dR(0,j), 1, queue ) - 1;
+            magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1, queue );
+            Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, work );
+
+            if (  bwdmax == 0 || Rnrm >  Xnrm*cte ) {
+                goto L20;
+            }
+        }
+
+        /*  If we are here, the nrhs normwise backward errors satisfy
+         *  the stopping criterion, we are good to exit. */
+        *iter = iiter;
+        goto cleanup;
+
+        L20:
+        ++iiter;
+    }
+    /* If we are at this place of the code, this is because we have
+     * performed ITER=iter_max iterations and never satisified the
+     * stopping criterion. Set up the ITER flag accordingly. */
+    *iter = -iter_max - 1;
+
+    fallback:
+    /* Iterative refinement failed to converge to a
+     * satisfactory solution. */
+
+    cleanup:
+
+    if (best_dX) {
+        magma_zcopymatrix_async( n, nrhs, best_dX, n, dX, n, queue );
+        magma_free_async( best_dX, queue );
+    }
+
+    return *info;
+}
diff --git a/src/zgesv_nopiv_gpu.cpp b/src/zgesv_nopiv_gpu.cpp
index c142b7092..ab76c5ef9 100644
--- a/src/zgesv_nopiv_gpu.cpp
+++ b/src/zgesv_nopiv_gpu.cpp
@@ -93,3 +93,38 @@ magma_zgesv_nopiv_gpu(
     
     return *info;
 }
+
+extern "C" magma_int_t
+magma_zgesv_nopiv_gpu_async(
+        magma_int_t n, magma_int_t nrhs,
+        magmaDoubleComplex_ptr dA, magma_int_t ldda,
+        magmaDoubleComplex_ptr dB, magma_int_t lddb,
+        magma_int_t *info, magma_queue_t queue )
+{
+    *info = 0;
+    if (n < 0) {
+        *info = -1;
+    } else if (nrhs < 0) {
+        *info = -2;
+    } else if (ldda < max(1,n)) {
+        *info = -4;
+    } else if (lddb < max(1,n)) {
+        *info = -6;
+    }
+    if (*info != 0) {
+        magma_xerbla( __func__, -(*info) );
+        return *info;
+    }
+
+    /* Quick return if possible */
+    if (n == 0 || nrhs == 0) {
+        return *info;
+    }
+
+    magma_zgetrf_nopiv_gpu_async( n, n, dA, ldda, info, queue );
+    if ( *info == MAGMA_SUCCESS ) {
+        magma_zgetrs_nopiv_gpu_async( MagmaNoTrans, n, nrhs, dA, ldda, dB, lddb, info, queue );
+    }
+
+    return *info;
+}
diff --git a/src/zgesv_rbt.cpp b/src/zgesv_rbt.cpp
index a5cd7620d..9b11daa77 100644
--- a/src/zgesv_rbt.cpp
+++ b/src/zgesv_rbt.cpp
@@ -201,3 +201,201 @@ magma_zgesv_rbt(
 
     return *info;
 }
+
+extern "C" magma_int_t
+magma_zgesv_rbt_async(
+        const magma_bool_t refine, const magma_int_t n, const magma_int_t nrhs,
+        const magmaDoubleComplex *const dA_, const magma_int_t lda,
+        magmaDoubleComplex *const dB_, const magma_int_t ldb,
+        magma_int_t *const info,
+        const magma_int_t iter_max, const double bwdmax,
+        magma_queue_t queue)
+{
+    /* Constants */
+    const magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
+    const magmaDoubleComplex c_one  = MAGMA_Z_ONE;
+
+    /* Local variables */
+    magma_int_t nn = n;//magma_roundup( n, 4 );
+    magmaDoubleComplex_ptr dA=NULL, dB=NULL, dAo=NULL, dBo=NULL, dwork=NULL, dU=NULL, dV=NULL;
+    magma_int_t iter;
+
+    /* Function Body */
+    *info = 0;
+    if ( ! (refine == MagmaTrue) &&
+         ! (refine == MagmaFalse) ) {
+        *info = -1;
+    }
+    else if (n < 0) {
+        *info = -2;
+    } else if (nrhs < 0) {
+        *info = -3;
+    } else if (lda < max(1,n)) {
+        *info = -5;
+    } else if (ldb < max(1,n)) {
+        *info = -7;
+    }
+    if (*info != 0) {
+        magma_xerbla( __func__, -(*info) );
+        return *info;
+    }
+
+    /* Quick return if possible */
+    if (nrhs == 0 || n == 0)
+        return *info;
+
+    // TODO: investigate failures on AMD GPUs
+    // For now ignore refine and always set it to False
+    // there is probably a bug in the refinement code for the HIP backend
+    #ifdef MAGMA_HAVE_HIP
+    refine = MagmaFalse;
+    #endif
+
+    if (MAGMA_SUCCESS != magma_zmalloc_async( &dA, nn*nn, queue ) ||
+        MAGMA_SUCCESS != magma_zmalloc_async( &dB, nn*nrhs, queue ))
+    {
+        *info = MAGMA_ERR_DEVICE_ALLOC;
+        goto cleanup;
+    }
+
+    /* Allocate memory for the buterfly matrices */
+    if ((MAGMA_SUCCESS != magma_zmalloc_async( &dU, 2*n, queue )) ||
+        (MAGMA_SUCCESS != magma_zmalloc_async( &dV, 2*n, queue ))) {
+        magma_free_async( dU, queue );
+        magma_free_async( dV, queue );
+        *info = MAGMA_ERR_DEVICE_ALLOC;
+        return *info;
+    }
+
+    if (refine == MagmaTrue) {
+        if (MAGMA_SUCCESS != magma_zmalloc_async( &dAo,   nn*nn, queue ) ||
+            MAGMA_SUCCESS != magma_zmalloc_async( &dwork, nn*nrhs, queue ) ||
+            MAGMA_SUCCESS != magma_zmalloc_async( &dBo,   nn*nrhs, queue ))
+        {
+            *info = MAGMA_ERR_DEVICE_ALLOC;
+            goto cleanup;
+        }
+    }
+
+    magmablas_zlaset( MagmaFull, nn, nn, c_zero, c_one, dA, nn, queue );
+
+    magma_zcopymatrix_async( n, n, dA_, lda, dA, nn, queue );
+    magma_zcopymatrix_async( n, nrhs, dB_, ldb, dB, nn, queue );
+
+    *info = magma_zgerbt_gpu_async( MagmaTrue, nn, nrhs, dA, nn, dB, nn, dU, dV, info, queue );
+    if (*info != MAGMA_SUCCESS)  {
+        return *info;
+    }
+
+    if (refine == MagmaTrue) {
+        magma_zcopymatrix_async( nn, nn, dA, nn, dAo, nn, queue );
+        magma_zcopymatrix_async( nn, nrhs, dB, nn, dBo, nn, queue );
+    }
+    /* Solve the system U^TAV.y = U^T.b on the GPU */
+    magma_zgesv_nopiv_gpu_async( nn, nrhs, dA, nn, dB, nn, info, queue );
+
+    /* Iterative refinement */
+    if (refine == MagmaTrue) {
+        magma_zgerfs_nopiv_gpu_async( MagmaNoTrans, nn, nrhs, dAo, nn, dBo, nn, dB, nn, dwork, dA, &iter, info, iter_max, bwdmax, queue );
+    }
+
+    magmablas_zprbt_mv(nn, nrhs, dV, dB, nn, queue);
+
+    magma_zcopymatrix_async( n, nrhs, dB, nn, dB_, ldb, queue );
+
+    cleanup:
+    magma_free_async( dA, queue );
+    magma_free_async( dB, queue );
+    magma_free_async( dU, queue );
+    magma_free_async( dV, queue );
+
+    if (refine == MagmaTrue) {
+        magma_free_async( dAo, queue );
+        magma_free_async( dBo, queue );
+        magma_free_async( dwork, queue );
+    }
+
+    return *info;
+}
+
+extern "C" magma_int_t
+magma_zgesv_rbt_refine_async(
+        const magma_int_t n, const magma_int_t nrhs,
+        const magmaDoubleComplex *const dA_, const magma_int_t lda,
+        magmaDoubleComplex *const dB_, const magma_int_t ldb,
+        magma_int_t *info,
+        const magma_int_t iter_max, const double bwdmax,
+        magma_queue_t queue)
+{
+    /* Constants */
+    const magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
+    const magmaDoubleComplex c_one  = MAGMA_Z_ONE;
+
+    /* Local variables */
+    magma_int_t nn = n;//magma_roundup( n, 4 );
+    magmaDoubleComplex_ptr dA=NULL, dB=NULL, dAo=NULL, dBo=NULL, dwork=NULL;
+    magma_int_t iter;
+
+    /* Function Body */
+    *info = 0;
+    if (n < 0) {
+        *info = -2;
+    } else if (nrhs < 0) {
+        *info = -3;
+    } else if (lda < max(1,n)) {
+        *info = -5;
+    } else if (ldb < max(1,n)) {
+        *info = -7;
+    }
+    if (*info != 0) {
+        magma_xerbla( __func__, -(*info) );
+        return *info;
+    }
+
+    /* Quick return if possible */
+    if (nrhs == 0 || n == 0)
+        return *info;
+
+    // TODO: investigate failures on AMD GPUs
+    // For now ignore refine and always set it to False
+    // there is probably a bug in the refinement code for the HIP backend
+    #ifdef MAGMA_HAVE_HIP
+    *info = -8;
+    return *info;
+    #endif
+
+    if (MAGMA_SUCCESS != magma_zmalloc_async( &dA, nn*nn, queue ) ||
+        MAGMA_SUCCESS != magma_zmalloc_async( &dB, nn*nrhs, queue ))
+    {
+        *info = MAGMA_ERR_DEVICE_ALLOC;
+        goto cleanup;
+    }
+
+    if (MAGMA_SUCCESS != magma_zmalloc_async( &dAo,   nn*nn, queue ) ||
+        MAGMA_SUCCESS != magma_zmalloc_async( &dwork, nn*nrhs, queue ) ||
+        MAGMA_SUCCESS != magma_zmalloc_async( &dBo,   nn*nrhs, queue ))
+    {
+        *info = MAGMA_ERR_DEVICE_ALLOC;
+        goto cleanup;
+    }
+
+    magmablas_zlaset( MagmaFull, nn, nn, c_zero, c_one, dA, nn, queue );
+
+    magma_zcopymatrix_async( n, n, dA_, lda, dA, nn, queue );
+    magma_zcopymatrix_async( nn, nn, dA_, nn, dAo, nn, queue );
+    magma_zcopymatrix_async( n, nrhs, dB_, ldb, dB, nn, queue );
+    magma_zcopymatrix_async( nn, nrhs, dB_, nn, dBo, nn, queue );
+
+    magma_zgerfs_nopiv_gpu_async( MagmaNoTrans, nn, nrhs, dAo, nn, dBo, nn, dB, nn, dwork, dA, &iter, info, iter_max, bwdmax, queue );
+
+    magma_zcopymatrix_async( n, nrhs, dB, nn, dB_, ldb, queue );
+
+    cleanup:
+    magma_free_async( dA, queue );
+    magma_free_async( dB, queue );
+    magma_free_async( dAo, queue );
+    magma_free_async( dBo, queue );
+    magma_free_async( dwork, queue );
+
+    return *info;
+}
diff --git a/src/zgetrf_nopiv_gpu.cpp b/src/zgetrf_nopiv_gpu.cpp
index 472491e2f..5abbbafa7 100644
--- a/src/zgetrf_nopiv_gpu.cpp
+++ b/src/zgetrf_nopiv_gpu.cpp
@@ -209,3 +209,153 @@ magma_zgetrf_nopiv_gpu(
     
     return *info;
 } /* magma_zgetrf_nopiv_gpu */
+
+extern "C" magma_int_t
+magma_zgetrf_nopiv_gpu_async(
+        magma_int_t m, magma_int_t n,
+        magmaDoubleComplex_ptr dA, magma_int_t ldda,
+        magma_int_t *info,
+        magma_queue_t queue)
+{
+#ifdef MAGMA_HAVE_OPENCL
+#define  dA(i_, j_) dA,  (dA_offset  + (i_)*nb       + (j_)*nb*ldda)
+#else
+#define  dA(i_, j_) (dA  + (i_)*nb       + (j_)*nb*ldda)
+#endif
+
+    const magmaDoubleComplex c_one     = MAGMA_Z_ONE;
+    const magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
+
+    magma_int_t iinfo, nb;
+    magma_int_t maxm, mindim;
+    magma_int_t j, rows, s, ldwork;
+    magmaDoubleComplex *work;
+
+    /* Check arguments */
+    *info = 0;
+    if (m < 0)
+        *info = -1;
+    else if (n < 0)
+        *info = -2;
+    else if (ldda < max(1,m))
+        *info = -4;
+
+    if (*info != 0) {
+        magma_xerbla( __func__, -(*info) );
+        return *info;
+    }
+
+    /* Quick return if possible */
+    if (m == 0 || n == 0)
+        return *info;
+
+    /* Function Body */
+    mindim = min( m, n );
+    nb     = magma_get_zgetrf_nb( m, n );
+    s      = mindim / nb;
+
+    magma_queue_t queues[2];
+    queues[0] = queue;
+    magma_queue_create( queue->device(), &queues[1] );
+
+    if (nb <= 1 || nb >= min(m,n)) {
+        /* Use CPU code. */
+        if ( MAGMA_SUCCESS != magma_zmalloc_cpu( &work, m*n )) {
+            *info = MAGMA_ERR_HOST_ALLOC;
+            return *info;
+        }
+        magma_zgetmatrix( m, n, dA(0,0), ldda, work, m, queues[0] );
+        magma_zgetrf_nopiv( m, n, work, m, info );
+        magma_zsetmatrix( m, n, work, m, dA(0,0), ldda, queues[0] );
+        magma_free_cpu( work );
+    }
+    else {
+        /* Use hybrid blocked code. */
+        maxm = magma_roundup( m, 32 );
+
+        ldwork = maxm;
+        if (MAGMA_SUCCESS != magma_zmalloc_pinned( &work, ldwork*nb )) {
+            *info = MAGMA_ERR_HOST_ALLOC;
+            return *info;
+        }
+
+        for( j=0; j < s; j++ ) {
+            // get j-th panel from device
+            magma_queue_sync( queues[1] );
+            magma_zgetmatrix_async( m-j*nb, nb, dA(j,j), ldda, work, ldwork, queues[0] );
+
+            if ( j > 0 ) {
+                magma_ztrsm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit,
+                             nb, n - (j+1)*nb,
+                             c_one, dA(j-1,j-1), ldda,
+                             dA(j-1,j+1), ldda, queues[1] );
+                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
+                             m-j*nb, n-(j+1)*nb, nb,
+                             c_neg_one, dA(j,  j-1), ldda,
+                             dA(j-1,j+1), ldda,
+                             c_one,     dA(j,  j+1), ldda, queues[1] );
+            }
+
+            // do the cpu part
+            rows = m - j*nb;
+            magma_queue_sync( queues[0] );
+            magma_zgetrf_nopiv( rows, nb, work, ldwork, &iinfo );
+            if ( *info == 0 && iinfo > 0 )
+                *info = iinfo + j*nb;
+
+            // send j-th panel to device
+            magma_zsetmatrix_async( m-j*nb, nb, work, ldwork, dA(j, j), ldda, queues[1] );
+
+            // do the small non-parallel computations (next panel update)
+            if ( s > j+1 ) {
+                magma_ztrsm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit,
+                             nb, nb,
+                             c_one, dA(j, j), ldda,
+                             dA(j, j+1), ldda, queues[1] );
+                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
+                             m-(j+1)*nb, nb, nb,
+                             c_neg_one, dA(j+1, j), ldda,
+                             dA(j,   j+1), ldda,
+                             c_one,     dA(j+1, j+1), ldda, queues[1] );
+            }
+            else {
+                magma_ztrsm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit,
+                             nb, n-s*nb,
+                             c_one, dA(j, j  ), ldda,
+                             dA(j, j+1), ldda, queues[1] );
+                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
+                             m-(j+1)*nb, n-(j+1)*nb, nb,
+                             c_neg_one, dA(j+1, j  ), ldda,
+                             dA(j,   j+1), ldda,
+                             c_one,     dA(j+1, j+1), ldda, queues[1] );
+            }
+        }
+
+        magma_int_t nb0 = min( m - s*nb, n - s*nb );
+        if ( nb0 > 0 ) {
+            rows = m - s*nb;
+
+            magma_zgetmatrix( rows, nb0, dA(s,s), ldda, work, ldwork, queues[1] );
+
+            // do the cpu part
+            magma_zgetrf_nopiv( rows, nb0, work, ldwork, &iinfo );
+            if ( *info == 0 && iinfo > 0 )
+                *info = iinfo + s*nb;
+
+            // send j-th panel to device
+            magma_zsetmatrix( rows, nb0, work, ldwork, dA(s,s), ldda, queues[1] );
+
+            magmablas_ztrsm_async( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit,
+                         nb0, n-s*nb-nb0,
+                         c_one, dA(s,s),     ldda,
+                         dA(s,s)+nb0, ldda, queues[1] );
+        }
+        magma_queue_sync( queues[1] );
+
+        magma_free_pinned( work );
+    }
+
+    magma_queue_destroy( queues[1] );
+
+    return *info;
+} /* magma_zgetrf_nopiv_gpu_async */
diff --git a/src/zgetrs_nopiv_gpu.cpp b/src/zgetrs_nopiv_gpu.cpp
index bd9dea284..1a56bee75 100644
--- a/src/zgetrs_nopiv_gpu.cpp
+++ b/src/zgetrs_nopiv_gpu.cpp
@@ -130,3 +130,63 @@ magma_zgetrs_nopiv_gpu(
     
     return *info;
 }
+
+extern "C" magma_int_t
+magma_zgetrs_nopiv_gpu_async(
+        magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
+        magmaDoubleComplex_ptr dA, magma_int_t ldda,
+        magmaDoubleComplex_ptr dB, magma_int_t lddb,
+        magma_int_t *info, magma_queue_t queue)
+{
+    // Constants
+    const magmaDoubleComplex c_one = MAGMA_Z_ONE;
+
+    // Local variables
+    const bool notran = trans == MagmaNoTrans;
+
+    *info = 0;
+    if ( (! notran) &&
+         (trans != MagmaTrans) &&
+         (trans != MagmaConjTrans) ) {
+        *info = -1;
+    } else if (n < 0) {
+        *info = -2;
+    } else if (nrhs < 0) {
+        *info = -3;
+    } else if (ldda < max(1,n)) {
+        *info = -5;
+    } else if (lddb < max(1,n)) {
+        *info = -7;
+    }
+    if (*info != 0) {
+        magma_xerbla( __func__, -(*info) );
+        return *info;
+    }
+
+    /* Quick return if possible */
+    if (n == 0 || nrhs == 0) {
+        return *info;
+    }
+
+    if (notran) {
+        /* Solve A * X = B. */
+        if ( nrhs == 1) {
+            magma_ztrsv( MagmaLower, MagmaNoTrans, MagmaUnit,    n, dA, ldda, dB, 1, queue );
+            magma_ztrsv( MagmaUpper, MagmaNoTrans, MagmaNonUnit, n, dA, ldda, dB, 1, queue );
+        } else {
+            magma_ztrsm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit,    n, nrhs, c_one, dA, ldda, dB, lddb, queue );
+            magma_ztrsm( MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit, n, nrhs, c_one, dA, ldda, dB, lddb, queue );
+        }
+    } else {
+        /* Solve A**T * X = B  or  A**H * X = B. */
+        if ( nrhs == 1) {
+            magma_ztrsv( MagmaUpper, trans, MagmaNonUnit, n, dA, ldda, dB, 1, queue );
+            magma_ztrsv( MagmaLower, trans, MagmaUnit,    n, dA, ldda, dB, 1, queue );
+        } else {
+            magma_ztrsm( MagmaLeft, MagmaUpper, trans, MagmaNonUnit, n, nrhs, c_one, dA, ldda, dB, lddb, queue );
+            magma_ztrsm( MagmaLeft, MagmaLower, trans, MagmaUnit,    n, nrhs, c_one, dA, ldda, dB, lddb, queue );
+        }
+    }
+
+    return *info;
+}

From 9ccac3a6e59a8e0f0872ab5087c5d7b16348d9d2 Mon Sep 17 00:00:00 2001
From: nbeams <246972+nbeams@users.noreply.github.com>
Date: Tue, 10 Dec 2024 23:21:08 +0000
Subject: [PATCH 02/27] Remove deprecated v1 interface

---
 control/trace.cpp               |   16 +-
 docs/documentation.txt          |    5 -
 include/Makefile.src            |    4 -
 include/magma.h                 |   10 -
 include/magma_copy_v1.h         |  361 ---------
 include/magmablas_v1.h          |  102 ---
 include/magmablas_v1_map.h      |   72 --
 include/magmablas_z_v1.h        | 1250 -------------------------------
 include/magmablas_z_v1_map.h    |  135 ----
 include/magmablas_zc_v1.h       |   91 ---
 include/magmablas_zc_v1_map.h   |   24 -
 interface_cuda/Makefile.src     |    3 -
 interface_cuda/blas_z_v1.cpp    |  549 --------------
 interface_cuda/copy_v1.cpp      |  133 ----
 interface_cuda/interface.cpp    |   49 --
 interface_cuda/interface_v1.cpp |  189 -----
 magmablas/Makefile.src          |    2 -
 magmablas/magmablas_z_v1.cpp    |  825 --------------------
 magmablas/magmablas_zc_v1.cpp   |   96 ---
 src/zpotrf_mgpu_right.cpp       |    3 -
 20 files changed, 9 insertions(+), 3910 deletions(-)
 delete mode 100644 include/magma_copy_v1.h
 delete mode 100644 include/magmablas_v1.h
 delete mode 100644 include/magmablas_v1_map.h
 delete mode 100644 include/magmablas_z_v1.h
 delete mode 100644 include/magmablas_z_v1_map.h
 delete mode 100644 include/magmablas_zc_v1.h
 delete mode 100644 include/magmablas_zc_v1_map.h
 delete mode 100644 interface_cuda/blas_z_v1.cpp
 delete mode 100644 interface_cuda/copy_v1.cpp
 delete mode 100644 interface_cuda/interface_v1.cpp
 delete mode 100644 magmablas/magmablas_z_v1.cpp
 delete mode 100644 magmablas/magmablas_zc_v1.cpp

diff --git a/control/trace.cpp b/control/trace.cpp
index 480b8b1b1..cf7245efa 100644
--- a/control/trace.cpp
+++ b/control/trace.cpp
@@ -14,7 +14,6 @@
 
 #include <cuda_runtime.h>
 #include "magma_internal.h"
-#include "magmablas_v1.h"
 
 // set TRACE_METHOD = 2 to record start time as
 // later of CPU time and previous event's end time.
@@ -79,9 +78,8 @@ void trace_init( int ncore, int ngpu, int nqueue, magma_queue_t* queues )
             int t = dev*glog.nqueue + s;
             glog.gpu_id[t] = 0;
             glog.queues[t] = queues[t];
+	    magma_queue_sync(queues[t]);
         }
-        magma_setdevice( dev );
-        magma_device_sync();
     }
     // now that all GPUs are sync'd, record start time
     for( int dev = 0; dev < ngpu; ++dev ) {
@@ -94,8 +92,10 @@ void trace_init( int ncore, int ngpu, int nqueue, magma_queue_t* queues )
     }
     // sync again
     for( int dev = 0; dev < ngpu; ++dev ) {
-        magma_setdevice( dev );
-        magma_device_sync();
+        for( int s = 0; s < nqueue; ++s ) {
+            int t = dev*glog.nqueue + s;
+	    magma_queue_sync(queues[t]);
+        }
     }
     glog.cpu_first = magma_wtime();
 }
@@ -173,8 +173,10 @@ void trace_finalize( const char* filename, const char* cssfile )
     
     // sync devices
     for( int dev = 0; dev < glog.ngpu; ++dev ) {
-        magma_setdevice( dev );
-        magma_device_sync();
+        for( int s = 0; s < nqueue; ++s ) {
+            int t = dev*glog.nqueue + s;
+	    magma_queue_sync(queues[t]);
+        }
     }
     double time = magma_wtime() - glog.cpu_first;
     
diff --git a/docs/documentation.txt b/docs/documentation.txt
index 9b91ec983..200198361 100644
--- a/docs/documentation.txt
+++ b/docs/documentation.txt
@@ -188,11 +188,6 @@ using the `-D` compiler flag, e.g., `-DMAGMA_WITH_MKL` in CFLAGS.
     If linked with ACML 5 or later, allows MAGMA to get ACML's version.
     ACML's number of threads are set via OpenMP.
 
-- `MAGMA_NO_V1`
-
-    Disables MAGMA v1.x compatability. Skips compiling non-queue versions
-    of MAGMA BLAS routines, and simplifies magma_init().
-
 - `MAGMA_NOAFFINITY`
 
     Disables thread affinity, available in glibc 2.6 and later.
diff --git a/include/Makefile.src b/include/Makefile.src
index af83d8314..2c6cbe42c 100644
--- a/include/Makefile.src
+++ b/include/Makefile.src
@@ -18,11 +18,7 @@ hdr += \
 	$(cdir)/magma_z.h		\
 	$(cdir)/magma_zc.h		\
 	$(cdir)/magmablas_z.h		\
-	$(cdir)/magmablas_z_v1.h	\
-	$(cdir)/magmablas_z_v1_map.h	\
 	$(cdir)/magmablas_zc.h		\
-	$(cdir)/magmablas_zc_v1.h	\
-	$(cdir)/magmablas_zc_v1_map.h	\
 	$(cdir)/magma_zlapack.h		\
 	$(cdir)/magma_zbulge.h		\
 	$(cdir)/magma_zbulgeinc.h	\
diff --git a/include/magma.h b/include/magma.h
index 29362099d..425dd22b6 100644
--- a/include/magma.h
+++ b/include/magma.h
@@ -9,10 +9,6 @@
 #ifndef MAGMA_H
 #define MAGMA_H
 
-#ifdef MAGMA_NO_V1
-#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h"
-#endif
-
 // =============================================================================
 // MAGMA configuration
 #include "magma_config.h"
@@ -25,13 +21,7 @@
 #endif
 #endif
 
-// Include the MAGMA v2 and v1 APIs,
-// then map names to the v1 API (e.g., magma_zgemm => magma_zgemm_v1).
-// Some functions (like setmatrix_async) are the same in v1 and v2,
-// so are provided by the v2 API.
 #include "magma_v2.h"
-#include "magmablas_v1.h"
-#include "magmablas_v1_map.h"
 
 #undef  MAGMA_API
 #define MAGMA_API 1
diff --git a/include/magma_copy_v1.h b/include/magma_copy_v1.h
deleted file mode 100644
index dbe1af138..000000000
--- a/include/magma_copy_v1.h
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-*/
-
-#ifndef MAGMA_COPY_V1_H
-#define MAGMA_COPY_V1_H
-
-#ifdef MAGMA_NO_V1
-#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h"
-#endif
-
-#include "magma_types.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// =============================================================================
-// copying vectors
-// set  copies host   to device
-// get  copies device to host
-// copy copies device to device
-// (with CUDA unified addressing, copy can be between same or different devices)
-// Add the function, file, and line for error-reporting purposes.
-// async versions are same for v1 and v2; see magmablas_q.h
-
-#define magma_setvector_v1(           n, elemSize, hx_src, incx, dy_dst, incy ) \
-        magma_setvector_v1_internal(  n, elemSize, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
-
-#define magma_getvector_v1(           n, elemSize, dx_src, incx, hy_dst, incy ) \
-        magma_getvector_v1_internal(  n, elemSize, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ )
-
-#define magma_copyvector_v1(          n, elemSize, dx_src, incx, dy_dst, incy ) \
-        magma_copyvector_v1_internal( n, elemSize, dx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_setvector_v1_internal(
-    magma_int_t n, magma_int_t elemSize,
-    const void *hx_src, magma_int_t incx,
-    magma_ptr   dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_getvector_v1_internal(
-    magma_int_t n, magma_int_t elemSize,
-    magma_const_ptr dx_src, magma_int_t incx,
-    void           *hy_dst, magma_int_t incy,
-    const char* func, const char* file, int line );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_copyvector_v1_internal(
-    magma_int_t n, magma_int_t elemSize,
-    magma_const_ptr dx_src, magma_int_t incx,
-    magma_ptr       dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line );
-
-
-// =============================================================================
-// copying sub-matrices (contiguous columns)
-
-#define magma_setmatrix_v1(           m, n, elemSize, hA_src, lda,  dB_dst, lddb ) \
-        magma_setmatrix_v1_internal(  m, n, elemSize, hA_src, lda,  dB_dst, lddb, __func__, __FILE__, __LINE__ )
-
-#define magma_getmatrix_v1(           m, n, elemSize, dA_src, ldda, hB_dst, ldb ) \
-        magma_getmatrix_v1_internal(  m, n, elemSize, dA_src, ldda, hB_dst, ldb, __func__, __FILE__, __LINE__ )
-
-#define magma_copymatrix_v1(          m, n, elemSize, dA_src, ldda, dB_dst, lddb ) \
-        magma_copymatrix_v1_internal( m, n, elemSize, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_setmatrix_v1_internal(
-    magma_int_t m, magma_int_t n, magma_int_t elemSize,
-    const void *hA_src, magma_int_t lda,
-    magma_ptr   dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_getmatrix_v1_internal(
-    magma_int_t m, magma_int_t n, magma_int_t elemSize,
-    magma_const_ptr dA_src, magma_int_t ldda,
-    void           *hB_dst, magma_int_t ldb,
-    const char* func, const char* file, int line );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_copymatrix_v1_internal(
-    magma_int_t m, magma_int_t n, magma_int_t elemSize,
-    magma_const_ptr dA_src, magma_int_t ldda,
-    magma_ptr       dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line );
-
-
-// =============================================================================
-// copying vectors - version for magma_int_t
-
-#define magma_isetvector_v1(           n, hx_src, incx, dy_dst, incy ) \
-        magma_isetvector_v1_internal(  n, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
-
-#define magma_igetvector_v1(           n, dx_src, incx, hy_dst, incy ) \
-        magma_igetvector_v1_internal(  n, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ )
-
-#define magma_icopyvector_v1(          n, dx_src, incx, dy_dst, incy ) \
-        magma_icopyvector_v1_internal( n, dx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_isetvector_v1_internal(
-    magma_int_t n,
-    const magma_int_t *hx_src, magma_int_t incx,
-    magmaInt_ptr       dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_setvector_v1_internal( n, sizeof(magma_int_t),
-                                 hx_src, incx,
-                                 dy_dst, incy,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_igetvector_v1_internal(
-    magma_int_t n,
-    magmaInt_const_ptr dx_src, magma_int_t incx,
-    magma_int_t       *hy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_getvector_v1_internal( n, sizeof(magma_int_t),
-                                 dx_src, incx,
-                                 hy_dst, incy,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_icopyvector_v1_internal(
-    magma_int_t n,
-    magmaInt_const_ptr dx_src, magma_int_t incx,
-    magmaInt_ptr       dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_copyvector_v1_internal( n, sizeof(magma_int_t),
-                                  dx_src, incx,
-                                  dy_dst, incy,
-                                  func, file, line );
-}
-
-
-// =============================================================================
-// copying sub-matrices - version for magma_int_t
-
-#define magma_isetmatrix_v1(           m, n, hA_src, lda,  dB_dst, lddb ) \
-        magma_isetmatrix_v1_internal(  m, n, hA_src, lda,  dB_dst, lddb, __func__, __FILE__, __LINE__ )
-
-#define magma_igetmatrix_v1(           m, n, dA_src, ldda, hB_dst, ldb ) \
-        magma_igetmatrix_v1_internal(  m, n, dA_src, ldda, hB_dst, ldb, __func__, __FILE__, __LINE__ )
-
-#define magma_icopymatrix_v1(          m, n, dA_src, ldda, dB_dst, lddb ) \
-        magma_icopymatrix_v1_internal( m, n, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_isetmatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    const magma_int_t *hA_src, magma_int_t lda,
-    magmaInt_ptr       dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line )
-{
-    magma_setmatrix_v1_internal( m, n, sizeof(magma_int_t),
-                                 hA_src, lda,
-                                 dB_dst, lddb,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_igetmatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    magmaInt_const_ptr dA_src, magma_int_t ldda,
-    magma_int_t       *hB_dst, magma_int_t ldb,
-    const char* func, const char* file, int line )
-{
-    magma_getmatrix_v1_internal( m, n, sizeof(magma_int_t),
-                                 dA_src, ldda,
-                                 hB_dst, ldb,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_icopymatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    magmaInt_const_ptr dA_src, magma_int_t ldda,
-    magmaInt_ptr       dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line )
-{
-    magma_copymatrix_v1_internal( m, n, sizeof(magma_int_t),
-                                  dA_src, ldda,
-                                  dB_dst, lddb,
-                                  func, file, line );
-}
-
-
-// =============================================================================
-// copying vectors - version for magma_index_t
-
-#define magma_index_setvector_v1(           n, hx_src, incx, dy_dst, incy ) \
-        magma_index_setvector_v1_internal(  n, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
-
-#define magma_index_getvector_v1(           n, dx_src, incx, hy_dst, incy ) \
-        magma_index_getvector_v1_internal(  n, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ )
-
-#define magma_index_copyvector_v1(          n, dx_src, incx, dy_dst, incy ) \
-        magma_index_copyvector_v1_internal( n, dx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_index_setvector_v1_internal(
-    magma_int_t n,
-    const magma_index_t *hx_src, magma_int_t incx,
-    magmaIndex_ptr       dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_setvector_v1_internal( n, sizeof(magma_index_t),
-                                 hx_src, incx,
-                                 dy_dst, incy,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_index_getvector_v1_internal(
-    magma_int_t n,
-    magmaIndex_const_ptr dx_src, magma_int_t incx,
-    magma_index_t       *hy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_getvector_v1_internal( n, sizeof(magma_index_t),
-                                 dx_src, incx,
-                                 hy_dst, incy,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_index_copyvector_v1_internal(
-    magma_int_t n,
-    magmaIndex_const_ptr dx_src, magma_int_t incx,
-    magmaIndex_ptr       dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_copyvector_v1_internal( n, sizeof(magma_index_t),
-                                  dx_src, incx,
-                                  dy_dst, incy,
-                                  func, file, line );
-}
-
-
-// =============================================================================
-// copying sub-matrices - version for magma_index_t
-
-#define magma_index_setmatrix_v1(           m, n, hA_src, lda,  dB_dst, lddb ) \
-        magma_index_setmatrix_v1_internal(  m, n, hA_src, lda,  dB_dst, lddb, __func__, __FILE__, __LINE__ )
-
-#define magma_index_getmatrix_v1(           m, n, dA_src, ldda, hB_dst, ldb ) \
-        magma_index_getmatrix_v1_internal(  m, n, dA_src, ldda, hB_dst, ldb, __func__, __FILE__, __LINE__ )
-
-#define magma_index_copymatrix_v1(          m, n, dA_src, ldda, dB_dst, lddb ) \
-        magma_index_copymatrix_v1_internal( m, n, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_index_setmatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    const magma_index_t *hA_src, magma_int_t lda,
-    magmaIndex_ptr       dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line )
-{
-    magma_setmatrix_v1_internal( m, n, sizeof(magma_index_t),
-                                 hA_src, lda,
-                                 dB_dst, lddb,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_index_getmatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    magmaIndex_const_ptr dA_src, magma_int_t ldda,
-    magma_index_t       *hB_dst, magma_int_t ldb,
-    const char* func, const char* file, int line )
-{
-    magma_getmatrix_v1_internal( m, n, sizeof(magma_index_t),
-                                 dA_src, ldda,
-                                 hB_dst, ldb,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_index_copymatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    magmaIndex_const_ptr dA_src, magma_int_t ldda,
-    magmaIndex_ptr       dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line )
-{
-    magma_copymatrix_v1_internal( m, n, sizeof(magma_index_t),
-                                  dA_src, ldda,
-                                  dB_dst, lddb,
-                                  func, file, line );
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // MAGMA_COPY_V1_H
diff --git a/include/magmablas_v1.h b/include/magmablas_v1.h
deleted file mode 100644
index 865e833ff..000000000
--- a/include/magmablas_v1.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-       @file
-*/
-
-#ifndef MAGMABLAS_V1_H
-#define MAGMABLAS_V1_H
-
-#ifdef MAGMA_NO_V1
-#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h"
-#endif
-
-#include "magma_copy_v1.h"
-#include "magmablas_z.h"
-#include "magmablas_z_v1.h"
-#include "magmablas_c_v1.h"
-#include "magmablas_d_v1.h"
-#include "magmablas_s_v1.h"
-#include "magmablas_zc_v1.h"
-#include "magmablas_ds_v1.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// =============================================================================
-// queue support
-// new magma_queue_create adds device
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-#define magma_queue_create_v1( queue_ptr ) \
-        magma_queue_create_v1_internal( queue_ptr, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void magma_queue_create_v1_internal(
-    magma_queue_t* queue_ptr,
-    const char* func, const char* file, int line );
-
-
-// =============================================================================
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-#define MagmaUpperLower     MagmaFull
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-#define MagmaUpperLowerStr  MagmaFullStr
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-#define MAGMA_Z_CNJG(a)     MAGMA_Z_CONJ(a)
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-#define MAGMA_C_CNJG(a)     MAGMA_C_CONJ(a)
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-#define MAGMA_D_CNJG(a)     MAGMA_D_CONJ(a)
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-#define MAGMA_S_CNJG(a)     MAGMA_S_CONJ(a)
-
-// device_sync is not portable to OpenCL, and is generally not needed
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void magma_device_sync();
-
-
-// =============================================================================
-// Define magma queue
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_int_t magmablasSetKernelStream( magma_queue_t queue );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_int_t magmablasGetKernelStream( magma_queue_t *queue );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_queue_t magmablasGetQueue();
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // MAGMABLAS_V1_H
diff --git a/include/magmablas_v1_map.h b/include/magmablas_v1_map.h
deleted file mode 100644
index 141f96023..000000000
--- a/include/magmablas_v1_map.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-*/
-
-#ifndef MAGMABLAS_V1_MAP_H
-#define MAGMABLAS_V1_MAP_H
-
-#ifdef MAGMA_NO_V1
-#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h"
-#endif
-
-// =============================================================================
-// map function names to old v1 routines
-
-#include "magmablas_s_v1_map.h"
-#include "magmablas_d_v1_map.h"
-#include "magmablas_c_v1_map.h"
-#include "magmablas_z_v1_map.h"
-#include "magmablas_ds_v1_map.h"
-#include "magmablas_zc_v1_map.h"
-
-#undef magma_queue_create
-
-#undef magma_setvector
-#undef magma_getvector
-#undef magma_copyvector
-#undef magma_setmatrix
-#undef magma_getmatrix
-#undef magma_copymatrix
-                                            
-#undef magma_isetvector
-#undef magma_igetvector
-#undef magma_icopyvector
-#undef magma_isetmatrix
-#undef magma_igetmatrix
-#undef magma_icopymatrix
-                                            
-#undef magma_index_setvector
-#undef magma_index_getvector
-#undef magma_index_copyvector
-#undef magma_index_setmatrix
-#undef magma_index_getmatrix
-#undef magma_index_copymatrix
-
-#define magma_queue_create                  magma_queue_create_v1
-
-#define magma_setvector                     magma_setvector_v1
-#define magma_getvector                     magma_getvector_v1
-#define magma_copyvector                    magma_copyvector_v1
-#define magma_setmatrix                     magma_setmatrix_v1
-#define magma_getmatrix                     magma_getmatrix_v1
-#define magma_copymatrix                    magma_copymatrix_v1
-                                            
-#define magma_isetvector                    magma_isetvector_v1
-#define magma_igetvector                    magma_igetvector_v1
-#define magma_icopyvector                   magma_icopyvector_v1
-#define magma_isetmatrix                    magma_isetmatrix_v1
-#define magma_igetmatrix                    magma_igetmatrix_v1
-#define magma_icopymatrix                   magma_icopymatrix_v1
-                                            
-#define magma_index_setvector               magma_index_setvector_v1
-#define magma_index_getvector               magma_index_getvector_v1
-#define magma_index_copyvector              magma_index_copyvector_v1
-#define magma_index_setmatrix               magma_index_setmatrix_v1
-#define magma_index_getmatrix               magma_index_getmatrix_v1
-#define magma_index_copymatrix              magma_index_copymatrix_v1
-
-#endif // MAGMABLAS_V1_MAP_H
diff --git a/include/magmablas_z_v1.h b/include/magmablas_z_v1.h
deleted file mode 100644
index 3c126478d..000000000
--- a/include/magmablas_z_v1.h
+++ /dev/null
@@ -1,1250 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @precisions normal z -> s d c
-*/
-
-#ifndef MAGMABLAS_Z_V1_H
-#define MAGMABLAS_Z_V1_H
-
-#ifdef MAGMA_NO_V1
-#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h"
-#endif
-
-#include "magma_types.h"
-#include "magma_copy_v1.h"
-
-#define MAGMA_COMPLEX
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-  /*
-   * Transpose functions
-   */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_ztranspose_inplace_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_ztranspose_conj_inplace_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_ztranspose_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA,  magma_int_t ldda,
-    magmaDoubleComplex_ptr       dAT, magma_int_t lddat );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_ztranspose_conj_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA,  magma_int_t ldda,
-    magmaDoubleComplex_ptr       dAT, magma_int_t lddat );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zgetmatrix_transpose_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dAT,   magma_int_t ldda,
-    magmaDoubleComplex          *hA,    magma_int_t lda,
-    magmaDoubleComplex_ptr       dwork, magma_int_t lddwork, magma_int_t nb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zsetmatrix_transpose_v1(
-    magma_int_t m, magma_int_t n,
-    const magmaDoubleComplex *hA,    magma_int_t lda,
-    magmaDoubleComplex_ptr    dAT,   magma_int_t ldda,
-    magmaDoubleComplex_ptr    dwork, magma_int_t lddwork, magma_int_t nb );
-
-  /*
-   * RBT-related functions
-   */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zprbt_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr du,
-    magmaDoubleComplex_ptr dv );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zprbt_mv_v1(
-    magma_int_t n, magma_int_t nrhs,
-    magmaDoubleComplex_ptr dv,
-    magmaDoubleComplex_ptr db, magma_int_t lddb);
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zprbt_mtv_v1(
-    magma_int_t n, magma_int_t nrhs,
-    magmaDoubleComplex_ptr du,
-    magmaDoubleComplex_ptr db, magma_int_t lddb);
-
-  /*
-   * Multi-GPU copy functions
-   */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zgetmatrix_1D_col_bcyclic_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr const dA[], magma_int_t ldda,
-    magmaDoubleComplex                *hA,   magma_int_t lda,
-    magma_int_t ngpu, magma_int_t nb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zsetmatrix_1D_col_bcyclic_v1(
-    magma_int_t m, magma_int_t n,
-    const magmaDoubleComplex *hA,   magma_int_t lda,
-    magmaDoubleComplex_ptr    dA[], magma_int_t ldda,
-    magma_int_t ngpu, magma_int_t nb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zgetmatrix_1D_row_bcyclic_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr const dA[], magma_int_t ldda,
-    magmaDoubleComplex                *hA,   magma_int_t lda,
-    magma_int_t ngpu, magma_int_t nb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zsetmatrix_1D_row_bcyclic_v1(
-    magma_int_t m, magma_int_t n,
-    const magmaDoubleComplex *hA,   magma_int_t lda,
-    magmaDoubleComplex_ptr    dA[], magma_int_t ldda,
-    magma_int_t ngpu, magma_int_t nb );
-
-
-  /*
-   * LAPACK auxiliary functions (alphabetical order)
-   */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zgeadd_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zgeadd2_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlacpy_v1(
-    magma_uplo_t uplo,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlacpy_conj_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA1, magma_int_t lda1,
-    magmaDoubleComplex_ptr dA2, magma_int_t lda2 );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlacpy_sym_in_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
-    magma_int_t *rows, magma_int_t *perm,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlacpy_sym_out_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
-    magma_int_t *rows, magma_int_t *perm,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-double
-magmablas_zlange_v1(
-    magma_norm_t norm,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dwork, magma_int_t lwork );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-double
-magmablas_zlanhe_v1(
-    magma_norm_t norm, magma_uplo_t uplo,
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dwork, magma_int_t lwork );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-double
-magmablas_zlansy_v1(
-    magma_norm_t norm, magma_uplo_t uplo,
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dwork, magma_int_t lwork );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlarfg_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dalpha,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dtau );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlascl_v1(
-    magma_type_t type, magma_int_t kl, magma_int_t ku,
-    double cfrom, double cto,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magma_int_t *info );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlascl_2x2_v1(
-    magma_type_t type, magma_int_t m,
-    magmaDoubleComplex_const_ptr dW, magma_int_t lddw,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda,
-    magma_int_t *info );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlascl2_v1(
-    magma_type_t type,
-    magma_int_t m, magma_int_t n,
-    magmaDouble_const_ptr dD,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magma_int_t *info );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlascl_diag_v1(
-    magma_type_t type, magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dD, magma_int_t lddd,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda,
-    magma_int_t *info );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlaset_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
-    magmaDoubleComplex offdiag, magmaDoubleComplex diag,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlaset_band_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex offdiag, magmaDoubleComplex diag,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlaswp_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dAT, magma_int_t ldda,
-    magma_int_t k1, magma_int_t k2,
-    const magma_int_t *ipiv, magma_int_t inci );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlaswp2_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dAT, magma_int_t ldda,
-    magma_int_t k1, magma_int_t k2,
-    magmaInt_const_ptr d_ipiv, magma_int_t inci );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlaswp_sym_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magma_int_t k1, magma_int_t k2,
-    const magma_int_t *ipiv, magma_int_t inci );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlaswpx_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldx, magma_int_t ldy,
-    magma_int_t k1, magma_int_t k2,
-    const magma_int_t *ipiv, magma_int_t inci );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zsymmetrize_v1(
-    magma_uplo_t uplo, magma_int_t m,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zsymmetrize_tiles_v1(
-    magma_uplo_t uplo, magma_int_t m,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magma_int_t ntile, magma_int_t mstride, magma_int_t nstride );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_ztrtri_diag_v1(
-    magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr d_dinvA );
-
-  /*
-   * to cleanup (alphabetical order)
-   */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_dznrm2_adjust_v1(
-    magma_int_t k,
-    magmaDouble_ptr dxnorm,
-    magmaDoubleComplex_ptr dc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_dznrm2_check_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dxnorm,
-    magmaDouble_ptr dlsticc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_dznrm2_cols_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dxnorm );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_dznrm2_row_check_adjust_v1(
-    magma_int_t k, double tol,
-    magmaDouble_ptr dxnorm,
-    magmaDouble_ptr dxnorm2,
-    magmaDoubleComplex_ptr dC, magma_int_t lddc,
-    magmaDouble_ptr dlsticc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_int_t
-magma_zlarfb_gpu_v1(
-    magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev,
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex_const_ptr dV, magma_int_t lddv,
-    magmaDoubleComplex_const_ptr dT, magma_int_t lddt,
-    magmaDoubleComplex_ptr dC,       magma_int_t lddc,
-    magmaDoubleComplex_ptr dwork,    magma_int_t ldwork );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_int_t
-magma_zlarfb_gpu_gemm_v1(
-    magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev,
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex_const_ptr dV, magma_int_t lddv,
-    magmaDoubleComplex_const_ptr dT, magma_int_t lddt,
-    magmaDoubleComplex_ptr dC,       magma_int_t lddc,
-    magmaDoubleComplex_ptr dwork,    magma_int_t ldwork,
-    magmaDoubleComplex_ptr dworkvt,  magma_int_t ldworkvt );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zlarfbx_gpu_v1(
-    magma_int_t m, magma_int_t k,
-    magmaDoubleComplex_ptr V,  magma_int_t ldv,
-    magmaDoubleComplex_ptr dT, magma_int_t ldt,
-    magmaDoubleComplex_ptr c,
-    magmaDoubleComplex_ptr dwork );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zlarfg_gpu_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx0,
-    magmaDoubleComplex_ptr dx,
-    magmaDoubleComplex_ptr dtau,
-    magmaDouble_ptr        dxnorm,
-    magmaDoubleComplex_ptr dAkk );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zlarfgtx_gpu_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx0,
-    magmaDoubleComplex_ptr dx,
-    magmaDoubleComplex_ptr dtau,
-    magmaDouble_ptr        dxnorm,
-    magmaDoubleComplex_ptr dA, magma_int_t iter,
-    magmaDoubleComplex_ptr V,  magma_int_t ldv,
-    magmaDoubleComplex_ptr T,  magma_int_t ldt,
-    magmaDoubleComplex_ptr dwork );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zlarfgx_gpu_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx0,
-    magmaDoubleComplex_ptr dx,
-    magmaDoubleComplex_ptr dtau,
-    magmaDouble_ptr        dxnorm,
-    magmaDoubleComplex_ptr dA, magma_int_t iter );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zlarfx_gpu_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_ptr v,
-    magmaDoubleComplex_ptr tau,
-    magmaDoubleComplex_ptr C,  magma_int_t ldc,
-    magmaDouble_ptr        xnorm,
-    magmaDoubleComplex_ptr dT, magma_int_t iter,
-    magmaDoubleComplex_ptr work );
-
-
-  /*
-   * Level 1 BLAS (alphabetical order)
-   */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zaxpycp_v1(
-    magma_int_t m,
-    magmaDoubleComplex_ptr dr,
-    magmaDoubleComplex_ptr dx,
-    magmaDoubleComplex_const_ptr db );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zswap_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dy, magma_int_t incy );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zswapblk_v1(
-    magma_order_t order,
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr dB, magma_int_t lddb,
-    magma_int_t i1, magma_int_t i2,
-    const magma_int_t *ipiv, magma_int_t inci,
-    magma_int_t offset );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zswapdblk_v1(
-    magma_int_t n, magma_int_t nb,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t inca,
-    magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_int_t incb );
-
-  /*
-   * Level 2 BLAS (alphabetical order)
-   */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zgemv_v1(
-    magma_trans_t trans, magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zgemv_conj_v1(
-    magma_int_t m, magma_int_t n, magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr dy, magma_int_t incy );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_int_t
-magmablas_zhemv_v1(
-    magma_uplo_t uplo, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_int_t
-magmablas_zsymv_v1(
-    magma_uplo_t uplo, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy );
-
-  /*
-   * Level 3 BLAS (alphabetical order)
-   */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zgemm_v1(
-    magma_trans_t transA, magma_trans_t transB,
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zgemm_reduce_v1(
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zhemm_v1(
-    magma_side_t side, magma_uplo_t uplo,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zsymm_v1(
-    magma_side_t side, magma_uplo_t uplo,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zsyr2k_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zher2k_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    double  beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zsyrk_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zherk_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    double  alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    double  beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_ztrsm_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_ztrsm_outofplace_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb,
-    magmaDoubleComplex_ptr       dX, magma_int_t lddx,
-    magma_int_t flag,
-    magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_ztrsm_work_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb,
-    magmaDoubleComplex_ptr       dX, magma_int_t lddx,
-    magma_int_t flag,
-    magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length );
-
-
-  /*
-   * Wrappers for platform independence.
-   * These wrap CUBLAS or AMD OpenCL BLAS functions.
-   */
-
-// =============================================================================
-// copying vectors
-// set  copies host   to device
-// get  copies device to host
-// copy copies device to device
-// (with CUDA unified addressing, copy can be between same or different devices)
-// Add the function, file, and line for error-reporting purposes.
-
-#define magma_zsetvector_v1(           n, hx_src, incx, dy_dst, incy ) \
-        magma_zsetvector_v1_internal(  n, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
-
-#define magma_zgetvector_v1(           n, dx_src, incx, hy_dst, incy ) \
-        magma_zgetvector_v1_internal(  n, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ )
-
-#define magma_zcopyvector_v1(          n, dx_src, incx, dy_dst, incy ) \
-        magma_zcopyvector_v1_internal( n, dx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_zsetvector_v1_internal(
-    magma_int_t n,
-    magmaDoubleComplex const    *hx_src, magma_int_t incx,
-    magmaDoubleComplex_ptr       dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_setvector_v1_internal( n, sizeof(magmaDoubleComplex),
-                                 hx_src, incx,
-                                 dy_dst, incy,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_zgetvector_v1_internal(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx_src, magma_int_t incx,
-    magmaDoubleComplex          *hy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_getvector_v1_internal( n, sizeof(magmaDoubleComplex),
-                                 dx_src, incx,
-                                 hy_dst, incy,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_zcopyvector_v1_internal(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx_src, magma_int_t incx,
-    magmaDoubleComplex_ptr       dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_copyvector_v1_internal( n, sizeof(magmaDoubleComplex),
-                                  dx_src, incx,
-                                  dy_dst, incy,
-                                  func, file, line );
-}
-
-
-// =============================================================================
-// copying sub-matrices (contiguous columns)
-
-#define magma_zsetmatrix_v1(           m, n, hA_src, lda,  dB_dst, lddb ) \
-        magma_zsetmatrix_v1_internal(  m, n, hA_src, lda,  dB_dst, lddb, __func__, __FILE__, __LINE__ )
-
-#define magma_zgetmatrix_v1(           m, n, dA_src, ldda, hB_dst, ldb  ) \
-        magma_zgetmatrix_v1_internal(  m, n, dA_src, ldda, hB_dst, ldb,  __func__, __FILE__, __LINE__ )
-
-#define magma_zcopymatrix_v1(          m, n, dA_src, ldda, dB_dst, lddb ) \
-        magma_zcopymatrix_v1_internal( m, n, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ )
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_zsetmatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex const    *hA_src, magma_int_t lda,
-    magmaDoubleComplex_ptr       dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line )
-{
-    magma_setmatrix_v1_internal( m, n, sizeof(magmaDoubleComplex),
-                                 hA_src, lda,
-                                 dB_dst, lddb,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_zgetmatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA_src, magma_int_t ldda,
-    magmaDoubleComplex          *hB_dst, magma_int_t ldb,
-    const char* func, const char* file, int line )
-{
-    magma_getmatrix_v1_internal( m, n, sizeof(magmaDoubleComplex),
-                                 dA_src, ldda,
-                                 hB_dst, ldb,
-                                 func, file, line );
-}
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-static inline void
-magma_zcopymatrix_v1_internal(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA_src, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line )
-{
-    magma_copymatrix_v1_internal( m, n, sizeof(magmaDoubleComplex),
-                                  dA_src, ldda,
-                                  dB_dst, lddb,
-                                  func, file, line );
-}
-
-
-// =============================================================================
-// Level 1 BLAS (alphabetical order)
-
-// in cublas_v2, result returned through output argument
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_int_t
-magma_izamax_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx );
-
-// in cublas_v2, result returned through output argument
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magma_int_t
-magma_izamin_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx );
-
-// in cublas_v2, result returned through output argument
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-double
-magma_dzasum_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zaxpy_v1(
-    magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zcopy_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy );
-
-// in cublas_v2, result returned through output argument
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magmaDoubleComplex
-magma_zdotc_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy );
-
-// in cublas_v2, result returned through output argument
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-magmaDoubleComplex
-magma_zdotu_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy );
-
-// in cublas_v2, result returned through output argument
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-double
-magma_dznrm2_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zrot_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dy, magma_int_t incy,
-    double dc, magmaDoubleComplex ds );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zdrot_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dy, magma_int_t incy,
-    double dc, double ds );
-
-#ifdef MAGMA_REAL
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zrotm_v1(
-    magma_int_t n,
-    magmaDouble_ptr dx, magma_int_t incx,
-    magmaDouble_ptr dy, magma_int_t incy,
-    magmaDouble_const_ptr param );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zrotmg_v1(
-    magmaDouble_ptr d1, magmaDouble_ptr       d2,
-    magmaDouble_ptr x1, magmaDouble_const_ptr y1,
-    magmaDouble_ptr param );
-#endif  // MAGMA_REAL
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zscal_v1(
-    magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_ptr dx, magma_int_t incx );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zdscal_v1(
-    magma_int_t n,
-    double alpha,
-    magmaDoubleComplex_ptr dx, magma_int_t incx );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zswap_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dy, magma_int_t incy );
-
-// =============================================================================
-// Level 2 BLAS (alphabetical order)
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zgemv_v1(
-    magma_trans_t transA,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zgerc_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zgeru_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zhemv_v1(
-    magma_uplo_t uplo,
-    magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zher_v1(
-    magma_uplo_t uplo,
-    magma_int_t n,
-    double alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zher2_v1(
-    magma_uplo_t uplo,
-    magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_ztrmv_v1(
-    magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dx, magma_int_t incx );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_ztrsv_v1(
-    magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dx, magma_int_t incx );
-
-// =============================================================================
-// Level 3 BLAS (alphabetical order)
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zgemm_v1(
-    magma_trans_t transA, magma_trans_t transB,
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zsymm_v1(
-    magma_side_t side, magma_uplo_t uplo,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zhemm_v1(
-    magma_side_t side, magma_uplo_t uplo,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zsyr2k_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zher2k_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    double beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zsyrk_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_zherk_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    double alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    double beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_ztrmm_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magma_ztrsm_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb );
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#undef MAGMA_COMPLEX
-
-#endif // MAGMABLAS_Z_H
diff --git a/include/magmablas_z_v1_map.h b/include/magmablas_z_v1_map.h
deleted file mode 100644
index c62d2c9e0..000000000
--- a/include/magmablas_z_v1_map.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @precisions normal z -> s d c
-*/
-
-#ifndef MAGMABLAS_Z_V1_MAP_H
-#define MAGMABLAS_Z_V1_MAP_H
-
-#ifdef MAGMA_NO_V1
-#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h"
-#endif
-
-// =============================================================================
-// map function names to old v1 routines
-
-#define magmablas_ztranspose_inplace        magmablas_ztranspose_inplace_v1
-#define magmablas_ztranspose_conj_inplace   magmablas_ztranspose_conj_inplace_v1
-#define magmablas_ztranspose                magmablas_ztranspose_v1
-#define magmablas_ztranspose_conj           magmablas_ztranspose_conj_v1
-#define magmablas_zgetmatrix_transpose      magmablas_zgetmatrix_transpose_v1
-#define magmablas_zsetmatrix_transpose      magmablas_zsetmatrix_transpose_v1
-#define magmablas_zprbt                     magmablas_zprbt_v1
-#define magmablas_zprbt_mv                  magmablas_zprbt_mv_v1
-#define magmablas_zprbt_mtv                 magmablas_zprbt_mtv_v1
-#define magma_zgetmatrix_1D_col_bcyclic     magma_zgetmatrix_1D_col_bcyclic_v1
-#define magma_zsetmatrix_1D_col_bcyclic     magma_zsetmatrix_1D_col_bcyclic_v1
-#define magma_zgetmatrix_1D_row_bcyclic     magma_zgetmatrix_1D_row_bcyclic_v1
-#define magma_zsetmatrix_1D_row_bcyclic     magma_zsetmatrix_1D_row_bcyclic_v1
-#define magmablas_zgeadd                    magmablas_zgeadd_v1
-#define magmablas_zgeadd2                   magmablas_zgeadd2_v1
-#define magmablas_zlacpy                    magmablas_zlacpy_v1
-#define magmablas_zlacpy_conj               magmablas_zlacpy_conj_v1
-#define magmablas_zlacpy_sym_in             magmablas_zlacpy_sym_in_v1
-#define magmablas_zlacpy_sym_out            magmablas_zlacpy_sym_out_v1
-#define magmablas_zlange                    magmablas_zlange_v1
-#define magmablas_zlanhe                    magmablas_zlanhe_v1
-#define magmablas_zlansy                    magmablas_zlansy_v1
-#define magmablas_zlarfg                    magmablas_zlarfg_v1
-#define magmablas_zlascl                    magmablas_zlascl_v1
-#define magmablas_zlascl_2x2                magmablas_zlascl_2x2_v1
-#define magmablas_zlascl2                   magmablas_zlascl2_v1
-#define magmablas_zlascl_diag               magmablas_zlascl_diag_v1
-#define magmablas_zlaset                    magmablas_zlaset_v1
-#define magmablas_zlaset_band               magmablas_zlaset_band_v1
-#define magmablas_zlaswp                    magmablas_zlaswp_v1
-#define magmablas_zlaswp2                   magmablas_zlaswp2_v1
-#define magmablas_zlaswp_sym                magmablas_zlaswp_sym_v1
-#define magmablas_zlaswpx                   magmablas_zlaswpx_v1
-#define magmablas_zsymmetrize               magmablas_zsymmetrize_v1
-#define magmablas_zsymmetrize_tiles         magmablas_zsymmetrize_tiles_v1
-#define magmablas_ztrtri_diag               magmablas_ztrtri_diag_v1
-#define magmablas_dznrm2_adjust             magmablas_dznrm2_adjust_v1
-#define magmablas_dznrm2_check              magmablas_dznrm2_check_v1
-#define magmablas_dznrm2_cols               magmablas_dznrm2_cols_v1
-#define magmablas_dznrm2_row_check_adjust   magmablas_dznrm2_row_check_adjust_v1
-#define magma_zlarfb_gpu                    magma_zlarfb_gpu_v1
-#define magma_zlarfb_gpu_gemm               magma_zlarfb_gpu_gemm_v1
-#define magma_zlarfbx_gpu                   magma_zlarfbx_gpu_v1
-#define magma_zlarfg_gpu                    magma_zlarfg_gpu_v1
-#define magma_zlarfgtx_gpu                  magma_zlarfgtx_gpu_v1
-#define magma_zlarfgx_gpu                   magma_zlarfgx_gpu_v1
-#define magma_zlarfx_gpu                    magma_zlarfx_gpu_v1
-#define magmablas_zaxpycp                   magmablas_zaxpycp_v1
-#define magmablas_zswap                     magmablas_zswap_v1
-#define magmablas_zswapblk                  magmablas_zswapblk_v1
-#define magmablas_zswapdblk                 magmablas_zswapdblk_v1
-#define magmablas_zgemv                     magmablas_zgemv_v1
-#define magmablas_zgemv_conj                magmablas_zgemv_conj_v1
-#define magmablas_zhemv                     magmablas_zhemv_v1
-#define magmablas_zsymv                     magmablas_zsymv_v1
-#define magmablas_zgemm                     magmablas_zgemm_v1
-#define magmablas_zgemm_reduce              magmablas_zgemm_reduce_v1
-#define magmablas_zhemm                     magmablas_zhemm_v1
-#define magmablas_zsymm                     magmablas_zsymm_v1
-#define magmablas_zsyr2k                    magmablas_zsyr2k_v1
-#define magmablas_zher2k                    magmablas_zher2k_v1
-#define magmablas_zsyrk                     magmablas_zsyrk_v1
-#define magmablas_zherk                     magmablas_zherk_v1
-#define magmablas_ztrsm                     magmablas_ztrsm_v1
-#define magmablas_ztrsm_outofplace          magmablas_ztrsm_outofplace_v1
-#define magmablas_ztrsm_work                magmablas_ztrsm_work_v1
-
-#undef magma_zsetvector
-#undef magma_zgetvector
-#undef magma_zcopyvector
-#undef magma_zsetmatrix
-#undef magma_zgetmatrix
-#undef magma_zcopymatrix
-
-#define magma_zsetvector                    magma_zsetvector_v1
-#define magma_zgetvector                    magma_zgetvector_v1
-#define magma_zcopyvector                   magma_zcopyvector_v1
-#define magma_zsetmatrix                    magma_zsetmatrix_v1
-#define magma_zgetmatrix                    magma_zgetmatrix_v1
-#define magma_zcopymatrix                   magma_zcopymatrix_v1
-
-#define magma_izamax                        magma_izamax_v1
-#define magma_izamin                        magma_izamin_v1
-#define magma_dzasum                        magma_dzasum_v1
-#define magma_zaxpy                         magma_zaxpy_v1
-#define magma_zcopy                         magma_zcopy_v1
-#define magma_zdotc                         magma_zdotc_v1
-#define magma_zdotu                         magma_zdotu_v1
-#define magma_dznrm2                        magma_dznrm2_v1
-#define magma_zrot                          magma_zrot_v1
-#define magma_zdrot                         magma_zdrot_v1
-#define magma_zrotm                         magma_zrotm_v1
-#define magma_zrotmg                        magma_zrotmg_v1
-#define magma_zscal                         magma_zscal_v1
-#define magma_zdscal                        magma_zdscal_v1
-#define magma_zswap                         magma_zswap_v1
-#define magma_zgemv                         magma_zgemv_v1
-#define magma_zgerc                         magma_zgerc_v1
-#define magma_zgeru                         magma_zgeru_v1
-#define magma_zhemv                         magma_zhemv_v1
-#define magma_zher                          magma_zher_v1
-#define magma_zher2                         magma_zher2_v1
-#define magma_ztrmv                         magma_ztrmv_v1
-#define magma_ztrsv                         magma_ztrsv_v1
-#define magma_zgemm                         magma_zgemm_v1
-#define magma_zsymm                         magma_zsymm_v1
-#define magma_zhemm                         magma_zhemm_v1
-#define magma_zsyr2k                        magma_zsyr2k_v1
-#define magma_zher2k                        magma_zher2k_v1
-#define magma_zsyrk                         magma_zsyrk_v1
-#define magma_zherk                         magma_zherk_v1
-#define magma_ztrmm                         magma_ztrmm_v1
-#define magma_ztrsm                         magma_ztrsm_v1
-
-#endif // MAGMABLAS_Z_V1_MAP_H
diff --git a/include/magmablas_zc_v1.h b/include/magmablas_zc_v1.h
deleted file mode 100644
index d42ed3aee..000000000
--- a/include/magmablas_zc_v1.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @precisions mixed zc -> ds
-*/
-
-#ifndef MAGMABLAS_ZC_V1_H
-#define MAGMABLAS_ZC_V1_H
-
-#ifdef MAGMA_NO_V1
-#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h"
-#endif
-
-#include "magma_types.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-  /* Mixed precision */
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zcaxpycp_v1(
-    magma_int_t m,
-    magmaFloatComplex_ptr  r,
-    magmaDoubleComplex_ptr x,
-    magmaDoubleComplex_const_ptr b,
-    magmaDoubleComplex_ptr w );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zclaswp_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr  A, magma_int_t lda,
-    magmaFloatComplex_ptr  SA,
-    magma_int_t m,
-    const magma_int_t *ipiv, magma_int_t incx );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlag2c_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr  A, magma_int_t lda,
-    magmaFloatComplex_ptr        SA, magma_int_t ldsa,
-    magma_int_t *info );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_clag2z_v1(
-    magma_int_t m, magma_int_t n,
-    magmaFloatComplex_const_ptr  SA, magma_int_t ldsa,
-    magmaDoubleComplex_ptr        A, magma_int_t lda,
-    magma_int_t *info );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_zlat2c_v1(
-    magma_uplo_t uplo, magma_int_t n,
-    magmaDoubleComplex_const_ptr  A, magma_int_t lda,
-    magmaFloatComplex_ptr        SA, magma_int_t ldsa,
-    magma_int_t *info );
-
-/// @deprecated
-/// @ingroup magma_deprecated_v1
-MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release")
-void
-magmablas_clat2z_v1(
-    magma_uplo_t uplo, magma_int_t n,
-    magmaFloatComplex_const_ptr  SA, magma_int_t ldsa,
-    magmaDoubleComplex_ptr        A, magma_int_t lda,
-    magma_int_t *info );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // MAGMABLAS_ZC_V1_H
diff --git a/include/magmablas_zc_v1_map.h b/include/magmablas_zc_v1_map.h
deleted file mode 100644
index cd94d87d0..000000000
--- a/include/magmablas_zc_v1_map.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @precisions mixed zc -> ds
-*/
-
-#ifndef MAGMABLAS_ZC_V1_MAP_H
-#define MAGMABLAS_ZC_V1_MAP_H
-
-// =============================================================================
-// map function names to old v1 routines
-
-#define magmablas_zcaxpycp                  magmablas_zcaxpycp_v1
-#define magmablas_zclaswp                   magmablas_zclaswp_v1
-#define magmablas_zlag2c                    magmablas_zlag2c_v1
-#define magmablas_clag2z                    magmablas_clag2z_v1
-#define magmablas_zlat2c                    magmablas_zlat2c_v1
-#define magmablas_clat2z                    magmablas_clat2z_v1
-
-#endif // MAGMABLAS_ZC_V1_MAP_H
diff --git a/interface_cuda/Makefile.src b/interface_cuda/Makefile.src
index c9c02bcc1..b5a052c88 100644
--- a/interface_cuda/Makefile.src
+++ b/interface_cuda/Makefile.src
@@ -16,14 +16,11 @@ cdir      := interface_cuda
 libmagma_src += \
 	$(cdir)/alloc.cpp	\
 	$(cdir)/blas_h_v2.cpp	\
-	$(cdir)/blas_z_v1.cpp	\
 	$(cdir)/blas_z_v2.cpp	\
-	$(cdir)/copy_v1.cpp	\
 	$(cdir)/copy_v2.cpp	\
 	$(cdir)/error.cpp	\
 	$(cdir)/connection_mgpu.cpp	\
 	$(cdir)/interface.cpp	\
-	$(cdir)/interface_v1.cpp	\
 
 
 # ----------------------------------------------------------------------
diff --git a/interface_cuda/blas_z_v1.cpp b/interface_cuda/blas_z_v1.cpp
deleted file mode 100644
index 09399c1a0..000000000
--- a/interface_cuda/blas_z_v1.cpp
+++ /dev/null
@@ -1,549 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @author Mark Gates
-       @precisions normal z -> s d c
-*/
-#ifndef MAGMA_NO_V1
-
-#include "magma_internal.h"
-#include "magmablas_v1.h"  // includes v1 prototypes; does NOT map routine names
-#include "error.h"
-
-#define COMPLEX
-
-#ifdef MAGMA_HAVE_CUDA
-
-// These MAGMA v1 routines are all deprecated.
-// See blas_z_v2.cpp for documentation.
-
-
-// =============================================================================
-// Level 1 BLAS
-
-/******************************************************************************/
-extern "C" magma_int_t
-magma_izamax_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx )
-{
-    return magma_izamax( n, dx, incx, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" magma_int_t
-magma_izamin_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx )
-{
-    return magma_izamin( n, dx, incx, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" double
-magma_dzasum_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx )
-{
-    return magma_dzasum( n, dx, incx, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zaxpy_v1(
-    magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy )
-{
-    magma_zaxpy( n, alpha, dx, incx, dy, incy, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zcopy_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy )
-{
-    magma_zcopy( n, dx, incx, dy, incy, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C"
-magmaDoubleComplex magma_zdotc_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy )
-{
-    return magma_zdotc( n, dx, incx, dy, incy, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-#ifdef COMPLEX
-extern "C"
-magmaDoubleComplex magma_zdotu_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy )
-{
-    return magma_zdotu( n, dx, incx, dy, incy, magmablasGetQueue() );
-}
-#endif // COMPLEX
-
-
-/******************************************************************************/
-extern "C" double
-magma_dznrm2_v1(
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx )
-{
-    return magma_dznrm2( n, dx, incx, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zrot_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dy, magma_int_t incy,
-    double c, magmaDoubleComplex s )
-{
-    magma_zrot( n, dx, incx, dy, incy, c, s, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-#ifdef COMPLEX
-extern "C" void
-magma_zdrot_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dy, magma_int_t incy,
-    double c, double s )
-{
-    magma_zdrot( n, dx, incx, dy, incy, c, s, magmablasGetQueue() );
-}
-#endif // COMPLEX
-
-
-/******************************************************************************/
-#ifdef REAL
-extern "C" void
-magma_zrotm_v1(
-    magma_int_t n,
-    double *dx, magma_int_t incx,
-    double *dy, magma_int_t incy,
-    const double *param )
-{
-    magma_zrotm( n, dx, incx, dy, incy, param, magmablasGetQueue() );
-}
-#endif // REAL
-
-
-/******************************************************************************/
-#ifdef REAL
-extern "C" void
-magma_zrotmg_v1(
-    double *d1, double       *d2,
-    double *x1, const double *y1,
-    double *param )
-{
-    magma_zrotmg( d1, d2, x1, y1, param, magmablasGetQueue() );
-}
-#endif // REAL
-
-
-/******************************************************************************/
-extern "C" void
-magma_zscal_v1(
-    magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_ptr dx, magma_int_t incx )
-{
-    magma_zscal( n, alpha, dx, incx, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-#ifdef COMPLEX
-extern "C" void
-magma_zdscal_v1(
-    magma_int_t n,
-    double alpha,
-    magmaDoubleComplex_ptr dx, magma_int_t incx )
-{
-    magma_zdscal( n, alpha, dx, incx, magmablasGetQueue() );
-}
-#endif // COMPLEX
-
-
-/******************************************************************************/
-extern "C" void
-magma_zswap_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dy, magma_int_t incy )
-{
-    magma_zswap( n, dx, incx, dy, incy, magmablasGetQueue() );
-}
-
-
-// =============================================================================
-// Level 2 BLAS
-
-/******************************************************************************/
-extern "C" void
-magma_zgemv_v1(
-    magma_trans_t transA,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy )
-{
-    magma_zgemv(
-        transA,
-        m, n,
-        alpha, dA, ldda,
-               dx, incx,
-        beta,  dy, incy,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zgerc_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda )
-{
-    magma_zgerc(
-        m, n,
-        alpha, dx, incx,
-               dy, incy,
-               dA, ldda,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-#ifdef COMPLEX
-extern "C" void
-magma_zgeru_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda )
-{
-    magma_zgeru(
-        m, n,
-        alpha, dx, incx,
-               dy, incy,
-               dA, ldda,
-        magmablasGetQueue() );
-}
-#endif // COMPLEX
-
-
-/******************************************************************************/
-extern "C" void
-magma_zhemv_v1(
-    magma_uplo_t uplo,
-    magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy )
-{
-    magma_zhemv(
-        uplo,
-        n,
-        alpha, dA, ldda,
-               dx, incx,
-        beta,  dy, incy,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zher_v1(
-    magma_uplo_t uplo,
-    magma_int_t n,
-    double alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda )
-{
-    magma_zher(
-        uplo,
-        n,
-        alpha, dx, incx,
-               dA, ldda,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zher2_v1(
-    magma_uplo_t uplo,
-    magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_const_ptr dy, magma_int_t incy,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda )
-{
-    magma_zher2(
-        uplo,
-        n,
-        alpha, dx, incx,
-               dy, incy,
-               dA, ldda,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_ztrmv_v1(
-    magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dx, magma_int_t incx )
-{
-    magma_ztrmv(
-        uplo, trans, diag,
-        n,
-        dA, ldda,
-        dx, incx,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_ztrsv_v1(
-    magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
-    magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dx, magma_int_t incx )
-{
-    magma_ztrsv(
-        uplo, trans, diag,
-        n,
-        dA, ldda,
-        dx, incx,
-        magmablasGetQueue() );
-}
-
-
-// =============================================================================
-// Level 3 BLAS
-
-/******************************************************************************/
-extern "C" void
-magma_zgemm_v1(
-    magma_trans_t transA, magma_trans_t transB,
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magma_zgemm(
-        transA, transB,
-        m, n, k,
-        alpha, dA, ldda,
-               dB, lddb,
-        beta,  dC, lddc,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zsymm_v1(
-    magma_side_t side, magma_uplo_t uplo,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magma_zsymm(
-        side, uplo,
-        m, n,
-        alpha, dA, ldda,
-               dB, lddb,
-        beta,  dC, lddc,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zsyrk_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magma_zsyrk(
-        uplo, trans,
-        n, k,
-        alpha, dA, ldda,
-        beta,  dC, lddc,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zsyr2k_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magma_zsyr2k(
-        uplo, trans,
-        n, k,
-        alpha, dA, ldda,
-               dB, lddb,
-        beta,  dC, lddc,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-#ifdef COMPLEX
-extern "C" void
-magma_zhemm_v1(
-    magma_side_t side, magma_uplo_t uplo,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magma_zhemm(
-        side, uplo,
-        m, n,
-        alpha, dA, ldda,
-               dB, lddb,
-        beta,  dC, lddc,
-        magmablasGetQueue() );
-}
-#endif // COMPLEX
-
-
-/******************************************************************************/
-#ifdef COMPLEX
-extern "C" void
-magma_zherk_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    double alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    double beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magma_zherk(
-        uplo, trans,
-        n, k,
-        alpha, dA, ldda,
-        beta,  dC, lddc,
-        magmablasGetQueue() );
-}
-#endif // COMPLEX
-
-
-/******************************************************************************/
-#ifdef COMPLEX
-extern "C" void
-magma_zher2k_v1(
-    magma_uplo_t uplo, magma_trans_t trans,
-    magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    double beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magma_zher2k(
-        uplo, trans,
-        n, k,
-        alpha, dA, ldda,
-               dB, lddb,
-        beta,  dC, lddc,
-        magmablasGetQueue() );
-}
-#endif // COMPLEX
-
-
-/******************************************************************************/
-extern "C" void
-magma_ztrmm_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb )
-{
-    magma_ztrmm(
-        side, uplo, trans, diag,
-        m, n,
-        alpha, dA, ldda,
-               dB, lddb,
-        magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_ztrsm_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb )
-{
-    magma_ztrsm(
-        side, uplo, trans, diag,
-        m, n,
-        alpha, dA, ldda,
-               dB, lddb,
-        magmablasGetQueue() );
-}
-
-#endif // MAGMA_HAVE_CUDA
-
-#undef COMPLEX
-
-#endif // MAGMA_NO_V1
diff --git a/interface_cuda/copy_v1.cpp b/interface_cuda/copy_v1.cpp
deleted file mode 100644
index 5d7d3fd95..000000000
--- a/interface_cuda/copy_v1.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @author Mark Gates
-*/
-#ifndef MAGMA_NO_V1
-
-#include "magma_internal.h"
-#include "magmablas_v1.h"  // includes v1 prototypes; does NOT map routine names
-#include "error.h"
-
-#include <cuda_runtime.h>
-
-#if defined(MAGMA_HAVE_CUDA) || defined(MAGMA_HAVE_HIP)
-
-// These MAGMA v1 routines are all deprecated.
-// See copy_v2.cpp for documentation.
-
-// Generic, type-independent routines to copy data.
-// Type-safe versions which avoid the user needing sizeof(...) are in headers;
-// see magma_{s,d,c,z,i,index_}{set,get,copy}{matrix,vector}
-
-/******************************************************************************/
-extern "C" void
-magma_setvector_v1_internal(
-    magma_int_t n, magma_int_t elemSize,
-    void const* hx_src, magma_int_t incx,
-    magma_ptr   dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_setvector_internal(
-        n, elemSize,
-        hx_src, incx,
-        dy_dst, incy,
-        magmablasGetQueue(),
-        func, file, line );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_getvector_v1_internal(
-    magma_int_t n, magma_int_t elemSize,
-    magma_const_ptr dx_src, magma_int_t incx,
-    void*           hy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_getvector_internal(
-        n, elemSize,
-        dx_src, incx,
-        hy_dst, incy,
-        magmablasGetQueue(),
-        func, file, line );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_copyvector_v1_internal(
-    magma_int_t n, magma_int_t elemSize,
-    magma_const_ptr dx_src, magma_int_t incx,
-    magma_ptr       dy_dst, magma_int_t incy,
-    const char* func, const char* file, int line )
-{
-    magma_copyvector_internal(
-        n, elemSize,
-        dx_src, incx,
-        dy_dst, incy,
-        magmablasGetQueue(),
-        func, file, line );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_setmatrix_v1_internal(
-    magma_int_t m, magma_int_t n, magma_int_t elemSize,
-    void const* hA_src, magma_int_t lda,
-    magma_ptr   dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line )
-{
-    cublasStatus_t status;
-    status = cublasSetMatrix(
-        int(m), int(n), int(elemSize),
-        hA_src, int(lda),
-        dB_dst, int(lddb) );
-    check_xerror( status, func, file, line );
-    MAGMA_UNUSED( status );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_getmatrix_v1_internal(
-    magma_int_t m, magma_int_t n, magma_int_t elemSize,
-    magma_const_ptr dA_src, magma_int_t ldda,
-    void*           hB_dst, magma_int_t ldb,
-    const char* func, const char* file, int line )
-{
-    cublasStatus_t status;
-    status = cublasGetMatrix(
-        int(m), int(n), int(elemSize),
-        dA_src, int(ldda),
-        hB_dst, int(ldb) );
-    check_xerror( status, func, file, line );
-    MAGMA_UNUSED( status );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_copymatrix_v1_internal(
-    magma_int_t m, magma_int_t n, magma_int_t elemSize,
-    magma_const_ptr dA_src, magma_int_t ldda,
-    magma_ptr       dB_dst, magma_int_t lddb,
-    const char* func, const char* file, int line )
-{
-    cudaError_t status;
-    status = cudaMemcpy2D(
-        dB_dst, int(lddb*elemSize),
-        dA_src, int(ldda*elemSize),
-        int(m*elemSize), int(n), cudaMemcpyDeviceToDevice );
-    check_xerror( status, func, file, line );
-    MAGMA_UNUSED( status );
-}
-
-#endif // MAGMA_HAVE_CUDA
-
-#endif // MAGMA_NO_V1
diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp
index 50b4fa3d3..0c317ebb5 100644
--- a/interface_cuda/interface.cpp
+++ b/interface_cuda/interface.cpp
@@ -123,17 +123,6 @@ enum {
 // count of (init - finalize) calls
 static int g_init = 0;
 
-#ifndef MAGMA_NO_V1
-    magma_queue_t* g_null_queues = NULL;
-
-    #ifdef HAVE_PTHREAD_KEY
-    pthread_key_t g_magma_queue_key;
-    #else
-    magma_queue_t g_magma_queue = NULL;
-    #endif
-#endif // MAGMA_NO_V1
-
-
 // -----------------------------------------------------------------------------
 // subset of the CUDA device properties, set by magma_init()
 struct magma_device_info
@@ -232,29 +221,6 @@ magma_init()
                     #endif
                 }
             }
-
-            #ifndef MAGMA_NO_V1
-                #ifdef HAVE_PTHREAD_KEY
-                    // create thread-specific key
-                    // currently, this is needed only for MAGMA v1 compatability
-                    // see magma_init, magmablas(Set|Get)KernelStream, magmaGetQueue
-                    info = pthread_key_create( &g_magma_queue_key, NULL );
-                    if ( info != 0 ) {
-                        info = MAGMA_ERR_UNKNOWN;
-                        goto cleanup;
-                    }
-                #endif
-
-                // ----- queues with NULL streams (for backwards compatability with MAGMA 1.x)
-                // allocate array of queues with NULL stream
-                size = max( 1, g_magma_devices_cnt ) * sizeof(magma_queue_t);
-                magma_malloc_cpu( (void**) &g_null_queues, size );
-                if ( g_null_queues == NULL ) {
-                    info = MAGMA_ERR_HOST_ALLOC;
-                    goto cleanup;
-                }
-                memset( g_null_queues, 0, size );
-            #endif // MAGMA_NO_V1
         }
 cleanup:
         g_init += 1;  // increment (init - finalize) count
@@ -291,21 +257,6 @@ magma_finalize()
                     g_magma_devices = NULL;
                 }
 
-                #ifndef MAGMA_NO_V1
-                if ( g_null_queues != NULL ) {
-                    for( int dev=0; dev < g_magma_devices_cnt; ++dev ) {
-                        magma_queue_destroy( g_null_queues[dev] );
-                        g_null_queues[dev] = NULL;
-                    }
-                    magma_free_cpu( g_null_queues );
-                    g_null_queues = NULL;
-                }
-
-                #ifdef HAVE_PTHREAD_KEY
-                    pthread_key_delete( g_magma_queue_key );
-                #endif
-                #endif // MAGMA_NO_V1
-
                 #ifdef DEBUG_MEMORY
                 magma_warn_leaks( g_pointers_dev, "device" );
                 magma_warn_leaks( g_pointers_cpu, "CPU" );
diff --git a/interface_cuda/interface_v1.cpp b/interface_cuda/interface_v1.cpp
deleted file mode 100644
index d6eb4e095..000000000
--- a/interface_cuda/interface_v1.cpp
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @author Mark Gates
-*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-
-// these are included already in magma_internal.h & other headers
-#include <cuda_runtime.h>
-//#include <cublas_v2.h>
-
-#include "magma_internal.h"
-#include "error.h"
-
-
-#if defined(MAGMA_HAVE_CUDA) || defined(MAGMA_HAVE_HIP)
-#ifndef MAGMA_NO_V1
-
-// -----------------------------------------------------------------------------
-// globals
-// see interface.cpp for definitions
-
-#ifndef MAGMA_NO_V1
-    extern magma_queue_t* g_null_queues;
-
-    #ifdef HAVE_PTHREAD_KEY
-    extern pthread_key_t g_magma_queue_key;
-    #else
-    extern magma_queue_t g_magma_queue;
-    #endif
-#endif // MAGMA_NO_V1
-
-
-// -----------------------------------------------------------------------------
-extern int g_magma_devices_cnt;
-
-
-// =============================================================================
-// device support
-
-/***************************************************************************//**
-    @deprecated
-    Synchronize the current device.
-    This functionality does not exist in OpenCL, so it is deprecated for CUDA, too.
-
-    @ingroup magma_device
-*******************************************************************************/
-extern "C" void
-magma_device_sync()
-{
-    cudaError_t err;
-    err = cudaDeviceSynchronize();
-    check_error( err );
-    MAGMA_UNUSED( err );
-}
-
-
-// =============================================================================
-// queue support
-
-/***************************************************************************//**
-    @deprecated
-
-    Sets the current global MAGMA v1 queue for kernels to execute in.
-    In MAGMA v2, all kernels take queue as an argument, so this is deprecated.
-    If compiled with MAGMA_NO_V1, this is not defined.
-
-    @param[in]
-    queue       Queue to set as current global MAGMA v1 queue.
-
-    @return MAGMA_SUCCESS if successful
-
-    @ingroup magma_queue
-*******************************************************************************/
-extern "C" magma_int_t
-magmablasSetKernelStream( magma_queue_t queue )
-{
-    magma_int_t info = 0;
-    #ifdef HAVE_PTHREAD_KEY
-    info = pthread_setspecific( g_magma_queue_key, queue );
-    #else
-    g_magma_queue = queue;
-    #endif
-    return info;
-}
-
-
-/***************************************************************************//**
-    @deprecated
-
-    Gets the current global MAGMA v1 queue for kernels to execute in.
-    In MAGMA v2, all kernels take queue as an argument, so this is deprecated.
-    If compiled with MAGMA_NO_V1, this is not defined.
-
-    @param[out]
-    queue_ptr    On output, set to the current global MAGMA v1 queue.
-
-    @return MAGMA_SUCCESS if successful
-
-    @ingroup magma_queue
-*******************************************************************************/
-extern "C" magma_int_t
-magmablasGetKernelStream( magma_queue_t *queue_ptr )
-{
-    #ifdef HAVE_PTHREAD_KEY
-    *queue_ptr = (magma_queue_t) pthread_getspecific( g_magma_queue_key );
-    #else
-    *queue_ptr = g_magma_queue;
-    #endif
-    return 0;
-}
-
-
-/***************************************************************************//**
-    @deprecated
-
-    Gets the current global MAGMA v1 queue for kernels to execute in.
-    Unlike magmablasGetKernelStream(), if the current queue is NULL,
-    this will return a special MAGMA queue that has a NULL CUDA stream.
-    This allows MAGMA v1 wrappers to call v2 kernels with a non-NULL queue.
-
-    In MAGMA v2, all kernels take queue as an argument, so this is deprecated.
-    If compiled with MAGMA_NO_V1, this is not defined.
-
-    @return Current global MAGMA v1 queue.
-
-    @ingroup magma_queue
-*******************************************************************************/
-extern "C"
-magma_queue_t magmablasGetQueue()
-{
-    magma_queue_t queue;
-    #ifdef HAVE_PTHREAD_KEY
-    queue = (magma_queue_t) pthread_getspecific( g_magma_queue_key );
-    #else
-    queue = g_magma_queue;
-    #endif
-    if ( queue == NULL ) {
-        magma_device_t dev;
-        magma_getdevice( &dev );
-        if ( dev >= g_magma_devices_cnt || g_null_queues == NULL ) {
-            fprintf( stderr, "Error: %s requires magma_init() to be called first for MAGMA v1 compatability.\n",
-                     __func__ );
-            return NULL;
-        }
-        // create queue w/ NULL stream first time that NULL queue is used
-        if ( g_null_queues[dev] == NULL ) {
-            #ifdef MAGMA_HAVE_CUDA
-            magma_queue_create_from_cuda( dev, NULL, NULL, NULL, &g_null_queues[dev] );
-            #elif defined(MAGMA_HAVE_HIP)
-            magma_queue_create_from_hip( dev, NULL, NULL, NULL, &g_null_queues[dev] );
-            #endif
-            //printf( "dev %lld create queue %p\n", (long long) dev, (void*) g_null_queues[dev] );
-            assert( g_null_queues[dev] != NULL );
-        }
-        queue = g_null_queues[dev];
-    }
-    assert( queue != NULL );
-    return queue;
-}
-
-
-/***************************************************************************//**
-    @deprecated
-    MAGMA v1 version that doesn't take device ID.
-*******************************************************************************/
-extern "C" void
-magma_queue_create_v1_internal(
-    magma_queue_t* queue_ptr,
-    const char* func, const char* file, int line )
-{
-    int device;
-    cudaError_t err;
-    err = cudaGetDevice( &device );
-    check_xerror( err, func, file, line );
-    MAGMA_UNUSED( err );
-
-    magma_queue_create_internal( device, queue_ptr, func, file, line );
-}
-
-#endif // not MAGMA_NO_V1
-#endif // MAGMA_HAVE_CUDA
diff --git a/magmablas/Makefile.src b/magmablas/Makefile.src
index c72e8789f..ef6952115 100644
--- a/magmablas/Makefile.src
+++ b/magmablas/Makefile.src
@@ -102,8 +102,6 @@ libmagma_src += \
 	$(cdir)/ztrtri_lower_batched.cu		\
 	$(cdir)/ztrtri_upper.cu		\
 	$(cdir)/ztrtri_upper_batched.cu		\
-	$(cdir)/magmablas_z_v1.cpp	\
-	$(cdir)/magmablas_zc_v1.cpp	\
 
 # multi-GPU
 libmagma_src += \
diff --git a/magmablas/magmablas_z_v1.cpp b/magmablas/magmablas_z_v1.cpp
deleted file mode 100644
index 59fb13b07..000000000
--- a/magmablas/magmablas_z_v1.cpp
+++ /dev/null
@@ -1,825 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @precisions normal z -> c d s
-
-       @author Mark Gates
-
-       Implements all the wrappers for v1 backwards compatability.
-       Separating the wrappers allows the new functions to use magma_internal.h
-*/
-#ifndef MAGMA_NO_V1
-
-#include "magma_internal.h"
-#include "magmablas_v1.h"  // includes v1 prototypes; does NOT map routine names
-
-#define COMPLEX
-
-// These MAGMA v1 routines are all deprecated.
-// See corresponding v2 functions for documentation.
-
-/******************************************************************************/
-extern "C" void
-magmablas_zaxpycp_v1(
-    magma_int_t m,
-    magmaDoubleComplex_ptr r,
-    magmaDoubleComplex_ptr x,
-    magmaDoubleComplex_const_ptr b)
-{
-    magmablas_zaxpycp( m, r, x, b, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zgeadd_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb )
-{
-    magmablas_zgeadd( m, n, alpha, dA, ldda, dB, lddb, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zgeadd2_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb )
-{
-    magmablas_zgeadd2( m, n, alpha, dA, ldda, beta, dB, lddb, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zgemm_v1(
-    magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magmablas_zgemm( transA, transB, m, n, k,
-                       alpha, dA, ldda,
-                              dB, lddb,
-                       beta,  dC, lddc, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zgemv_v1(
-    magma_trans_t trans, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr dy, magma_int_t incy)
-{
-    magmablas_zgemv( trans, m, n, alpha, dA, ldda, dx, incx, beta, dy, incy, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zgemv_conj_v1(
-    magma_int_t m, magma_int_t n, magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr dy, magma_int_t incy)
-{
-    magmablas_zgemv_conj(
-        m, n, alpha, dA, ldda, dx, incx, beta, dy, incy, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zgemm_reduce_v1(
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dB, magma_int_t lddb,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dC, magma_int_t lddc )
-{
-    magmablas_zgemm_reduce(
-        m, n, k, alpha, dA, ldda, dB, lddb, beta, dC, lddc, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zgetmatrix_transpose_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dAT, magma_int_t ldda,
-    magmaDoubleComplex          *hA,  magma_int_t lda,
-    magmaDoubleComplex_ptr       dwork,  magma_int_t lddwork, magma_int_t nb )
-{
-    magma_queue_t queues[2];
-    magma_queue_create_v1( &queues[0] );
-    magma_queue_create_v1( &queues[1] );
-
-    magmablas_zgetmatrix_transpose( m, n, nb, dAT, ldda, hA, lda, dwork, lddwork, queues );
-
-    magma_queue_destroy( queues[0] );
-    magma_queue_destroy( queues[1] );
-}
-
-
-/******************************************************************************/
-extern "C" magma_int_t
-magmablas_zhemv_v1(
-    magma_uplo_t uplo, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy )
-{
-    return magmablas_zhemv( uplo, n, alpha, dA, ldda, dx, incx, beta, dy, incy, magmablasGetQueue() );
-}
-
-
-#ifdef COMPLEX
-/******************************************************************************/
-extern "C" magma_int_t
-magmablas_zsymv_v1(
-    magma_uplo_t uplo, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_const_ptr dx, magma_int_t incx,
-    magmaDoubleComplex beta,
-    magmaDoubleComplex_ptr       dy, magma_int_t incy )
-{
-    return magmablas_zsymv( uplo, n, alpha, dA, ldda, dx, incx, beta, dy, incy, magmablasGetQueue() );
-}
-#endif
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zprbt_v1(
-    magma_int_t n,
-    magmaDoubleComplex *dA, magma_int_t ldda,
-    magmaDoubleComplex *du, magmaDoubleComplex *dv)
-{
-    magmablas_zprbt(n, dA, ldda, du, dv, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zprbt_mv_v1(
-    magma_int_t n, magma_int_t nrhs,
-    magmaDoubleComplex *dv, magmaDoubleComplex *db, magma_int_t lddb)
-{
-    magmablas_zprbt_mv(n, nrhs, dv, db, lddb, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zprbt_mtv_v1(
-    magma_int_t n, magma_int_t nrhs,
-    magmaDoubleComplex *du, magmaDoubleComplex *db, magma_int_t lddb)
-{
-    magmablas_zprbt_mtv(n, nrhs, du, db, lddb, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlacpy_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb )
-{
-    magmablas_zlacpy( uplo, m, n, dA, ldda, dB, lddb, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlacpy_conj_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA1, magma_int_t lda1,
-    magmaDoubleComplex_ptr dA2, magma_int_t lda2)
-{
-    magmablas_zlacpy_conj( n, dA1, lda1, dA2, lda2, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlacpy_sym_in_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
-    magma_int_t *rows, magma_int_t *perm,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb )
-{
-    magmablas_zlacpy_sym_in( uplo, m, n, rows, perm, dA, ldda, dB, lddb, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlacpy_sym_out_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
-    magma_int_t *rows, magma_int_t *perm,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb )
-{
-    magmablas_zlacpy_sym_out( uplo, m, n, rows, perm, dA, ldda, dB, lddb, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" double
-magmablas_zlange_v1(
-    magma_norm_t norm, magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dwork, magma_int_t lwork )
-{
-    return magmablas_zlange( norm, m, n, dA, ldda, dwork, lwork, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" double
-magmablas_zlanhe_v1(
-    magma_norm_t norm, magma_uplo_t uplo, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dwork, magma_int_t lwork )
-{
-    return magmablas_zlanhe( norm, uplo, n, dA, ldda, dwork, lwork, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zlarfx_gpu_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_ptr v,
-    magmaDoubleComplex_ptr tau,
-    magmaDoubleComplex_ptr C, magma_int_t ldc,
-    magmaDouble_ptr        xnorm,
-    magmaDoubleComplex_ptr dT, magma_int_t iter,
-    magmaDoubleComplex_ptr work )
-{
-    magma_zlarfx_gpu(m, n, v, tau, C, ldc, xnorm, dT, iter, work,
-                       magmablasGetQueue());
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zlarfbx_gpu_v1(
-    magma_int_t m, magma_int_t k,
-    magmaDoubleComplex_ptr V,  magma_int_t ldv,
-    magmaDoubleComplex_ptr dT, magma_int_t ldt,
-    magmaDoubleComplex_ptr c,
-    magmaDoubleComplex_ptr dwork)
-{
-    magma_zlarfbx_gpu( m, k, V, ldv, dT, ldt, c, dwork, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlarfg_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dalpha,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dtau )
-{
-    magmablas_zlarfg( n, dalpha, dx, incx, dtau, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zlarfg_gpu_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx0,
-    magmaDoubleComplex_ptr dx,
-    magmaDoubleComplex_ptr dtau,
-    magmaDouble_ptr        dxnorm,
-    magmaDoubleComplex_ptr dAkk )
-{
-    magma_zlarfg_gpu( n, dx0, dx, dtau, dxnorm, dAkk, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zlarfgx_gpu_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx0,
-    magmaDoubleComplex_ptr dx,
-    magmaDoubleComplex_ptr dtau,
-    magmaDouble_ptr        dxnorm,
-    magmaDoubleComplex_ptr dA, magma_int_t iter)
-{
-    magma_zlarfgx_gpu( n, dx0, dx, dtau, dxnorm, dA, iter, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zlarfgtx_gpu_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx0,
-    magmaDoubleComplex_ptr dx,
-    magmaDoubleComplex_ptr dtau,
-    magmaDouble_ptr        dxnorm,
-    magmaDoubleComplex_ptr dA, magma_int_t iter,
-    magmaDoubleComplex_ptr V,  magma_int_t ldv,
-    magmaDoubleComplex_ptr T,  magma_int_t ldt,
-    magmaDoubleComplex_ptr dwork )
-{
-    magma_zlarfgtx_gpu(n, dx0, dx, dtau, dxnorm, dA, iter, V, ldv,
-                         T, ldt, dwork, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlascl_v1(
-    magma_type_t type, magma_int_t kl, magma_int_t ku,
-    double cfrom, double cto,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magma_int_t *info )
-{
-    magmablas_zlascl( type, kl, ku, cfrom, cto, m, n, dA, ldda, magmablasGetQueue(), info );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlascl2_v1(
-    magma_type_t type, magma_int_t m, magma_int_t n,
-    magmaDouble_const_ptr dD,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *info )
-{
-    magmablas_zlascl2( type, m, n, dD, dA, ldda, magmablasGetQueue(), info );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlascl_2x2_v1(
-    magma_type_t type, magma_int_t m,
-    magmaDoubleComplex_const_ptr dW, magma_int_t lddw,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda,
-    magma_int_t *info )
-{
-    magmablas_zlascl_2x2( type, m, dW, lddw, dA, ldda, magmablasGetQueue(), info );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlascl_diag_v1(
-    magma_type_t type, magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dD, magma_int_t lddd,
-    magmaDoubleComplex_ptr       dA, magma_int_t ldda,
-    magma_int_t *info )
-{
-    magmablas_zlascl_diag( type, m, n, dD, lddd, dA, ldda, magmablasGetQueue(), info );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlaset_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
-    magmaDoubleComplex offdiag, magmaDoubleComplex diag,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda )
-{
-    magmablas_zlaset( uplo, m, n, offdiag, diag, dA, ldda, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlaset_band_v1(
-    magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex offdiag, magmaDoubleComplex diag,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda)
-{
-    magmablas_zlaset_band(uplo, m, n, k, offdiag, diag, dA, ldda, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlaswp_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dAT, magma_int_t ldda,
-    magma_int_t k1, magma_int_t k2,
-    const magma_int_t *ipiv, magma_int_t inci )
-{
-    magmablas_zlaswp( n, dAT, ldda, k1, k2, ipiv, inci, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlaswpx_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldx, magma_int_t ldy,
-    magma_int_t k1, magma_int_t k2,
-    const magma_int_t *ipiv, magma_int_t inci )
-{
-    return magmablas_zlaswpx( n, dA, ldx, ldy, k1, k2, ipiv, inci, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlaswp2_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dAT, magma_int_t ldda,
-    magma_int_t k1, magma_int_t k2,
-    magmaInt_const_ptr d_ipiv, magma_int_t inci )
-{
-    magmablas_zlaswp2( n, dAT, ldda, k1, k2, d_ipiv, inci, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlaswp_sym_v1( magma_int_t n, magmaDoubleComplex *dA, magma_int_t lda,
-                      magma_int_t k1, magma_int_t k2,
-                      const magma_int_t *ipiv, magma_int_t inci )
-{
-    return magmablas_zlaswp_sym( n, dA, lda, k1, k2, ipiv, inci, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_dznrm2_check_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dxnorm,
-    magmaDouble_ptr dlsticc )
-{
-    magmablas_dznrm2_check( m, n, dA, ldda, dxnorm, dlsticc, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_dznrm2_adjust_v1(
-    magma_int_t k,
-    magmaDouble_ptr dxnorm,
-    magmaDoubleComplex_ptr dc )
-{
-    magmablas_dznrm2_adjust( k, dxnorm, dc, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_dznrm2_row_check_adjust_v1(
-    magma_int_t k, double tol,
-    magmaDouble_ptr dxnorm,
-    magmaDouble_ptr dxnorm2,
-    magmaDoubleComplex_ptr dC, magma_int_t lddc,
-    magmaDouble_ptr dlsticc )
-{
-    magmablas_dznrm2_row_check_adjust( k, tol, dxnorm, dxnorm2, dC, lddc, dlsticc, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_dznrm2_cols_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magmaDouble_ptr dxnorm )
-{
-    magmablas_dznrm2_cols( m, n, dA, ldda, dxnorm, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zsetmatrix_transpose_v1(
-    magma_int_t m, magma_int_t n,
-    const magmaDoubleComplex     *hA, magma_int_t lda,
-    magmaDoubleComplex_ptr       dAT, magma_int_t ldda,
-    magmaDoubleComplex_ptr     dwork, magma_int_t lddwork, magma_int_t nb )
-{
-    magma_queue_t queues[2];
-    magma_queue_create_v1( &queues[0] );
-    magma_queue_create_v1( &queues[1] );
-
-    magmablas_zsetmatrix_transpose( m, n, nb, hA, lda, dAT, ldda, dwork, lddwork, queues );
-
-    magma_queue_destroy( queues[0] );
-    magma_queue_destroy( queues[1] );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zswap_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dx, magma_int_t incx,
-    magmaDoubleComplex_ptr dy, magma_int_t incy)
-{
-    magmablas_zswap( n, dx, incx, dy, incy, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zswapblk_v1(
-    magma_order_t order, magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr dB, magma_int_t lddb,
-    magma_int_t i1, magma_int_t i2,
-    const magma_int_t *ipiv, magma_int_t inci, magma_int_t offset )
-{
-    magmablas_zswapblk(
-        order, n, dA, ldda, dB, lddb, i1, i2, ipiv, inci, offset, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zswapdblk_v1(
-    magma_int_t n, magma_int_t nb,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t inca,
-    magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_int_t incb )
-{
-    magmablas_zswapdblk( n, nb, dA, ldda, inca, dB, lddb, incb, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zsymmetrize_v1(
-    magma_uplo_t uplo, magma_int_t m,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda )
-{
-    magmablas_zsymmetrize( uplo, m, dA, ldda, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zsymmetrize_tiles_v1(
-    magma_uplo_t uplo, magma_int_t m,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda,
-    magma_int_t ntile, magma_int_t mstride, magma_int_t nstride )
-{
-    magmablas_zsymmetrize_tiles( uplo, m, dA, ldda, ntile, mstride, nstride, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_ztranspose_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA,  magma_int_t ldda,
-    magmaDoubleComplex_ptr       dAT, magma_int_t lddat )
-{
-    magmablas_ztranspose( m, n, dA, ldda, dAT, lddat, magmablasGetQueue() );
-}
-
-
-#ifdef COMPLEX
-/******************************************************************************/
-extern "C" void
-magmablas_ztranspose_conj_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA,  magma_int_t ldda,
-    magmaDoubleComplex_ptr       dAT, magma_int_t lddat )
-{
-    magmablas_ztranspose_conj( m, n, dA, ldda, dAT, lddat, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_ztranspose_conj_inplace_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda )
-{
-    magmablas_ztranspose_conj_inplace( n, dA, ldda, magmablasGetQueue() );
-}
-#endif
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_ztranspose_inplace_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr dA, magma_int_t ldda )
-{
-    magmablas_ztranspose_inplace( n, dA, ldda, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_ztrsm_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb )
-{
-    magmablas_ztrsm( side, uplo, transA, diag, m, n, alpha, dA, ldda, dB, lddb,
-                       magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_ztrsm_outofplace_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb,
-    magmaDoubleComplex_ptr       dX, magma_int_t lddx,
-    magma_int_t flag,
-    magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length )
-{
-    magmablas_ztrsm_outofplace( side, uplo, transA, diag, m, n, alpha,
-                                  dA, ldda, dB, lddb, dX, lddx, flag,
-                                  d_dinvA, dinvA_length, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_ztrsm_work_v1(
-    magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex alpha,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr       dB, magma_int_t lddb,
-    magmaDoubleComplex_ptr       dX, magma_int_t lddx,
-    magma_int_t flag,
-    magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length )
-{
-    magmablas_ztrsm_work( side, uplo, transA, diag, m, n, alpha,
-                            dA, ldda, dB, lddb, dX, lddx, flag,
-                            d_dinvA, dinvA_length, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_ztrtri_diag_v1(
-    magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
-    magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
-    magmaDoubleComplex_ptr d_dinvA)
-{
-    magmablas_ztrtri_diag( uplo, diag, n, dA, ldda, d_dinvA, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zgetmatrix_1D_row_bcyclic_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr const *dA, magma_int_t ldda,
-    magmaDoubleComplex                 *hA, magma_int_t lda,
-    magma_int_t ngpu, magma_int_t nb )
-{
-    magma_queue_t queues[MagmaMaxGPUs];
-    for( int dev=0; dev < ngpu; dev++ ) {
-        magma_setdevice( dev );
-        magma_queue_create( dev, &queues[dev] );
-    }
-    magma_zgetmatrix_1D_row_bcyclic( ngpu, m, n, nb, dA, ldda, hA, lda, queues );
-    for( int dev=0; dev < ngpu; dev++ ) {
-        magma_setdevice( dev );
-        magma_queue_sync( queues[dev] );
-        magma_queue_destroy( queues[dev] );
-    }
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zgetmatrix_1D_col_bcyclic_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr const *dA, magma_int_t ldda,
-    magmaDoubleComplex                 *hA, magma_int_t lda,
-    magma_int_t ngpu, magma_int_t nb )
-{
-    magma_queue_t queues[MagmaMaxGPUs];
-    for( int dev=0; dev < ngpu; dev++ ) {
-        magma_setdevice( dev );
-        magma_queue_create( dev, &queues[dev] );
-    }
-    magma_zgetmatrix_1D_col_bcyclic( ngpu, m, n, nb, dA, ldda, hA, lda, queues );
-    for( int dev=0; dev < ngpu; dev++ ) {
-        magma_setdevice( dev );
-        magma_queue_sync( queues[dev] );
-        magma_queue_destroy( queues[dev] );
-    }
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zsetmatrix_1D_row_bcyclic_v1(
-    magma_int_t m, magma_int_t n,
-    const magmaDoubleComplex    *hA, magma_int_t lda,
-    magmaDoubleComplex_ptr      *dA, magma_int_t ldda,
-    magma_int_t ngpu, magma_int_t nb )
-{
-    magma_queue_t queues[MagmaMaxGPUs];
-    for( int dev=0; dev < ngpu; dev++ ) {
-        magma_setdevice( dev );
-        magma_queue_create( dev, &queues[dev] );
-    }
-    magma_zsetmatrix_1D_row_bcyclic( ngpu, m, n, nb, hA, lda, dA, ldda, queues );
-    for( int dev=0; dev < ngpu; dev++ ) {
-        magma_setdevice( dev );
-        magma_queue_sync( queues[dev] );
-        magma_queue_destroy( queues[dev] );
-    }
-}
-
-
-/******************************************************************************/
-extern "C" void
-magma_zsetmatrix_1D_col_bcyclic_v1(
-    magma_int_t m, magma_int_t n,
-    const magmaDoubleComplex *hA, magma_int_t lda,
-    magmaDoubleComplex_ptr   *dA, magma_int_t ldda,
-    magma_int_t ngpu, magma_int_t nb )
-{
-    magma_queue_t queues[MagmaMaxGPUs];
-    for( int dev=0; dev < ngpu; dev++ ) {
-        magma_setdevice( dev );
-        magma_queue_create( dev, &queues[dev] );
-    }
-    magma_zsetmatrix_1D_col_bcyclic( ngpu, m, n, nb, hA, lda, dA, ldda, queues );
-    for( int dev=0; dev < ngpu; dev++ ) {
-        magma_setdevice( dev );
-        magma_queue_sync( queues[dev] );
-        magma_queue_destroy( queues[dev] );
-    }
-}
-
-
-// in src/zlarfb_gpu.cpp
-/******************************************************************************/
-extern "C" magma_int_t
-magma_zlarfb_gpu_v1(
-    magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev,
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex_const_ptr dV,    magma_int_t lddv,
-    magmaDoubleComplex_const_ptr dT,    magma_int_t lddt,
-    magmaDoubleComplex_ptr dC,          magma_int_t lddc,
-    magmaDoubleComplex_ptr dwork,       magma_int_t ldwork )
-{
-    return magma_zlarfb_gpu( side, trans, direct, storev,
-                               m, n, k,
-                               dV, lddv, dT, lddt, dC, lddc, dwork, ldwork,
-                               magmablasGetQueue() );
-}
-
-
-// in src/zlarfb_gpu_gemm.cpp
-/******************************************************************************/
-extern "C" magma_int_t
-magma_zlarfb_gpu_gemm_v1(
-    magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev,
-    magma_int_t m, magma_int_t n, magma_int_t k,
-    magmaDoubleComplex_const_ptr dV,    magma_int_t lddv,
-    magmaDoubleComplex_const_ptr dT,    magma_int_t lddt,
-    magmaDoubleComplex_ptr dC,          magma_int_t lddc,
-    magmaDoubleComplex_ptr dwork,       magma_int_t ldwork,
-    magmaDoubleComplex_ptr dworkvt,     magma_int_t ldworkvt )
-{
-    return magma_zlarfb_gpu_gemm( side, trans, direct, storev,
-                                    m, n, k,
-                                    dV, lddv, dT, lddt, dC, lddc,
-                                    dwork, ldwork, dworkvt, ldworkvt,
-                                    magmablasGetQueue() );
-}
-
-#endif // MAGMA_NO_V1
diff --git a/magmablas/magmablas_zc_v1.cpp b/magmablas/magmablas_zc_v1.cpp
deleted file mode 100644
index 896a6c54c..000000000
--- a/magmablas/magmablas_zc_v1.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @precisions mixed zc -> ds
-
-       @author Mark Gates
-       
-       Implements all the wrappers for v1 backwards compatability.
-       Separating the wrappers allows the new functions to use magma_internal.h
-*/
-#ifndef MAGMA_NO_V1
-
-#include "magma_internal.h"
-#include "magmablas_v1.h"  // includes v1 prototypes; does NOT map routine names
-
-// These MAGMA v1 routines are all deprecated.
-// See corresponding v2 functions for documentation.
-
-/******************************************************************************/
-extern "C" void
-magmablas_zcaxpycp_v1(
-    magma_int_t m,
-    magmaFloatComplex_ptr r,
-    magmaDoubleComplex_ptr x,
-    magmaDoubleComplex_const_ptr b,
-    magmaDoubleComplex_ptr w)
-{
-    magmablas_zcaxpycp( m, r, x, b, w, magmablasGetQueue() );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_clag2z_v1(
-    magma_int_t m, magma_int_t n,
-    magmaFloatComplex_const_ptr SA, magma_int_t ldsa,
-    magmaDoubleComplex_ptr       A, magma_int_t lda,
-    magma_int_t *info)
-{
-    magmablas_clag2z( m, n, SA, ldsa, A, lda, magmablasGetQueue(), info );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_clat2z_v1(
-    magma_uplo_t uplo, magma_int_t n,
-    magmaFloatComplex_const_ptr SA, magma_int_t ldsa,
-    magmaDoubleComplex_ptr      A,  magma_int_t lda,
-    magma_int_t *info )
-{
-    magmablas_clat2z( uplo, n, SA, ldsa, A, lda, magmablasGetQueue(), info );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlag2c_v1(
-    magma_int_t m, magma_int_t n,
-    magmaDoubleComplex_const_ptr A, magma_int_t lda,
-    magmaFloatComplex_ptr SA,       magma_int_t ldsa,
-    magma_int_t *info )
-{
-    magmablas_zlag2c( m, n, A, lda, SA, ldsa, magmablasGetQueue(), info );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zlat2c_v1(
-    magma_uplo_t uplo, magma_int_t n,
-    magmaDoubleComplex_const_ptr  A, magma_int_t lda,
-    magmaFloatComplex_ptr        SA, magma_int_t ldsa,
-    magma_int_t *info )
-{
-    magmablas_zlat2c( uplo, n, A, lda, SA, ldsa, magmablasGetQueue(), info );
-}
-
-
-/******************************************************************************/
-extern "C" void
-magmablas_zclaswp_v1(
-    magma_int_t n,
-    magmaDoubleComplex_ptr A, magma_int_t lda,
-    magmaFloatComplex_ptr SA,
-    magma_int_t m,
-    const magma_int_t *ipiv, magma_int_t incx )
-{
-    magmablas_zclaswp( n, A, lda, SA, lda, m, ipiv, incx, magmablasGetQueue() );
-}
-
-#endif // MAGMA_NO_V1
diff --git a/src/zpotrf_mgpu_right.cpp b/src/zpotrf_mgpu_right.cpp
index 2158701bc..dde00e098 100644
--- a/src/zpotrf_mgpu_right.cpp
+++ b/src/zpotrf_mgpu_right.cpp
@@ -11,8 +11,6 @@
        @precisions normal z -> s d c
 
 */
-#ifndef MAGMA_NO_V1
-
 #include "magma_internal.h"
 #include "trace.h"
 
@@ -512,4 +510,3 @@ magma_zpotrf_mgpu_right(
 #undef STREAM_ID
 
 #endif // needs updating to MAGMA v2 interfaces
-#endif // MAGMA_NO_V1

From 8cb4cf7c6777c91f3e5f56353e3271190bed2be2 Mon Sep 17 00:00:00 2001
From: Cordell Bloor <Cordell.Bloor@amd.com>
Date: Thu, 19 Dec 2024 23:18:40 -0700
Subject: [PATCH 03/27] Check ROCm library versions directly

There is no guarantee that the version number of the rocm-core library
will match that of other ROCm libraries. It is both simpler and more
robust to directly check the version number of the relevent library.
For this reason, Debian has declined to package rocm-core. All
other AMD GPU dependencies of magma are available in Debian Trixie.

The Debian builds of magama-rocm currently use libamdhip64-dev 5.7.1,
libhipblas-dev 5.5.1, and libhipsparse-dev 5.7.1. The Ubuntu 24.04 LTS
was frozen with those package versions in its universe repositories.
There is no single value that could accurately describe the ROCM_VERSION
on Debian or Ubuntu.

At the moment, a failure to find rocm-core is treated by magma as a
version less than 6.0.0. Without this change, the Debian build of
magma will begin to fail when either of the libamdhip64-dev or
libhipblas-dev libraries are updated to a newer version.

Formatting and documentation fixes to "A leaner gesv_rbt_async with improved execution speed and accuracy (PR #62)"
---
 CMakeLists.txt                              | 863 ++++++++++----------
 Makefile                                    |   1 -
 include/magma_auxiliary.h                   |  14 +-
 interface_cuda/alloc.cpp                    |  47 +-
 interface_cuda/blas_z_v2.cpp                |   2 +-
 interface_cuda/interface.cpp                |   2 +-
 magmablas/ztrsm.cu                          |  10 +
 magmablas/ztrtri_diag.cu                    |  11 +
 make.inc-examples/make.inc.hip-gcc-mkl      |   3 -
 make.inc-examples/make.inc.hip-gcc-openblas |   3 -
 src/zgerbt_gpu.cpp                          |  10 +
 src/zgerfs_nopiv_gpu.cpp                    |  20 +
 src/zgesv_nopiv_gpu.cpp                     |  10 +
 src/zgesv_rbt.cpp                           |  20 +
 src/zgetrf_nopiv_gpu.cpp                    |  11 +
 src/zgetrs_nopiv_gpu.cpp                    |  10 +
 tools/get-rocm-version.sh                   |  68 --
 17 files changed, 575 insertions(+), 530 deletions(-)
 delete mode 100755 tools/get-rocm-version.sh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea852439f..7a9166be8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,15 +1,15 @@
-cmake_minimum_required(VERSION 3.18)
+cmake_minimum_required( VERSION 3.18 )
 
 # ----------------------------------------
 # to disable Fortran, set this to "off"
 # see also -DADD_ below
-option(USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON)
+option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON )
 
 if (USE_FORTRAN)
-    project(MAGMA C CXX Fortran)
-else ()
-    project(MAGMA C CXX)
-endif ()
+    project( MAGMA C CXX Fortran )
+else()
+    project( MAGMA C CXX )
+endif()
 
 FIND_PROGRAM(PROGRAM_CCACHE ccache)
 IF (PROGRAM_CCACHE)
@@ -25,17 +25,17 @@ ENDIF ()
 
 # ----------------------------------------
 # MAGMA requires one backend to be enabled
-option(MAGMA_ENABLE_CUDA "Enable the CUDA backend" OFF)
-option(MAGMA_ENABLE_HIP "Enable the HIP  backend" OFF)
+option(MAGMA_ENABLE_CUDA     "Enable the CUDA backend"  OFF)
+option(MAGMA_ENABLE_HIP      "Enable the HIP  backend"  OFF)
 
 # check if one backend has been enabled
 if (NOT MAGMA_ENABLE_CUDA AND
-        NOT MAGMA_ENABLE_HIP
-)
-    message(STATUS "MAGMA requires one enabled backend!")
-    message(STATUS "Building CUDA backend")
-    set(MAGMA_ENABLE_CUDA ON)
-endif ()
+    NOT MAGMA_ENABLE_HIP
+    )
+  message(STATUS "MAGMA requires one enabled backend!")
+  message(STATUS "Building CUDA backend")
+  set( MAGMA_ENABLE_CUDA ON )
+endif()
 
 # ----------------------------------------
 # don't regenerate files during make.
@@ -47,25 +47,25 @@ set(CMAKE_SUPPRESS_REGENERATION on)
 # ----------------------------------------
 # force an out-of-source build, to not overwrite the existing Makefiles
 # (out-of-source is cleaner, too)
-string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" MAGMA_COMPILE_INPLACE)
+string( COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" MAGMA_COMPILE_INPLACE )
 if (MAGMA_COMPILE_INPLACE)
-    message(FATAL_ERROR "Compiling MAGMA with CMake requires an out-of-source build. To proceed:
+    message( FATAL_ERROR "Compiling MAGMA with CMake requires an out-of-source build. To proceed:
     rm -rf CMakeCache.txt CMakeFiles/   # delete files in ${CMAKE_SOURCE_DIR}
     mkdir build
     cd build
     cmake ..
-    make")
-endif ()
+    make" )
+endif()
 
 
 # ----------------------------------------
 # prefer shared libraries
-option(BUILD_SHARED_LIBS "If on, build shared libraries, otherwise build static libraries" ON)
+option( BUILD_SHARED_LIBS "If on, build shared libraries, otherwise build static libraries" ON )
 
 # prefer /usr/local/magma, instead of /usr/local.
 if (UNIX AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
     set(CMAKE_INSTALL_PREFIX "/usr/local/magma" CACHE PATH "..." FORCE)
-endif ()
+endif()
 
 # ----------------------------------------
 # use C++14 and C99
@@ -77,47 +77,47 @@ CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X)
 CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC)
 if (COMPILER_SUPPORTS_CXX14)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
-elseif (COMPILER_SUPPORTS_CXX0X)
+elseif(COMPILER_SUPPORTS_CXX0X)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
-else ()
+else()
     message( WARNING "The compiler ${CMAKE_CXX_COMPILER} doesn't support the -std=c++14 flag. Some code may not compile.")
-endif ()
+endif()
 
 CHECK_C_COMPILER_FLAG("-std=c99" COMPILER_SUPPORTS_C99)
 if (COMPILER_SUPPORTS_C99)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
-else ()
-    message(WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.")
-endif ()
+else()
+    message( WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.")
+endif()
 
 
 # ----------------------------------------
 # check Fortran name mangling
 if (USE_FORTRAN)
-    include(FortranCInterface)
-    FortranCInterface_HEADER(${CMAKE_SOURCE_DIR}/include/magma_mangling_cmake.h MACRO_NAMESPACE MAGMA_)
-else ()
+    include( FortranCInterface )
+    FortranCInterface_HEADER( ${CMAKE_SOURCE_DIR}/include/magma_mangling_cmake.h MACRO_NAMESPACE MAGMA_ )
+else()
     # set one of -DADD_, -DUPCASE, or -DNOCHANGE. See README.
-    message(STATUS "Building without Fortran compiler")
-    set(FORTRAN_CONVENTION "-DADD_" CACHE STRING "Fortran calling convention, one of -DADD_, -DNOCHANGE, -DUPCASE")
-    set_property(CACHE FORTRAN_CONVENTION PROPERTY STRINGS -DADD_ -DNOCHANGE -DUPCASE)
-    message(STATUS "    Using ${FORTRAN_CONVENTION} for Fortran calling convention")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}   ${FORTRAN_CONVENTION}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FORTRAN_CONVENTION}")
+    message( STATUS "Building without Fortran compiler" )
+    set( FORTRAN_CONVENTION "-DADD_" CACHE STRING "Fortran calling convention, one of -DADD_, -DNOCHANGE, -DUPCASE" )
+    set_property( CACHE FORTRAN_CONVENTION PROPERTY STRINGS -DADD_ -DNOCHANGE -DUPCASE )
+    message( STATUS "    Using ${FORTRAN_CONVENTION} for Fortran calling convention" )
+    set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   ${FORTRAN_CONVENTION}" )
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FORTRAN_CONVENTION}" )
     # see also NVCC_FLAGS below
-endif ()
+endif()
 
 
 # ----------------------------------------
 # locate OpenMP
-find_package(OpenMP)
+find_package( OpenMP )
 if (OPENMP_FOUND)
-    message(STATUS "Found OpenMP")
-    message(STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}")
-    message(STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-endif ()
+    message( STATUS "Found OpenMP" )
+    message( STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}" )
+    message( STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" )
+    set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
+endif()
 
 # ----------------------------------------
 # locate CUDA libraries
@@ -125,48 +125,47 @@ if (MAGMA_ENABLE_CUDA)
     if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
         set(CMAKE_CUDA_COMPILER_WORKS ON) # Fix for intel compiler and CUDA
     endif ()
-    enable_language(CUDA)
+    enable_language( CUDA )
 
-    set(CUDA_NAMES
-            "one or more of "
+    set( CUDA_NAMES
+         "one or more of "
          "Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, "
-            "or valid sm_XY or sm_XYZ")
-    set(GPU_TARGET "" CACHE STRING
-            "CUDA architectures to compile for, overrides CMAKE_CUDA_ARCHITECTURES; ${CUDA_NAMES}")
-    find_package(CUDAToolkit)
+         "or valid sm_XY or sm_XYZ" )
+    set( GPU_TARGET "" CACHE STRING
+         "CUDA architectures to compile for, overrides CMAKE_CUDA_ARCHITECTURES; ${CUDA_NAMES}" )
+    find_package( CUDAToolkit )
     if (CUDAToolkit_FOUND)
-        message(STATUS "Found CUDA ${CUDA_VERSION}")
-        message(STATUS "    CUDA_CUDART_LIBRARY: CUDA::cudart")
+        message( STATUS "Found CUDA ${CUDA_VERSION}" )
+        message( STATUS "    CUDA_CUDART_LIBRARY: CUDA::cudart" )
         #message( STATUS "    CUDA_CUBLAS_LIBRARIES: CUDA::cublas" )
 
-        include_directories(${CUDAToolkit_INCLUDE_DIRS})
-        link_directories(${CUDAToolkit_LIBRARY_DIR})
+        include_directories( ${CUDAToolkit_INCLUDE_DIRS} )
 
         if (GPU_TARGET)
             # Map names to architectures.
             if (GPU_TARGET MATCHES Fermi)
-                set(GPU_TARGET "${GPU_TARGET} sm_20")
-            endif ()
+                set( GPU_TARGET "${GPU_TARGET} sm_20" )
+            endif()
 
             if (GPU_TARGET MATCHES Kepler)
-                set(GPU_TARGET "${GPU_TARGET} sm_30 sm_35 sm_37")
-            endif ()
+                set( GPU_TARGET "${GPU_TARGET} sm_30 sm_35 sm_37" )
+            endif()
 
             if (GPU_TARGET MATCHES Maxwell)
                 set( GPU_TARGET "${GPU_TARGET} sm_50 sm_52 sm_53" )
-            endif ()
+            endif()
 
             if (GPU_TARGET MATCHES Pascal)
                 set( GPU_TARGET "${GPU_TARGET} sm_60 sm_61 sm_62" )
-            endif ()
+            endif()
 
             if (GPU_TARGET MATCHES Volta)
                 set( GPU_TARGET "${GPU_TARGET} sm_70 sm_72" )
-            endif ()
+            endif()
 
             if (GPU_TARGET MATCHES Turing)
-                set(GPU_TARGET "${GPU_TARGET} sm_75")
-            endif ()
+                set( GPU_TARGET "${GPU_TARGET} sm_75" )
+            endif()
 
             if (GPU_TARGET MATCHES Ampere)
                 set( GPU_TARGET "${GPU_TARGET} sm_80 sm_86 sm_87" )
@@ -174,223 +173,219 @@ if (MAGMA_ENABLE_CUDA)
 
             if (GPU_TARGET MATCHES Ada)
                 set( GPU_TARGET "${GPU_TARGET} sm_89" )
-            endif ()
+            endif()
 
             if (GPU_TARGET MATCHES Hopper)
                 set( GPU_TARGET "${GPU_TARGET} sm_90 sm_90a" )
-            endif ()
+            endif()
 
             # Find all sm_XY and sm_XYZ, then strip off sm_.
             string( REGEX MATCHALL "sm_[0-9][0-9a-z]+" sms "${GPU_TARGET}" )
-            string(REPLACE "sm_" "" __cuda_architectures "${sms}")
+            string( REPLACE "sm_" "" __cuda_architectures "${sms}" )
 
             if (NOT __cuda_architectures)
-                message(FATAL_ERROR
-                        "GPU_TARGET must contain ${CUDA_NAMES}. "
-                        "Was: ${GPU_TARGET}")
-            endif ()
+                message( FATAL_ERROR
+                         "GPU_TARGET must contain ${CUDA_NAMES}. "
+                         "Was: ${GPU_TARGET}" )
+            endif()
 
-            set(CMAKE_CUDA_ARCHITECTURES "${__cuda_architectures}")
-        endif ()
+            set( CMAKE_CUDA_ARCHITECTURES "${__cuda_architectures}" )
+        endif()
 
-        message(STATUS "    Compile for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}")
-        set(MAGMA_CUDA_ARCH "${CMAKE_CUDA_ARCHITECTURES}")
+        message( STATUS "    Compile for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" )
+        set( MAGMA_CUDA_ARCH "${CMAKE_CUDA_ARCHITECTURES}" )
 
         # Find minimum arch in CMAKE_CUDA_ARCHITECTURES, if they're all numeric.
-        set(min_arch 9999)
-        foreach (arch ${CMAKE_CUDA_ARCHITECTURES})
+        set( min_arch 9999 )
+        foreach( arch ${CMAKE_CUDA_ARCHITECTURES} )
             if (arch MATCHES "^([0-9]+)")  # 80-real, 80-virtual, etc. okay
                 if (CMAKE_MATCH_1 LESS min_arch)
-                    set(min_arch "${CMAKE_MATCH_1}")
-                endif ()
-            else ()
-                set(min_arch 0)  # arch like "native", min unknown
+                    set( min_arch "${CMAKE_MATCH_1}" )
+                endif()
+            else()
+                set( min_arch 0 )  # arch like "native", min unknown
                 break()
-            endif ()
-        endforeach ()
+            endif()
+        endforeach()
         # Append zero, so it is comparable to '__CUDA_ARCH__'
-        set(MAGMA_CUDA_ARCH_MIN "${min_arch}0")
+        set( MAGMA_CUDA_ARCH_MIN "${min_arch}0" )
 
-        add_library(magma_nvcc_flags INTERFACE)
+        add_library( magma_nvcc_flags INTERFACE )
         if (COMPILER_SUPPORTS_FPIC)
             target_compile_options(magma_nvcc_flags
-                    INTERFACE
-                    $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options;-fPIC,${FORTRAN_CONVENTION}>
+                INTERFACE
+                $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options;-fPIC,${FORTRAN_CONVENTION}>
             )
-        else ()
+        else()
             # No Position Independent Code on Windows.
             # Compiler will complain if you add that flag.
             target_compile_options(magma_nvcc_flags
-                    INTERFACE
-                    $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options;${FORTRAN_CONVENTION}>
+                INTERFACE
+                $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options;${FORTRAN_CONVENTION}>
             )
-        endif ()
+        endif()
 
-        set(MAGMA_HAVE_CUDA "1")
+        set( MAGMA_HAVE_CUDA "1" )
 
-        message(STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MAGMA_CUDA_ARCH_MIN}")
-    else ()
-        message(STATUS "Could not find CUDA")
-    endif ()
-endif ()
+        message( STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MAGMA_CUDA_ARCH_MIN}" )
+    else()
+        message( STATUS "Could not find CUDA" )
+    endif()
+endif()
 
 # ----------------------------------------
 # locate HIP/ROCm libraries
 if (MAGMA_ENABLE_HIP)
-    set(GPU_TARGET "gfx900" CACHE STRING "HIP architectures to compile for")
-    list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/lib/cmake/hip)
-    find_package(HIP)
-    if (HIP_FOUND)
-        message(STATUS "Found HIP ${HIP_VERSION}")
-        message(STATUS "    HIP_INCLUDE_DIRS:   ${HIP_INCLUDE_DIRS}")
-        message(STATUS "GPU_TARGET:  ${GPU_TARGET}")
-
-        include_directories(${HIP_INCLUDE_DIRS})
-
-        set(HIP_SEPARABLE_COMPILATION ON)
-
-        if (GPU_TARGET MATCHES kaveri)
-            set(GPU_TARGET ${GPU_TARGET} gfx700)
-        endif ()
-
-        if (GPU_TARGET MATCHES hawaii)
-            set(GPU_TARGET ${GPU_TARGET} gfx701)
-        endif ()
-
-        if (GPU_TARGET MATCHES kabini)
-            set(GPU_TARGET ${GPU_TARGET} gfx703)
-        endif ()
-
-        if (GPU_TARGET MATCHES mullins)
-            set(GPU_TARGET ${GPU_TARGET} gfx703)
-        endif ()
-
-        if (GPU_TARGET MATCHES bonaire)
-            set(GPU_TARGET ${GPU_TARGET} gfx704)
-        endif ()
-
-        if (GPU_TARGET MATCHES carrizo)
-            set(GPU_TARGET ${GPU_TARGET} gfx801)
-        endif ()
-
-        if (GPU_TARGET MATCHES iceland)
-            set(GPU_TARGET ${GPU_TARGET} gfx802)
-        endif ()
-
-        if (GPU_TARGET MATCHES tonga)
-            set(GPU_TARGET ${GPU_TARGET} gfx802)
-        endif ()
-
-        if (GPU_TARGET MATCHES fiji)
-            set(GPU_TARGET ${GPU_TARGET} gfx803)
-        endif ()
-
-        if (GPU_TARGET MATCHES polaris10)
-            set(GPU_TARGET ${GPU_TARGET} gfx803)
-        endif ()
-
-        if (GPU_TARGET MATCHES tongapro)
-            set(GPU_TARGET ${GPU_TARGET} gfx805)
-        endif ()
-
-        if (GPU_TARGET MATCHES stoney)
-            set(GPU_TARGET ${GPU_TARGET} gfx810)
-        endif ()
-
-        set(DEVCCFLAGS "")
-        set(VALID_GFXS "700;701;702;703;704;705;801;802;803;805;810;900;902;904;906;908;909;90c;1010;1011;1012;1030;1031;1032;1033")
-        foreach (GFX ${VALID_GFXS})
-            if (GPU_TARGET MATCHES gfx${GFX})
-                set(DEVCCFLAGS ${DEVCCFLAGS} --offload-arch=gfx${GFX})
-            endif ()
-        endforeach ()
-
-        set(DEVCCFLAGS ${DEVCCFLAGS} -fPIC ${FORTRAN_CONVENTION})
-        set(MAGMA_HAVE_HIP "1")
-        message(STATUS "Define -DMAGMA_HAVE_HIP")
-
-        set_property(TARGET hip::device APPEND PROPERTY COMPATIBLE_INTERFACE_BOOL INTERFACE_HIP_DEVICE_COMPILE)
-        set_property(TARGET hip::device PROPERTY INTERFACE_HIP_DEVICE_COMPILE ON)
-        set(GPU_ARCH_FLAGS ${DEVCCFLAGS})
-
-        #add_compile_options(${GPU_ARCH_FLAGS})
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
-        option(ROCM_CORE "Location of the rocm-core package")
-        execute_process(COMMAND "${CMAKE_SOURCE_DIR}/tools/get-rocm-version.sh" "${ROCM_CORE}" OUTPUT_VARIABLE ROCM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
-        message(STATUS "ROCM_VERSION=${ROCM_VERSION}")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DROCM_VERSION=${ROCM_VERSION}")
-    else ()
-        message(STATUS "Could not find HIP")
-    endif ()
-endif ()
+  set( GPU_TARGET "gfx900" CACHE STRING "HIP architectures to compile for" )
+  list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/lib/cmake/hip)
+  find_package( HIP )
+  if (HIP_FOUND)
+    message( STATUS "Found HIP ${HIP_VERSION}" )
+    message( STATUS "    HIP_INCLUDE_DIRS:   ${HIP_INCLUDE_DIRS}"   )
+    message( STATUS "GPU_TARGET:  ${GPU_TARGET}"   )
+
+    include_directories( ${HIP_INCLUDE_DIRS} )
+
+    set(HIP_SEPARABLE_COMPILATION ON)
+
+    if (GPU_TARGET MATCHES kaveri)
+      set( GPU_TARGET ${GPU_TARGET} gfx700 )
+    endif()
+
+    if (GPU_TARGET MATCHES hawaii)
+      set( GPU_TARGET ${GPU_TARGET} gfx701 )
+    endif()
+
+    if (GPU_TARGET MATCHES kabini)
+      set( GPU_TARGET ${GPU_TARGET} gfx703 )
+    endif()
+
+    if (GPU_TARGET MATCHES mullins)
+      set( GPU_TARGET ${GPU_TARGET} gfx703 )
+    endif()
+
+    if (GPU_TARGET MATCHES bonaire)
+      set( GPU_TARGET ${GPU_TARGET} gfx704 )
+    endif()
+
+    if (GPU_TARGET MATCHES carrizo)
+      set( GPU_TARGET ${GPU_TARGET} gfx801 )
+    endif()
+
+    if (GPU_TARGET MATCHES iceland)
+      set( GPU_TARGET ${GPU_TARGET} gfx802 )
+    endif()
+
+    if (GPU_TARGET MATCHES tonga)
+      set( GPU_TARGET ${GPU_TARGET} gfx802 )
+    endif()
+
+    if (GPU_TARGET MATCHES fiji)
+      set( GPU_TARGET ${GPU_TARGET} gfx803 )
+    endif()
+
+    if (GPU_TARGET MATCHES polaris10)
+      set( GPU_TARGET ${GPU_TARGET} gfx803 )
+    endif()
+
+    if (GPU_TARGET MATCHES tongapro)
+      set( GPU_TARGET ${GPU_TARGET} gfx805 )
+    endif()
+
+    if (GPU_TARGET MATCHES stoney)
+      set( GPU_TARGET ${GPU_TARGET} gfx810 )
+    endif()
+
+    set( DEVCCFLAGS  "" )
+    set(VALID_GFXS "700;701;702;703;704;705;801;802;803;805;810;900;902;904;906;908;909;90c;1010;1011;1012;1030;1031;1032;1033")
+    foreach( GFX ${VALID_GFXS} )
+      if ( GPU_TARGET MATCHES gfx${GFX} )
+        set( DEVCCFLAGS ${DEVCCFLAGS} --offload-arch=gfx${GFX} )
+      endif()
+    endforeach()
+
+    set( DEVCCFLAGS ${DEVCCFLAGS} -fPIC ${FORTRAN_CONVENTION} )
+    set(MAGMA_HAVE_HIP "1")
+    message( STATUS "Define -DMAGMA_HAVE_HIP" )
+
+    set_property(TARGET hip::device APPEND PROPERTY COMPATIBLE_INTERFACE_BOOL INTERFACE_HIP_DEVICE_COMPILE)
+    set_property(TARGET hip::device PROPERTY INTERFACE_HIP_DEVICE_COMPILE ON)
+    set(GPU_ARCH_FLAGS ${DEVCCFLAGS})
+
+    #add_compile_options(${GPU_ARCH_FLAGS})
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__" )
+  else()
+    message( STATUS "Could not find HIP" )
+  endif()
+endif()
 
 # ----------------------------------------
 # locate LAPACK libraries
 
 set(BLA_VENDOR "" CACHE STRING
-        "Use specified BLAS library. See https://cmake.org/cmake/help/latest/module/FindBLAS.html")
+    "Use specified BLAS library. See https://cmake.org/cmake/help/latest/module/FindBLAS.html")
 
 # List from CMake 3.17, minus some obsolete ones:
 # PhiPACK, Compaq CXML, DEC Alpha DXML, SunPerf, SGI SCSL, SGIMATH,
 # Intel, NAS (Apple veclib).
 # FLAME is BLIS.
 set_property(CACHE BLA_VENDOR PROPERTY STRINGS
-        "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL"
-        "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq"
-        "ACML" "ACML_MP" "ACML_GPU"
-        "Apple"
-        "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp"
-        "Generic")
-
-set(LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search")
+    "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL"
+    "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq"
+    "ACML" "ACML_MP" "ACML_GPU"
+    "Apple"
+    "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp"
+    "Generic")
+
+set( LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" )
 if (LAPACK_LIBRARIES STREQUAL "")
-    message(STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake.")
-    find_package(LAPACK)
+    message( STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake." )
+    find_package( LAPACK )
     # force showing updated LAPACK_LIBRARIES in ccmake / cmake-gui.
-    set(LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE)
-else ()
-    message(STATUS "User set LAPACK_LIBRARIES. To change, edit LAPACK_LIBRARIES using ccmake (set to empty to enable search).")
+    set( LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE )
+else()
+    message( STATUS "User set LAPACK_LIBRARIES. To change, edit LAPACK_LIBRARIES using ccmake (set to empty to enable search)." )
     # Check either -lname syntax or file existence
-    foreach (LIB ${LAPACK_LIBRARIES})
+    foreach( LIB ${LAPACK_LIBRARIES} )
         if (NOT LIB MATCHES "^-l[a-zA-Z0-9_-]+$")
-            if (NOT EXISTS ${LIB})
-                message(WARNING "\n      Warning: file ${LIB} does not exist.\n")
-            endif ()
-        endif ()
-    endforeach ()
-endif ()
+        if (NOT EXISTS ${LIB})
+            message( WARNING "\n      Warning: file ${LIB} does not exist.\n" )
+        endif()
+        endif()
+    endforeach()
+endif()
 
 # If using MKL, add it to includes and define MAGMA_WITH_MKL
 # Initially, this gets MKLROOT from environment, but then the user can edit it.
 if (LAPACK_LIBRARIES MATCHES mkl_core)
-    set(MKLROOT $ENV{MKLROOT} CACHE STRING "MKL installation directory")
+    set( MKLROOT $ENV{MKLROOT} CACHE STRING "MKL installation directory" )
     if (MKLROOT STREQUAL "")
-        message(WARNING "LAPACK_LIBRARIES has MKL, but MKLROOT not set; can't add include directory.")
-    else ()
-        message(STATUS "MKLROOT set to ${MKLROOT}. To change, edit MKLROOT using ccmake.")
+        message( WARNING "LAPACK_LIBRARIES has MKL, but MKLROOT not set; can't add include directory." )
+    else()
+        message( STATUS "MKLROOT set to ${MKLROOT}. To change, edit MKLROOT using ccmake." )
         if (NOT EXISTS ${MKLROOT})
-            message(FATAL_ERROR "MKLROOT ${MKLROOT} directory does not exist.")
-        endif ()
-        include_directories(${MKLROOT}/include)
-        add_definitions(-DMAGMA_WITH_MKL)
-        message(STATUS "Define -DMAGMA_WITH_MKL")
-    endif ()
-endif ()
+            message( FATAL_ERROR "MKLROOT ${MKLROOT} directory does not exist." )
+        endif()
+        include_directories( ${MKLROOT}/include )
+        add_definitions( -DMAGMA_WITH_MKL )
+        message( STATUS "Define -DMAGMA_WITH_MKL" )
+    endif()
+endif()
 
 
 # ----------------------------------------
 # save magma.lib, magma_sparse.lib, etc. in lib/
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY lib)
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib)
+set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY lib )
+set( CMAKE_LIBRARY_OUTPUT_DIRECTORY lib )
 
 
 # ----------------------------------------
 # list of sources
 if (MAGMA_ENABLE_CUDA)
-    include(${CMAKE_SOURCE_DIR}/CMake.src.cuda)
-else ()
-    include(${CMAKE_SOURCE_DIR}/CMake.src.hip)
-endif ()
+  include( ${CMAKE_SOURCE_DIR}/CMake.src.cuda )
+else()
+  include( ${CMAKE_SOURCE_DIR}/CMake.src.hip )
+endif()
 
 # ----------------------------------------
 # common flags
@@ -401,20 +396,20 @@ if (WIN32)
     #     -Wall is way too verbose; use -W4
     #     -MP enables parallel builds
     #     -std=c99 is not implemented, so skip that
-    string(REGEX REPLACE " */W3" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
-    string(REGEX REPLACE " */W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY")
-else ()
+    string( REGEX REPLACE " */W3" "" CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}"   )
+    string( REGEX REPLACE " */W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" )
+    set( CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY" )
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY" )
+else()
     # Primarily for gcc / nvcc:
     # Ignore unused static functions in headers.
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unused-function")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function")
-endif ()
+    set( CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -Wall -Wno-unused-function" )
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function" )
+endif()
 
 if (CMAKE_HOST_APPLE)
     # Use rpaths, which is on by default in CMake 3.
-    set(CMAKE_MACOSX_RPATH 1)
+    set( CMAKE_MACOSX_RPATH 1 )
 
     # 64-bit veclib (Accelerate) has issues; substitute correct functions from LAPACK.
     # (The issue is single precision functions that return doubles;
@@ -422,48 +417,48 @@ if (CMAKE_HOST_APPLE)
     # but this is not feasible in Fortran.)
     if (LAPACK_LIBRARIES MATCHES "Accelerate")
         if (USE_FORTRAN)
-            message(STATUS "MacOS X: adding blas_fix library")
-            add_library(blas_fix ${libblas_fix_src})
-            target_link_libraries(blas_fix
-                    ${LAPACK_LIBRARIES}
+            message( STATUS "MacOS X: adding blas_fix library" )
+            add_library( blas_fix ${libblas_fix_src} )
+            target_link_libraries( blas_fix
+                ${LAPACK_LIBRARIES}
             )
-            set(blas_fix blas_fix)
-            set(blas_fix_lib -lblas_fix)
-        else ()
-            message(WARNING "\n      Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n")
-        endif ()
-    endif ()
+            set( blas_fix blas_fix )
+            set( blas_fix_lib -lblas_fix )
+        else()
+            message( WARNING "\n      Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n" )
+        endif()
+    endif()
 
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY")
+    set( CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY" )
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY" )
 
     # previously, just compile as 32-bit, but CUDA 6.5 no longer has 32-bit FAT libraries
     ## set( CMAKE_C_FLAGS       "${CMAKE_C_FLAGS} -m32" )
     ## set( CMAKE_CXX_FLAGS     "${CMAKE_CXX_FLAGS} -m32" )
     ## set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -m32" )
     ## set( CUDA_64_BIT_DEVICE_CODE OFF )
-endif ()
+endif()
 
-include_directories("${CMAKE_BINARY_DIR}/include")
+include_directories( "${CMAKE_BINARY_DIR}/include" )
 
-include_directories(include)
-include_directories(control)
+include_directories( include )
+include_directories( control )
 if (MAGMA_ENABLE_CUDA)
-    include_directories(magmablas)  # e.g., shuffle.cuh
-else ()
-    include_directories(magmablas_hip)  # e.g., shuffle.cuh
-endif ()
+  include_directories( magmablas )  # e.g., shuffle.cuh
+else()
+  include_directories( magmablas_hip )  # e.g., shuffle.cuh
+endif()
 
 # Need to check sizeof(void*) after setting flags above;
 # CMAKE_SIZEOF_VOID_P can be wrong.
-include(CheckTypeSize)
-CHECK_TYPE_SIZE(void* SIZEOF_VOID_PTR)
+include( CheckTypeSize )
+CHECK_TYPE_SIZE( void* SIZEOF_VOID_PTR )
 if (USE_FORTRAN)
-    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Dmagma_devptr_t=\"integer\(kind=${SIZEOF_VOID_PTR}\)\"")
-endif ()
+    set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Dmagma_devptr_t=\"integer\(kind=${SIZEOF_VOID_PTR}\)\"" )
+endif()
 
 # Configure config
-configure_file(${CMAKE_SOURCE_DIR}/include/magma_config.h.in ${CMAKE_BINARY_DIR}/include/magma_config.h)
+configure_file(${CMAKE_SOURCE_DIR}/include/magma_config.h.in  ${CMAKE_BINARY_DIR}/include/magma_config.h)
 
 # ----------------------------------------
 # compile MAGMA library
@@ -473,24 +468,24 @@ if (WIN32)
     # understand that .F90 files should be pre-processed.
 
     # separate Fortran and C/C++/CUDA files
-    foreach (filename ${libmagma_all})
+    foreach( filename ${libmagma_all} )
         if (filename MATCHES "\\.(f)$")  # |f90|F90
-            list(APPEND libmagma_all_f ${filename})
+            list( APPEND libmagma_all_f   ${filename} )
         elseif (filename MATCHES "\\.(c|cu|cpp)$")
-            list(APPEND libmagma_all_cpp ${filename})
-        endif ()
-    endforeach ()
+            list( APPEND libmagma_all_cpp ${filename} )
+        endif()
+    endforeach()
     #message( "libmagma_all_cpp ${libmagma_all_cpp}" )
     #message( "libmagma_all_f   ${libmagma_all_f}"   )
 
     # on Windows, Fortran files aren't compiled if listed here...
-    add_library(magma ${libmagma_all_cpp})
-    target_link_libraries(magma
-            ${LAPACK_LIBRARIES}
-            CUDA::cudart
-            CUDA::cublas
-            CUDA::cusparse
-            magma_nvcc_flags
+    add_library( magma ${libmagma_all_cpp} )
+    target_link_libraries( magma
+        ${LAPACK_LIBRARIES}
+        CUDA::cudart
+        CUDA::cublas
+        CUDA::cusparse
+        magma_nvcc_flags
     )
 
     # no Fortran files at the moment (how to test libmagma_all_f is not empty?),
@@ -505,51 +500,51 @@ if (WIN32)
     ##     CUDA::cusparse
     ## )
     ## make list of Fortran .mod files to install, as below
-else ()
+else()
     # Unix doesn't seem to have a problem with mixing C, CUDA, and Fortran files
     if (MAGMA_ENABLE_CUDA)
-        #message(FATAL_ERROR "${libmagma_all}")
-        add_library(magma ${libmagma_all})
-        target_link_libraries(magma
-                ${blas_fix}
-                ${LAPACK_LIBRARIES}
-                CUDA::cudart
-                CUDA::cublas
-                CUDA::cusparse
-                magma_nvcc_flags
+    #message(FATAL_ERROR "${libmagma_all}")
+      add_library( magma ${libmagma_all} )
+      target_link_libraries( magma
+        ${blas_fix}
+        ${LAPACK_LIBRARIES}
+        CUDA::cudart
+        CUDA::cublas
+        CUDA::cusparse
+        magma_nvcc_flags
         )
-    else ()
-        find_package(hipBLAS)
-        if (hipBLAS_FOUND)
-            message(STATUS "Found rocBLAS ${rocBLAS_VERSION}")
-        endif ()
-        find_package(hipSPARSE)
-        if (hipSPARSE_FOUND)
-            message(STATUS "Found rocSPARSE ${rocSPARSE_VERSION}")
-        endif ()
-        add_library(magma ${libmagma_all})
-        target_link_libraries(magma
-                hip::host
-                ${blas_fix}
-                ${LAPACK_LIBRARIES}
+    else()
+      find_package( hipBLAS )
+      if (hipBLAS_FOUND)
+        message( STATUS "Found rocBLAS ${rocBLAS_VERSION}" )
+      endif()
+      find_package( hipSPARSE )
+      if (hipSPARSE_FOUND)
+        message( STATUS "Found rocSPARSE ${rocSPARSE_VERSION}" )
+      endif()
+      add_library( magma ${libmagma_all} )
+      target_link_libraries( magma
+        hip::host
+        ${blas_fix}
+        ${LAPACK_LIBRARIES}
         hip::device
-                roc::hipblas
-                roc::hipsparse
+        roc::hipblas
+        roc::hipsparse
         )
-    endif ()
+    endif()
 
     if (USE_FORTRAN)
         # make list of Fortran .mod files to install
-        foreach (filename ${libmagma_all})
+        foreach( filename ${libmagma_all} )
             if (filename MATCHES "\\.(f90|F90)$")
                 # mod files seem to wind up in root build directory
-                get_filename_component(fmod ${filename} NAME_WE)
-                list(APPEND modules "${CMAKE_BINARY_DIR}/${fmod}.mod")
-            endif ()
-        endforeach ()
-    endif ()
-endif ()
-add_custom_target(lib DEPENDS magma)
+                get_filename_component( fmod ${filename} NAME_WE )
+                list( APPEND modules "${CMAKE_BINARY_DIR}/${fmod}.mod" )
+            endif()
+        endforeach()
+    endif()
+endif()
+add_custom_target( lib DEPENDS magma )
 
 
 # ----------------------------------------
@@ -557,35 +552,35 @@ add_custom_target(lib DEPENDS magma)
 # If use fortran, compile only Fortran files, not magma_[sdcz]_no_fortran.cpp
 # else,           compile only C++     files, not Fortran files
 if (USE_FORTRAN)
-    foreach (filename ${liblapacktest_all})
+    foreach( filename ${liblapacktest_all} )
         if (filename MATCHES "\\.(f|f90|F90)$")
-            list(APPEND liblapacktest_all_f ${filename})
-        endif ()
-    endforeach ()
-    add_library(lapacktest ${liblapacktest_all_f})
-else ()
+            list( APPEND liblapacktest_all_f ${filename} )
+        endif()
+    endforeach()
+    add_library( lapacktest ${liblapacktest_all_f} )
+else()
     # alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp
-    foreach (filename ${liblapacktest_all})
+    foreach( filename ${liblapacktest_all} )
         if (filename MATCHES "\\.(c|cu|cpp)$")
-            list(APPEND liblapacktest_all_cpp ${filename})
-        endif ()
-    endforeach ()
-    add_library(lapacktest ${liblapacktest_all_cpp})
-endif ()
-target_link_libraries(lapacktest
-        ${blas_fix}
-        ${LAPACK_LIBRARIES}
+            list( APPEND liblapacktest_all_cpp ${filename} )
+        endif()
+    endforeach()
+    add_library( lapacktest ${liblapacktest_all_cpp} )
+endif()
+target_link_libraries( lapacktest
+    ${blas_fix}
+    ${LAPACK_LIBRARIES}
 )
 
 
 # ----------------------------------------
 # compile tester library
-add_library(tester ${libtest_all})
-target_link_libraries(tester
-        magma
-        lapacktest
-        ${blas_fix}
-        ${LAPACK_LIBRARIES}
+add_library( tester ${libtest_all} )
+target_link_libraries( tester
+    magma
+    lapacktest
+    ${blas_fix}
+    ${LAPACK_LIBRARIES}
 )
 
 
@@ -594,38 +589,38 @@ target_link_libraries(tester
 
 # sparse doesn't have Fortran at the moment, so no need for above shenanigans
 if (MAGMA_ENABLE_CUDA)
-    include_directories(sparse/include)
-    include_directories(sparse/control)
-else ()
-    include_directories(sparse_hip/include)
-    include_directories(sparse_hip/control)
-endif ()
-include_directories(testing)
+  include_directories( sparse/include )
+  include_directories( sparse/control )
+else()
+  include_directories( sparse_hip/include )
+  include_directories( sparse_hip/control )
+endif()
+include_directories( testing )
 
 if (MAGMA_ENABLE_CUDA)
-    add_library(magma_sparse ${libsparse_all})
+  add_library( magma_sparse ${libsparse_all} )
   set_property(TARGET magma_sparse PROPERTY CUDA_STANDARD 14)
-    target_link_libraries(magma_sparse
-            magma
-            ${blas_fix}
-            ${LAPACK_LIBRARIES}
-            CUDA::cudart
-            CUDA::cublas
-            CUDA::cusparse
-            magma_nvcc_flags
+  target_link_libraries( magma_sparse
+    magma
+    ${blas_fix}
+    ${LAPACK_LIBRARIES}
+    CUDA::cudart
+    CUDA::cublas
+    CUDA::cusparse
+    magma_nvcc_flags
     )
-else ()
-    add_library(magma_sparse ${libsparse_all})
-    target_link_libraries(magma_sparse
-            magma
-            ${blas_fix}
-            ${LAPACK_LIBRARIES}
-            hip::device
-            roc::hipblas
-            roc::hipsparse
+else()
+  add_library( magma_sparse ${libsparse_all} )
+  target_link_libraries( magma_sparse
+    magma
+    ${blas_fix}
+    ${LAPACK_LIBRARIES}
+    hip::device
+    roc::hipblas
+    roc::hipsparse
     )
-endif ()
-add_custom_target(sparse-lib DEPENDS magma_sparse)
+endif()
+add_custom_target( sparse-lib DEPENDS magma_sparse )
 
 
 # ----------------------------------------
@@ -633,119 +628,119 @@ add_custom_target(sparse-lib DEPENDS magma_sparse)
 
 # save testers to testing/
 # save tester lib files to testing_lib/ to avoid cluttering lib/
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY testing)
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib)
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib)
+set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing )
+set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib )
+set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib )
 
 # skip Fortran testers, which require an extra file from CUDA
-foreach (filename ${testing_all})
+foreach( filename ${testing_all} )
     if (filename MATCHES "\\.(c|cu|cpp)$")
-        list(APPEND testing_all_cpp ${filename})
-    endif ()
-endforeach ()
-foreach (TEST ${testing_all_cpp})
-    string(REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST})
-    string(REGEX REPLACE "testing/" "" EXE ${EXE})
+        list( APPEND testing_all_cpp ${filename} )
+    endif()
+endforeach()
+foreach( TEST ${testing_all_cpp} )
+    string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
+    string( REGEX REPLACE "testing/" "" EXE ${EXE} )
     #message( "${TEST} --> ${EXE}" )
-    add_executable(${EXE} ${TEST})
-    target_link_libraries(${EXE} tester lapacktest magma)
-    list(APPEND testing ${EXE})
-endforeach ()
-add_custom_target(testing DEPENDS ${testing})
+    add_executable( ${EXE} ${TEST} )
+    target_link_libraries( ${EXE} tester lapacktest magma )
+    list( APPEND testing ${EXE} )
+endforeach()
+add_custom_target( testing DEPENDS ${testing} )
 
 
 # ----------------------------------------
 # compile each sparse tester
 
 if (MAGMA_ENABLE_CUDA)
-    set(SPARSE_TEST_DIR "sparse/testing")
-else ()
-    set(SPARSE_TEST_DIR "sparse_hip/testing")
-endif ()
+  set(SPARSE_TEST_DIR "sparse/testing")
+else()
+  set(SPARSE_TEST_DIR "sparse_hip/testing")
+endif()
 
 
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}")
-cmake_policy(SET CMP0037 OLD)
-foreach (TEST ${sparse_testing_all})
-    string(REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST})
-    string(REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE})
+set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" )
+cmake_policy( SET CMP0037 OLD)
+foreach( TEST ${sparse_testing_all} )
+    string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} )
+    string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} )
     #message( "${TEST} --> ${EXE}" )
-    add_executable(${EXE} ${TEST})
-    target_link_libraries(${EXE} magma_sparse magma)
-    list(APPEND sparse-testing ${EXE})
-endforeach ()
-add_custom_target(sparse-testing DEPENDS ${sparse-testing})
+    add_executable( ${EXE} ${TEST} )
+    target_link_libraries( ${EXE} magma_sparse magma )
+    list( APPEND sparse-testing ${EXE} )
+endforeach()
+add_custom_target( sparse-testing DEPENDS ${sparse-testing} )
 
 
 # ----------------------------------------
 # what to install
-install(TARGETS magma magma_sparse ${blas_fix}
-        RUNTIME DESTINATION bin
-        LIBRARY DESTINATION lib
-        ARCHIVE DESTINATION lib)
+install( TARGETS magma magma_sparse ${blas_fix}
+         RUNTIME DESTINATION bin
+         LIBRARY DESTINATION lib
+         ARCHIVE DESTINATION lib )
 if (MAGMA_ENABLE_CUDA)
-    file(GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h")
-else ()
-    file(GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h")
-endif ()
+  file( GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" )
+else()
+  file( GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" )
+endif()
 if (USE_FORTRAN)
-    install(FILES ${headers} ${modules}
-            DESTINATION include)
-else ()
-    install(FILES ${headers} DESTINATION include)
-endif ()
+    install( FILES ${headers} ${modules}
+             DESTINATION include )
+else()
+    install( FILES ${headers} DESTINATION include )
+endif()
 
 # ----------------------------------------
 # pkg-config
 get_target_property(MAGMA_INCLUDE magma INCLUDE_DIRECTORIES)
-foreach (dir ${MAGMA_INCLUDE})
+foreach(dir ${MAGMA_INCLUDE})
     string(APPEND INCLUDE_COMPILER_STRING "-I${dir} ")
-endforeach ()
-set(MAGMA_INCLUDE "${INCLUDE_COMPILER_STRING}")
-set(pkgconfig lib/pkgconfig/magma.pc)
-message(STATUS "pkgconfig ${pkgconfig}")
-set(INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
-set(CFLAGS "${CMAKE_C_FLAGS}")
-set(CXXFLAGS "${CMAKE_CXX_FLAGS}")
+endforeach()
+set( MAGMA_INCLUDE "${INCLUDE_COMPILER_STRING}" )
+set( pkgconfig lib/pkgconfig/magma.pc )
+message( STATUS "pkgconfig ${pkgconfig}" )
+set( INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" )
+set( CFLAGS "${CMAKE_C_FLAGS}" )
+set( CXXFLAGS "${CMAKE_CXX_FLAGS}" )
 # CMake finds the Accelerate directory; we want -framework Accelerate for linking.
-string(REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}")
+string( REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}" )
 if (MAGMA_ENABLE_CUDA)
-    string(REPLACE ";" " " LIBS
-            "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse")
-else ()
-    string(REPLACE ";" " " LIBS
-            "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}")
-    #    "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" )
-endif ()
-set(MAGMA_REQUIRED "")
-configure_file("${pkgconfig}.in" "${pkgconfig}" @ONLY)
-install(FILES "${CMAKE_BINARY_DIR}/${pkgconfig}"
-        DESTINATION lib/pkgconfig)
+  string( REPLACE ";" " " LIBS
+    "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse")
+else()
+  string( REPLACE ";" " " LIBS
+     "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}" )
+#    "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" )
+endif()
+set( MAGMA_REQUIRED "" )
+configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY )
+install( FILES "${CMAKE_BINARY_DIR}/${pkgconfig}"
+         DESTINATION lib/pkgconfig )
 
 # ----------------------------------------
-get_directory_property(compile_definitions COMPILE_DEFINITIONS)
+get_directory_property( compile_definitions COMPILE_DEFINITIONS )
 
-message(STATUS "Flags")
-message(STATUS "    CMAKE_INSTALL_PREFIX:  ${CMAKE_INSTALL_PREFIX}")
-message(STATUS "    CFLAGS:                ${CMAKE_C_FLAGS}")
-message(STATUS "    CXXFLAGS:              ${CMAKE_CXX_FLAGS}")
+message( STATUS "Flags" )
+message( STATUS "    CMAKE_INSTALL_PREFIX:  ${CMAKE_INSTALL_PREFIX}" )
+message( STATUS "    CFLAGS:                ${CMAKE_C_FLAGS}" )
+message( STATUS "    CXXFLAGS:              ${CMAKE_CXX_FLAGS}" )
 if (MAGMA_ENABLE_CUDA)
-    message(STATUS "    NVCCFLAGS:             ${CMAKE_CUDA_FLAGS}")
-else ()
-    message(STATUS "    DEVCCFLAGS:            ${DEVCCFLAGS}")
-endif ()
-message(STATUS "    FFLAGS:                ${CMAKE_Fortran_FLAGS}")
-message(STATUS "    LIBS:                  ${LIBS}")
-message(STATUS "    blas_fix:              ${blas_fix}  (MacOS Accelerate only)")
-message(STATUS "    LAPACK_LIBRARIES:      ${LAPACK_LIBRARIES}")
-message(STATUS "    INCLUDE_DIRECTORIES:   ${MAGMA_INCLUDE}")
+  message( STATUS "    NVCCFLAGS:             ${CMAKE_CUDA_FLAGS}" )
+else()
+  message( STATUS "    DEVCCFLAGS:            ${DEVCCFLAGS}" )
+endif()
+message( STATUS "    FFLAGS:                ${CMAKE_Fortran_FLAGS}" )
+message( STATUS "    LIBS:                  ${LIBS}" )
+message( STATUS "    blas_fix:              ${blas_fix}  (MacOS Accelerate only)" )
+message( STATUS "    LAPACK_LIBRARIES:      ${LAPACK_LIBRARIES}"      )
+message( STATUS "    INCLUDE_DIRECTORIES:   ${MAGMA_INCLUDE}"   )
 if (MAGMA_ENABLE_CUDA)
-    message(STATUS "    CUDA_CUDART_LIBRARY:   CUDA::cudart")
-    message(STATUS "    CUDA_CUBLAS_LIBRARIES: CUDA::cublas")
-    message(STATUS "    CUDA_cusparse_LIBRARY: CUDA::cusparse")
-else ()
-    message(STATUS "    HIP_LIBRARY:   hip::device")
-    message(STATUS "    HIP_BLAS_LIBRARIES: roc::hipblas")
-    message(STATUS "    HIP_sparse_LIBRARY: roc::hipsparse")
-endif ()
-message(STATUS "    Fortran modules:       ${modules}")
+  message( STATUS "    CUDA_CUDART_LIBRARY:   CUDA::cudart"   )
+  message( STATUS "    CUDA_CUBLAS_LIBRARIES: CUDA::cublas" )
+  message( STATUS "    CUDA_cusparse_LIBRARY: CUDA::cusparse" )
+else()
+  message( STATUS "    HIP_LIBRARY:   hip::device"   )
+  message( STATUS "    HIP_BLAS_LIBRARIES: roc::hipblas" )
+  message( STATUS "    HIP_sparse_LIBRARY: roc::hipsparse" )
+endif()
+message( STATUS "    Fortran modules:       ${modules}" )
diff --git a/Makefile b/Makefile
index 44880ab61..1fabf54ca 100644
--- a/Makefile
+++ b/Makefile
@@ -847,7 +847,6 @@ d_ext := cu
 else ifeq ($(BACKEND),hip)
 d_ext := cpp
 CXXFLAGS += -D__HIP_PLATFORM_AMD__
-CXXFLAGS += -DROCM_VERSION=$(shell ./tools/get-rocm-version.sh)
 endif
 
 
diff --git a/include/magma_auxiliary.h b/include/magma_auxiliary.h
index 2c7fd075c..32434b575 100644
--- a/include/magma_auxiliary.h
+++ b/include/magma_auxiliary.h
@@ -139,43 +139,43 @@ magma_memset_async(void * ptr, int value, size_t count, magma_queue_t queue);
 /// Type-safe version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes.
 static inline magma_int_t magma_imalloc( magmaInt_ptr           *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_int_t)        ); }
 
-/// Type-safe version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes.
+/// Type-safe asynchronous version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes using CUDA stream specified in queue.
 static inline magma_int_t magma_imalloc_async( magmaInt_ptr           *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_int_t), queue ); }
 
 /// Type-safe version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes.
 static inline magma_int_t magma_index_malloc( magmaIndex_ptr    *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_index_t)      ); }
 
-/// Type-safe version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes.
+/// Type-safe asynchronous version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes using CUDA stream specified in queue.
 static inline magma_int_t magma_index_malloc_async( magmaIndex_ptr    *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_index_t), queue ); }
 
 /// Type-safe version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes.
 static inline magma_int_t magma_uindex_malloc( magmaUIndex_ptr    *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_uindex_t)      ); }
 
-/// Type-safe version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes.
+/// Type-safe asynchronous version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes using CUDA stream specified in queue.
 static inline magma_int_t magma_uindex_malloc_async( magmaUIndex_ptr    *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_uindex_t), queue); }
 
 /// Type-safe version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes.
 static inline magma_int_t magma_smalloc( magmaFloat_ptr         *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(float)              ); }
 
-/// Type-safe version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes.
+/// Type-safe asynchronous version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes using CUDA stream specified in queue.
 static inline magma_int_t magma_smalloc_async( magmaFloat_ptr         *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(float), queue); }
 
 /// Type-safe version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes.
 static inline magma_int_t magma_dmalloc( magmaDouble_ptr        *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(double)             ); }
 
-/// Type-safe version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes.
+/// Type-safe asynchronous version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes using CUDA stream specified in queue.
 static inline magma_int_t magma_dmalloc_async( magmaDouble_ptr        *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(double), queue); }
 
 /// Type-safe version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes.
 static inline magma_int_t magma_cmalloc( magmaFloatComplex_ptr  *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magmaFloatComplex)  ); }
 
-/// Type-safe version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes.
+/// Type-safe asynchronous version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes using CUDA stream specified in queue.
 static inline magma_int_t magma_cmalloc_async( magmaFloatComplex_ptr  *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magmaFloatComplex), queue ); }
 
 /// Type-safe version of magma_malloc(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes.
 static inline magma_int_t magma_zmalloc( magmaDoubleComplex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magmaDoubleComplex) ); }
 
-/// Type-safe version of magma_malloc_async(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes.
+/// Type-safe asynchronous version of magma_malloc_async(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes using CUDA stream specified in queue.
 static inline magma_int_t magma_zmalloc_async( magmaDoubleComplex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magmaDoubleComplex), queue ); }
 
 /// @}
diff --git a/interface_cuda/alloc.cpp b/interface_cuda/alloc.cpp
index 74f9bdbd1..da30b1143 100644
--- a/interface_cuda/alloc.cpp
+++ b/interface_cuda/alloc.cpp
@@ -89,21 +89,23 @@ magma_malloc( magma_ptr* ptrPtr, size_t size )
     @param[in]
     size    Size in bytes to allocate. If size = 0, allocates some minimal size.
 
-     @param[in]
+    @param[in]
     queue    Magma queue whose CUDA stream is used to put the cudaMalloc on.
 
     @return MAGMA_SUCCESS
     @return MAGMA_ERR_DEVICE_ALLOC on failure
 
     Type-safe versions avoid the need for a (void**) cast and explicit sizeof.
-    @see magma_smalloc_q
-    @see magma_dmalloc_q
-    @see magma_cmalloc_q
-    @see magma_zmalloc_q
-    @see magma_imalloc_q
-    @see magma_index_malloc_q
+    @see magma_smalloc_async
+    @see magma_dmalloc_async
+    @see magma_cmalloc_async
+    @see magma_zmalloc_async
+    @see magma_imalloc_async
+    @see magma_index_malloc_async
+    @see magma_uindex_malloc_async
 
     @ingroup magma_malloc
+    
 *******************************************************************************/
 extern "C" magma_int_t
 magma_malloc_async( magma_ptr* ptrPtr, size_t size, magma_queue_t queue)
@@ -249,10 +251,10 @@ magma_malloc_cpu( void** ptrPtr, size_t size )
     }
 #endif
 #else
-        *ptrPtr = malloc( size );
-        if ( *ptrPtr == NULL ) {
-            return MAGMA_ERR_HOST_ALLOC;
-        }
+    *ptrPtr = malloc( size );
+    if ( *ptrPtr == NULL ) {
+        return MAGMA_ERR_HOST_ALLOC;
+    }
 #endif
 
     #ifdef DEBUG_MEMORY
@@ -413,16 +415,37 @@ magma_mem_info(size_t * freeMem, size_t * totalMem) {
     return MAGMA_SUCCESS;
 }
 
+/***************************************************************************//**
+    Sets memory chunk pointed to by ptr of size count to the byte value of value.
+
+    @param[in]
+    ptr    Address of the chunk of GPU memory to set.
+
+    @param[in]
+    count  Number of bytes to set starting at ptr.
+
+    @return CUDA_SUCCESS
+    @return CUDA_ERROR_* code on failure
 
+*******************************************************************************/
 extern "C" magma_int_t
 magma_memset(void * ptr, int value, size_t count) {
     return cudaMemset(ptr, value, count);
 }
 
+/***************************************************************************//**
+
+    @copydoc magma_memset
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C" magma_int_t
 magma_memset_async(void * ptr, int value, size_t count, magma_queue_t queue) {
 #ifdef MAGMA_HAVE_CUDA
-//    return cudaMemsetAsync(ptr, value, count, queue);
     return cudaMemsetAsync(ptr, value, count, queue->cuda_stream());
 #elif defined(MAGMA_HAVE_HIP)
     return hipMemsetAsync(ptr, value, count, queue->hip_stream());
diff --git a/interface_cuda/blas_z_v2.cpp b/interface_cuda/blas_z_v2.cpp
index 351ca7a7e..c7f5da2e0 100644
--- a/interface_cuda/blas_z_v2.cpp
+++ b/interface_cuda/blas_z_v2.cpp
@@ -1859,7 +1859,7 @@ magma_ztrmm(
 		    int(m), int(n),
 		    (cuDoubleComplex*)&alpha, (const cuDoubleComplex*)dA, int(ldda),
 		    (cuDoubleComplex*)dB, int(lddb)
-    #if (ROCM_VERSION >= 60000)
+    #if (hipblasVersionMajor >= 2)
 		    , (cuDoubleComplex*)dB, int(lddb)
     #endif
 		    );
diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp
index 0c317ebb5..19dd6dc4b 100644
--- a/interface_cuda/interface.cpp
+++ b/interface_cuda/interface.cpp
@@ -491,7 +491,7 @@ magma_is_devptr( const void* A )
                   #endif
 
                 #elif defined(MAGMA_HAVE_HIP)
-		  #if ROCM_VERSION >= 60000
+		  #if HIP_VERSION_MAJOR >= 6
 		    return (attr.type == hipMemoryTypeDevice);
 		  #else
                     return (attr.memoryType == hipMemoryTypeDevice);
diff --git a/magmablas/ztrsm.cu b/magmablas/ztrsm.cu
index 81cc626a6..8474e4aae 100644
--- a/magmablas/ztrsm.cu
+++ b/magmablas/ztrsm.cu
@@ -742,6 +742,16 @@ void magmablas_ztrsm(
     magma_free( dX );
 }
 
+/***************************************************************************//**
+
+    @copydoc magmablas_ztrsm
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C"
 void magmablas_ztrsm_async(
     magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
diff --git a/magmablas/ztrtri_diag.cu b/magmablas/ztrtri_diag.cu
index 2e75f20e3..8207b4dba 100644
--- a/magmablas/ztrtri_diag.cu
+++ b/magmablas/ztrtri_diag.cu
@@ -179,6 +179,17 @@ magmablas_ztrtri_diag(
     }
 }
 
+
+/***************************************************************************//**
+
+    @copydoc magmablas_ztrtri_diag
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C" void
 magmablas_ztrtri_diag_async(
     magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
diff --git a/make.inc-examples/make.inc.hip-gcc-mkl b/make.inc-examples/make.inc.hip-gcc-mkl
index 4a7809e20..24ee6bf6b 100644
--- a/make.inc-examples/make.inc.hip-gcc-mkl
+++ b/make.inc-examples/make.inc.hip-gcc-mkl
@@ -104,9 +104,6 @@ LDFLAGS     =     $(FPIC) $(FOPENMP)
 # add in the default for device compiling
 DEVCCFLAGS  = -O3 -DNDEBUG -DADD_
 
-# Add ROCM_VERSION for device compilation
-DEVCCFLAGS += -DROCM_VERSION=$(shell ./tools/get-rocm-version.sh)
-
 # add the flags in a backend-specific way
 ifeq ($(BACKEND),cuda)
     DEVCCFLAGS += -Xcompiler "$(FPIC)" -Xcompiler "$(FOPENMP)" -std=c++11
diff --git a/make.inc-examples/make.inc.hip-gcc-openblas b/make.inc-examples/make.inc.hip-gcc-openblas
index 2c6be03ea..547169bb1 100644
--- a/make.inc-examples/make.inc.hip-gcc-openblas
+++ b/make.inc-examples/make.inc.hip-gcc-openblas
@@ -108,9 +108,6 @@ LDFLAGS     =     $(FPIC) $(FOPENMP)
 # add in the default for device compiling
 DEVCCFLAGS  = -O3 -DNDEBUG -DADD_
 
-# Add ROCM_VERSION for device compilation
-DEVCCFLAGS += -DROCM_VERSION=$(shell ./tools/get-rocm-version.sh)
-
 # add the flags in a backend-specific way
 ifeq ($(BACKEND),cuda)
     DEVCCFLAGS += -Xcompiler "$(FPIC)" -Xcompiler "$(FOPENMP)" -std=c++11
diff --git a/src/zgerbt_gpu.cpp b/src/zgerbt_gpu.cpp
index 436b13ce1..44a4ee40a 100644
--- a/src/zgerbt_gpu.cpp
+++ b/src/zgerbt_gpu.cpp
@@ -170,6 +170,16 @@ magma_zgerbt_gpu(
     return *info;
 }
 
+/***************************************************************************//**
+
+    @copydoc magma_zgerbt_gpu
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C" magma_int_t
 magma_zgerbt_gpu_async(
         const magma_bool_t gen, const magma_int_t n, const magma_int_t nrhs,
diff --git a/src/zgerfs_nopiv_gpu.cpp b/src/zgerfs_nopiv_gpu.cpp
index 3432d67b5..0a2e0152b 100644
--- a/src/zgerfs_nopiv_gpu.cpp
+++ b/src/zgerfs_nopiv_gpu.cpp
@@ -277,6 +277,26 @@ magma_zgerfs_nopiv_gpu(
     return *info;
 }
 
+/***************************************************************************//**
+
+    @copydoc magma_zgerfs_nopiv_gpu
+
+    @param[in]
+    iter_max INTEGER
+            The maximum number of refinement iterations performed.
+
+    @param[in]
+    bwdmax  DOUBLE
+            Refine the solution if the error is above the threshold multiplied by
+            bwdmax. See @magma_zgerfs_nopiv_gpu for how error threshold is calculated.
+            Set to zero to perform all iterations specified in iter_max.
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C" magma_int_t
 magma_zgerfs_nopiv_gpu_async(
         magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
diff --git a/src/zgesv_nopiv_gpu.cpp b/src/zgesv_nopiv_gpu.cpp
index ab76c5ef9..ba389c2ee 100644
--- a/src/zgesv_nopiv_gpu.cpp
+++ b/src/zgesv_nopiv_gpu.cpp
@@ -94,6 +94,16 @@ magma_zgesv_nopiv_gpu(
     return *info;
 }
 
+/***************************************************************************//**
+
+    @copydoc magma_zgesv_nopiv_gpu
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C" magma_int_t
 magma_zgesv_nopiv_gpu_async(
         magma_int_t n, magma_int_t nrhs,
diff --git a/src/zgesv_rbt.cpp b/src/zgesv_rbt.cpp
index 9b11daa77..61055e814 100644
--- a/src/zgesv_rbt.cpp
+++ b/src/zgesv_rbt.cpp
@@ -202,6 +202,26 @@ magma_zgesv_rbt(
     return *info;
 }
 
+/***************************************************************************//**
+
+    @copydoc magma_zgetrf_nopiv_gpu
+
+    @param[in]
+    iter_max INTEGER
+            The maximum number of refinement iterations performed.
+
+    @param[in]
+    bwdmax  DOUBLE
+            Refine the solution if the error is above the threshold multiplied by
+            bwdmax. See @magma_zgerfs_nopiv_gpu for how error threshold is calculated.
+            Set to zero to perform all iterations specified in iter_max.
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C" magma_int_t
 magma_zgesv_rbt_async(
         const magma_bool_t refine, const magma_int_t n, const magma_int_t nrhs,
diff --git a/src/zgetrf_nopiv_gpu.cpp b/src/zgetrf_nopiv_gpu.cpp
index 5abbbafa7..7d20f3264 100644
--- a/src/zgetrf_nopiv_gpu.cpp
+++ b/src/zgetrf_nopiv_gpu.cpp
@@ -210,6 +210,17 @@ magma_zgetrf_nopiv_gpu(
     return *info;
 } /* magma_zgetrf_nopiv_gpu */
 
+
+/***************************************************************************//**
+
+    @copydoc magma_zgetrf_nopiv_gpu
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C" magma_int_t
 magma_zgetrf_nopiv_gpu_async(
         magma_int_t m, magma_int_t n,
diff --git a/src/zgetrs_nopiv_gpu.cpp b/src/zgetrs_nopiv_gpu.cpp
index 1a56bee75..9eef098df 100644
--- a/src/zgetrs_nopiv_gpu.cpp
+++ b/src/zgetrs_nopiv_gpu.cpp
@@ -131,6 +131,16 @@ magma_zgetrs_nopiv_gpu(
     return *info;
 }
 
+/***************************************************************************//**
+
+    @copydoc magma_zgetrs_nopiv_gpu
+
+    @param[in]
+    queue   magma_queue_t
+            A pointer to a magma_queue structure that will be used for the 
+            execution of this method, and all methods it calls.  queue != nullptr
+
+*******************************************************************************/
 extern "C" magma_int_t
 magma_zgetrs_nopiv_gpu_async(
         magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
diff --git a/tools/get-rocm-version.sh b/tools/get-rocm-version.sh
deleted file mode 100755
index c28974a12..000000000
--- a/tools/get-rocm-version.sh
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/bin/bash
-
-DEBUG=0
-[[ -d $1 ]] && ROCM_PATH=$1 || ROCM_PATH="${ROCM_PATH:-/opt/rocm}"
-HEADER_PATH_ONE=${ROCM_PATH}/include/rocm-core/rocm_version.h
-HEADER_PATH_TWO=${ROCM_PATH}/include/rocm_version.h
-PACKAGE_PATH_ONE=${ROCM_PATH}/.info/version-dev
-PACKAGE_PATH_TWO=${ROCM_PATH}/.info/version
-
-function parse_semver() {
-    local token="$1"
-    local major=0
-    local minor=0
-    local patch=0
-
-    if egrep '^[0-9]+\.[0-9]+\.[0-9]+' <<<"$token" >/dev/null 2>&1
-    then
-        # It has the correct syntax.
-        local n=${token//[!0-9]/ }
-        local a=(${n//\./ })
-        major=${a[0]}
-        minor=${a[1]}
-        patch=${a[2]}
-    fi
-
-    echo "$major $minor $patch"
-}
-
-if [[ -f ${HEADER_PATH_ONE} ]]
-then
-    ROCM_VERSION_DEV_RAW=$(grep ROCM_BUILD_INFO ${HEADER_PATH_ONE} | cut -d '"' -f 2)
-    if [[ "x$DEBUG" = "x1" ]]
-    then
-        echo "Found ${HEADER_PATH_ONE}, ROCM_VERSION_DEV_RAW=${ROCM_VERSION_DEV_RAW}"
-    fi
-elif [[ -f ${HEADER_PATH_TWO} ]]
-then
-    ROCM_VERSION_DEV_RAW=$(grep ROCM_BUILD_INFO ${HEADER_PATH_TWO} | cut -d '"' -f 2)
-    if [[ "x$DEBUG" = "x1" ]]
-    then
-        echo "Found ${HEADER_PATH_TWO}, ROCM_VERSION_DEV_RAW=${ROCM_VERSION_DEV_RAW}"
-    fi
-elif [[ -f ${PACKAGE_PATH_ONE} ]]
-then
-    ROCM_VERSION_DEV_RAW=$(cat ${PACKAGE_PATH_ONE})
-    if [[ "x$DEBUG" = "x1" ]]
-    then
-        echo "Found ${PACKAGE_PATH_ONE}, ROCM_VERSION_DEV_RAW=${ROCM_VERSION_DEV_RAW}"
-    fi
-elif [[ -f ${PACKAGE_PATH_TWO} ]]
-then
-    ROCM_VERSION_DEV_RAW=$(cat ${PACKAGE_PATH_TWO})
-    if [[ "x$DEBUG" = "x1" ]]
-    then
-        echo "Found ${PACKAGE_PATH_TWO}, ROCM_VERSION_DEV_RAW=${ROCM_VERSION_DEV_RAW}"
-    fi
-else
-    echo "ROCM_VERSION_NOT_FOUND"
-    exit 1
-fi
-
-a=($(parse_semver "${ROCM_VERSION_DEV_RAW}"))
-major=${a[0]}
-minor=${a[1]}
-patch=${a[2]}
-
-echo "$((major * 10000 + minor * 100 + patch))"
-

From be231a8707e1a65b16d5b87311a956b67781ee75 Mon Sep 17 00:00:00 2001
From: Xeonacid <h.dwwwwww@gmail.com>
Date: Tue, 29 Apr 2025 14:56:50 +0800
Subject: [PATCH 04/27] Drop CMP0037 to fix cmake 4.0 build error

Don't know why it is here. Removing it builds fine on my end.
---
 CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a9166be8..9324be3eb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -660,7 +660,6 @@ endif()
 
 
 set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" )
-cmake_policy( SET CMP0037 OLD)
 foreach( TEST ${sparse_testing_all} )
     string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} )
     string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} )

From 87bf295a8ffbd041b49184b1a5fe2374630fee1e Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Fri, 14 Feb 2025 13:31:31 +0100
Subject: [PATCH 05/27] remove blas_fix, which was to support macOS Accelerate

---
 blas_fix/Makefile     |   10 -
 blas_fix/Makefile.src |   67 --
 blas_fix/README.txt   |    8 -
 blas_fix/cdotc.f      |  102 ---
 blas_fix/cdotu.f      |   98 ---
 blas_fix/cladiv.f     |   97 ---
 blas_fix/clangb.f     |  227 ------
 blas_fix/clange.f     |  214 ------
 blas_fix/clangt.f     |  209 ------
 blas_fix/clanhb.f     |  277 --------
 blas_fix/clanhe.f     |  259 -------
 blas_fix/clanhf.f     | 1573 -----------------------------------------
 blas_fix/clanhp.f     |  270 -------
 blas_fix/clanhs.f     |  208 ------
 blas_fix/clanht.f     |  189 -----
 blas_fix/clansb.f     |  261 -------
 blas_fix/clansp.f     |  273 -------
 blas_fix/clansy.f     |  244 -------
 blas_fix/clantb.f     |  364 ----------
 blas_fix/clantp.f     |  358 ----------
 blas_fix/clantr.f     |  356 ----------
 blas_fix/sasum.f      |  112 ---
 blas_fix/scabs1.f     |   57 --
 blas_fix/scasum.f     |   97 ---
 blas_fix/scnrm2.f     |  119 ----
 blas_fix/scsum1.f     |  140 ----
 blas_fix/sdot.f       |  117 ---
 blas_fix/sdsdot.f     |  255 -------
 blas_fix/sladiv.f     |  253 -------
 blas_fix/slamch.f     |  192 -----
 blas_fix/slangb.f     |  226 ------
 blas_fix/slange.f     |  212 ------
 blas_fix/slangt.f     |  209 ------
 blas_fix/slanhs.f     |  206 ------
 blas_fix/slansb.f     |  259 -------
 blas_fix/slansf.f     |  965 -------------------------
 blas_fix/slansp.f     |  262 -------
 blas_fix/slanst.f     |  187 -----
 blas_fix/slansy.f     |  242 -------
 blas_fix/slantb.f     |  362 ----------
 blas_fix/slantp.f     |  356 ----------
 blas_fix/slantr.f     |  354 ----------
 blas_fix/slapy2.f     |  104 ---
 blas_fix/slapy3.f     |  111 ---
 blas_fix/snrm2.f      |  112 ---
 45 files changed, 11173 deletions(-)
 delete mode 100644 blas_fix/Makefile
 delete mode 100644 blas_fix/Makefile.src
 delete mode 100644 blas_fix/README.txt
 delete mode 100644 blas_fix/cdotc.f
 delete mode 100644 blas_fix/cdotu.f
 delete mode 100644 blas_fix/cladiv.f
 delete mode 100644 blas_fix/clangb.f
 delete mode 100644 blas_fix/clange.f
 delete mode 100644 blas_fix/clangt.f
 delete mode 100644 blas_fix/clanhb.f
 delete mode 100644 blas_fix/clanhe.f
 delete mode 100644 blas_fix/clanhf.f
 delete mode 100644 blas_fix/clanhp.f
 delete mode 100644 blas_fix/clanhs.f
 delete mode 100644 blas_fix/clanht.f
 delete mode 100644 blas_fix/clansb.f
 delete mode 100644 blas_fix/clansp.f
 delete mode 100644 blas_fix/clansy.f
 delete mode 100644 blas_fix/clantb.f
 delete mode 100644 blas_fix/clantp.f
 delete mode 100644 blas_fix/clantr.f
 delete mode 100644 blas_fix/sasum.f
 delete mode 100644 blas_fix/scabs1.f
 delete mode 100644 blas_fix/scasum.f
 delete mode 100644 blas_fix/scnrm2.f
 delete mode 100644 blas_fix/scsum1.f
 delete mode 100644 blas_fix/sdot.f
 delete mode 100644 blas_fix/sdsdot.f
 delete mode 100644 blas_fix/sladiv.f
 delete mode 100644 blas_fix/slamch.f
 delete mode 100644 blas_fix/slangb.f
 delete mode 100644 blas_fix/slange.f
 delete mode 100644 blas_fix/slangt.f
 delete mode 100644 blas_fix/slanhs.f
 delete mode 100644 blas_fix/slansb.f
 delete mode 100644 blas_fix/slansf.f
 delete mode 100644 blas_fix/slansp.f
 delete mode 100644 blas_fix/slanst.f
 delete mode 100644 blas_fix/slansy.f
 delete mode 100644 blas_fix/slantb.f
 delete mode 100644 blas_fix/slantp.f
 delete mode 100644 blas_fix/slantr.f
 delete mode 100644 blas_fix/slapy2.f
 delete mode 100644 blas_fix/slapy3.f
 delete mode 100644 blas_fix/snrm2.f

diff --git a/blas_fix/Makefile b/blas_fix/Makefile
deleted file mode 100644
index 6e7f5693d..000000000
--- a/blas_fix/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# See Makefile.src for list of files in this directory.
-# This makefile simply forwards commands to the top-level makefile.
-
-top  := ..
-pwd  := $(shell pwd)
-cdir := $(notdir $(pwd))
-
-default: $(cdir)
-
-include $(top)/Makefile.subdir
diff --git a/blas_fix/Makefile.src b/blas_fix/Makefile.src
deleted file mode 100644
index 6a3a76c78..000000000
--- a/blas_fix/Makefile.src
+++ /dev/null
@@ -1,67 +0,0 @@
-#//////////////////////////////////////////////////////////////////////////////
-#   -- MAGMA (version 2.0) --
-#      Univ. of Tennessee, Knoxville
-#      Univ. of California, Berkeley
-#      Univ. of Colorado, Denver
-#      @date
-#//////////////////////////////////////////////////////////////////////////////
-
-# push previous directory
-dir_stack := $(dir_stack) $(cdir)
-cdir      := blas_fix
-# ----------------------------------------------------------------------
-
-
-# BLAS single & single-complex functions
-libblas_fix_src := \
-	$(cdir)/cdotc.f		\
-	$(cdir)/cdotu.f		\
-	$(cdir)/sasum.f		\
-	$(cdir)/scabs1.f	\
-	$(cdir)/scasum.f	\
-	$(cdir)/scnrm2.f	\
-	$(cdir)/sdot.f		\
-	$(cdir)/sdsdot.f	\
-	$(cdir)/snrm2.f		\
-
-# LAPACK single & single-complex functions
-libblas_fix_src += \
-	$(cdir)/cladiv.f	\
-	$(cdir)/clangb.f	\
-	$(cdir)/clange.f	\
-	$(cdir)/clangt.f	\
-	$(cdir)/clanhb.f	\
-	$(cdir)/clanhe.f	\
-	$(cdir)/clanhf.f	\
-	$(cdir)/clanhp.f	\
-	$(cdir)/clanhs.f	\
-	$(cdir)/clanht.f	\
-	$(cdir)/clansb.f	\
-	$(cdir)/clansp.f	\
-	$(cdir)/clansy.f	\
-	$(cdir)/clantb.f	\
-	$(cdir)/clantp.f	\
-	$(cdir)/clantr.f	\
-	$(cdir)/scsum1.f	\
-	$(cdir)/sladiv.f	\
-	$(cdir)/slangb.f	\
-	$(cdir)/slange.f	\
-	$(cdir)/slangt.f	\
-	$(cdir)/slanhs.f	\
-	$(cdir)/slansb.f	\
-	$(cdir)/slansf.f	\
-	$(cdir)/slansp.f	\
-	$(cdir)/slanst.f	\
-	$(cdir)/slansy.f	\
-	$(cdir)/slantb.f	\
-	$(cdir)/slantp.f	\
-	$(cdir)/slantr.f	\
-	$(cdir)/slapy2.f	\
-	$(cdir)/slapy3.f	\
-	$(cdir)/slamch.f	\
-
-
-# ----------------------------------------------------------------------
-# pop first directory
-cdir      := $(firstword $(dir_stack))
-dir_stack := $(wordlist 2, $(words $(dir_stack)), $(dir_stack))
diff --git a/blas_fix/README.txt b/blas_fix/README.txt
deleted file mode 100644
index a91934443..000000000
--- a/blas_fix/README.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-MacOS veclib has a bug where some single precision functions return
-a double precision result, for instance slange.
-This is observed with -m64, but oddly not with -m32.
-The easiest fix is to replace those routines with correct ones from LAPACK (3.5.0).
-See BLAS_FIX in make.inc.macos
-
-Note that these are Level 1 and Level 2 BLAS and BLAS-like functions,
-primarily norms and dot products, which are not generally performance critical.
diff --git a/blas_fix/cdotc.f b/blas_fix/cdotc.f
deleted file mode 100644
index 8e7d8b9d9..000000000
--- a/blas_fix/cdotc.f
+++ /dev/null
@@ -1,102 +0,0 @@
-*> \brief \b CDOTC
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
-* 
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    forms the dot product of two vectors, conjugating the first
-*>    vector.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack,  3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX CTEMP
-      INTEGER I,IX,IY
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG
-*     ..
-      CTEMP = (0.0,0.0)
-      CDOTC = (0.0,0.0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            CTEMP = CTEMP + CONJG(CX(I))*CY(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CTEMP = CTEMP + CONJG(CX(IX))*CY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      CDOTC = CTEMP
-      RETURN
-      END
diff --git a/blas_fix/cdotu.f b/blas_fix/cdotu.f
deleted file mode 100644
index 456a409f7..000000000
--- a/blas_fix/cdotu.f
+++ /dev/null
@@ -1,98 +0,0 @@
-*> \brief \b CDOTU
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
-* 
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    CDOTU forms the dot product of two vectors.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX CTEMP
-      INTEGER I,IX,IY
-*     ..
-      CTEMP = (0.0,0.0)
-      CDOTU = (0.0,0.0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            CTEMP = CTEMP + CX(I)*CY(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CTEMP = CTEMP + CX(IX)*CY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      CDOTU = CTEMP
-      RETURN
-      END
diff --git a/blas_fix/cladiv.f b/blas_fix/cladiv.f
deleted file mode 100644
index 0d0ec666f..000000000
--- a/blas_fix/cladiv.f
+++ /dev/null
@@ -1,97 +0,0 @@
-*> \brief \b CLADIV performs complex division in real arithmetic, avoiding unnecessary overflow.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLADIV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cladiv.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cladiv.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cladiv.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       COMPLEX FUNCTION CLADIV( X, Y )
-* 
-*       .. Scalar Arguments ..
-*       COMPLEX            X, Y
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLADIV := X / Y, where X and Y are complex.  The computation of X / Y
-*> will not overflow on an intermediary step unless the results
-*> overflows.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX
-*>          The complex scalars X and Y.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      COMPLEX FUNCTION CLADIV( X, Y )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      COMPLEX            X, Y
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL               ZI, ZR
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLADIV
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          AIMAG, CMPLX, REAL
-*     ..
-*     .. Executable Statements ..
-*
-      CALL SLADIV( REAL( X ), AIMAG( X ), REAL( Y ), AIMAG( Y ), ZR,
-     $             ZI )
-      CLADIV = CMPLX( ZR, ZI )
-*
-      RETURN
-*
-*     End of CLADIV
-*
-      END
diff --git a/blas_fix/clangb.f b/blas_fix/clangb.f
deleted file mode 100644
index cb6ef626e..000000000
--- a/blas_fix/clangb.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b CLANGB returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of general band matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANGB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clangb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clangb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clangb.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANGB( NORM, N, KL, KU, AB, LDAB,
-*                        WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            KL, KU, LDAB, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            AB( LDAB, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANGB  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the element of  largest absolute value  of an
-*> n by n band matrix  A,  with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*>
-*> \return CLANGB
-*> \verbatim
-*>
-*>    CLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANGB as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANGB is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>          The number of sub-diagonals of the matrix A.  KL >= 0.
-*> \endverbatim
-*>
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>          The number of super-diagonals of the matrix A.  KU >= 0.
-*> \endverbatim
-*>
-*> \param[in] AB
-*> \verbatim
-*>          AB is COMPLEX array, dimension (LDAB,N)
-*>          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
-*>          column of A is stored in the j-th column of the array AB as
-*>          follows:
-*>          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
-*> \endverbatim
-*>
-*> \param[in] LDAB
-*> \verbatim
-*>          LDAB is INTEGER
-*>          The leading dimension of the array AB.  LDAB >= KL+KU+1.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexGBauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANGB( NORM, N, KL, KU, AB, LDAB,
-     $                 WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            KL, KU, LDAB, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            AB( LDAB, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, K, L
-      REAL               SCALE, SUM, VALUE, TEMP
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         DO 20 J = 1, N
-            DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
-               TEMP = ABS( AB( I, J ) )
-               IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP
-   10       CONTINUE
-   20    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         DO 40 J = 1, N
-            SUM = ZERO
-            DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
-               SUM = SUM + ABS( AB( I, J ) )
-   30       CONTINUE
-            IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   40    CONTINUE
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         DO 50 I = 1, N
-            WORK( I ) = ZERO
-   50    CONTINUE
-         DO 70 J = 1, N
-            K = KU + 1 - J
-            DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL )
-               WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) )
-   60       CONTINUE
-   70    CONTINUE
-         VALUE = ZERO
-         DO 80 I = 1, N
-            TEMP = WORK( I )
-            IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP
-   80    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         DO 90 J = 1, N
-            L = MAX( 1, J-KU )
-            K = KU + 1 - J + L
-            CALL CLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
-   90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANGB = VALUE
-      RETURN
-*
-*     End of CLANGB
-*
-      END
diff --git a/blas_fix/clange.f b/blas_fix/clange.f
deleted file mode 100644
index 8423b69c4..000000000
--- a/blas_fix/clange.f
+++ /dev/null
@@ -1,214 +0,0 @@
-*> \brief \b CLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general rectangular matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANGE + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clange.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clange.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clange.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANGE( NORM, M, N, A, LDA, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            A( LDA, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANGE  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> complex matrix A.
-*> \endverbatim
-*>
-*> \return CLANGE
-*> \verbatim
-*>
-*>    CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANGE as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.  When M = 0,
-*>          CLANGE is set to zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.  When N = 0,
-*>          CLANGE is set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array, dimension (LDA,N)
-*>          The m by n matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(M,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexGEauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANGE( NORM, M, N, A, LDA, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            A( LDA, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J
-      REAL               SCALE, SUM, VALUE, TEMP
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( MIN( M, N ).EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         DO 20 J = 1, N
-            DO 10 I = 1, M
-               TEMP = ABS( A( I, J ) )
-               IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP
-   10       CONTINUE
-   20    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         DO 40 J = 1, N
-            SUM = ZERO
-            DO 30 I = 1, M
-               SUM = SUM + ABS( A( I, J ) )
-   30       CONTINUE
-            IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   40    CONTINUE
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         DO 50 I = 1, M
-            WORK( I ) = ZERO
-   50    CONTINUE
-         DO 70 J = 1, N
-            DO 60 I = 1, M
-               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-   60       CONTINUE
-   70    CONTINUE
-         VALUE = ZERO
-         DO 80 I = 1, M
-            TEMP = WORK( I )
-            IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP
-   80    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         DO 90 J = 1, N
-            CALL CLASSQ( M, A( 1, J ), 1, SCALE, SUM )
-   90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANGE = VALUE
-      RETURN
-*
-*     End of CLANGE
-*
-      END
diff --git a/blas_fix/clangt.f b/blas_fix/clangt.f
deleted file mode 100644
index 6b15a3913..000000000
--- a/blas_fix/clangt.f
+++ /dev/null
@@ -1,209 +0,0 @@
-*> \brief \b CLANGT returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general tridiagonal matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANGT + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clangt.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clangt.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clangt.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANGT( NORM, N, DL, D, DU )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX            D( * ), DL( * ), DU( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANGT  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> complex tridiagonal matrix A.
-*> \endverbatim
-*>
-*> \return CLANGT
-*> \verbatim
-*>
-*>    CLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANGT as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANGT is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] DL
-*> \verbatim
-*>          DL is COMPLEX array, dimension (N-1)
-*>          The (n-1) sub-diagonal elements of A.
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is COMPLEX array, dimension (N)
-*>          The diagonal elements of A.
-*> \endverbatim
-*>
-*> \param[in] DU
-*> \verbatim
-*>          DU is COMPLEX array, dimension (N-1)
-*>          The (n-1) super-diagonal elements of A.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANGT( NORM, N, DL, D, DU )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX            D( * ), DL( * ), DU( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I
-      REAL               ANORM, SCALE, SUM, TEMP
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      ANORM = ZERO
-      IF( N.LE.0 ) THEN
-         ANORM = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         ANORM = ABS( D( N ) )
-         DO 10 I = 1, N - 1
-            IF( ANORM.LT.ABS( DL( I ) ) .OR. SISNAN( ABS( DL( I ) ) ) ) 
-     $           ANORM = ABS(DL(I))
-            IF( ANORM.LT.ABS( D( I ) ) .OR. SISNAN( ABS( D( I ) ) ) ) 
-     $           ANORM = ABS(D(I))
-            IF( ANORM.LT.ABS( DU( I ) ) .OR. SISNAN (ABS( DU( I ) ) ) ) 
-     $           ANORM = ABS(DU(I))
-   10    CONTINUE
-      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN
-*
-*        Find norm1(A).
-*
-         IF( N.EQ.1 ) THEN
-            ANORM = ABS( D( 1 ) )
-         ELSE
-            ANORM = ABS( D( 1 ) )+ABS( DL( 1 ) )
-            TEMP = ABS( D( N ) )+ABS( DU( N-1 ) ) 
-            IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP
-            DO 20 I = 2, N - 1
-               TEMP = ABS( D( I ) )+ABS( DL( I ) )+ABS( DU( I-1 ) )
-               IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP
-   20       CONTINUE
-         END IF
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         IF( N.EQ.1 ) THEN
-            ANORM = ABS( D( 1 ) )
-         ELSE
-            ANORM = ABS( D( 1 ) )+ABS( DU( 1 ) )
-            TEMP = ABS( D( N ) )+ABS( DL( N-1 ) )
-            IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP
-            DO 30 I = 2, N - 1
-               TEMP = ABS( D( I ) )+ABS( DU( I ) )+ABS( DL( I-1 ) )
-               IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP
-   30       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         CALL CLASSQ( N, D, 1, SCALE, SUM )
-         IF( N.GT.1 ) THEN
-            CALL CLASSQ( N-1, DL, 1, SCALE, SUM )
-            CALL CLASSQ( N-1, DU, 1, SCALE, SUM )
-         END IF
-         ANORM = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANGT = ANORM
-      RETURN
-*
-*     End of CLANGT
-*
-      END
diff --git a/blas_fix/clanhb.f b/blas_fix/clanhb.f
deleted file mode 100644
index e6e0d042c..000000000
--- a/blas_fix/clanhb.f
+++ /dev/null
@@ -1,277 +0,0 @@
-*> \brief \b CLANHB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hermitian band matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANHB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clanhb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clanhb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clanhb.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANHB( NORM, UPLO, N, K, AB, LDAB,
-*                        WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            K, LDAB, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            AB( LDAB, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANHB  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the element of  largest absolute value  of an
-*> n by n hermitian band matrix A,  with k super-diagonals.
-*> \endverbatim
-*>
-*> \return CLANHB
-*> \verbatim
-*>
-*>    CLANHB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANHB as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          band matrix A is supplied.
-*>          = 'U':  Upper triangular
-*>          = 'L':  Lower triangular
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANHB is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>          The number of super-diagonals or sub-diagonals of the
-*>          band matrix A.  K >= 0.
-*> \endverbatim
-*>
-*> \param[in] AB
-*> \verbatim
-*>          AB is COMPLEX array, dimension (LDAB,N)
-*>          The upper or lower triangle of the hermitian band matrix A,
-*>          stored in the first K+1 rows of AB.  The j-th column of A is
-*>          stored in the j-th column of the array AB as follows:
-*>          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
-*>          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
-*>          Note that the imaginary parts of the diagonal elements need
-*>          not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] LDAB
-*> \verbatim
-*>          LDAB is INTEGER
-*>          The leading dimension of the array AB.  LDAB >= K+1.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANHB( NORM, UPLO, N, K, AB, LDAB,
-     $                 WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            K, LDAB, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            AB( LDAB, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, L
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, REAL, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 20 J = 1, N
-               DO 10 I = MAX( K+2-J, 1 ), K
-                  SUM = ABS( AB( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM                  
-   10          CONTINUE
-               SUM = ABS( REAL( AB( K+1, J ) ) )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   20       CONTINUE
-         ELSE
-            DO 40 J = 1, N
-               SUM = ABS( REAL( AB( 1, J ) ) )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-               DO 30 I = 2, MIN( N+1-J, K+1 )
-                  SUM = ABS( AB( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is hermitian).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               L = K + 1 - J
-               DO 50 I = MAX( 1, J-K ), J - 1
-                  ABSA = ABS( AB( L+I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( REAL( AB( K+1, J ) ) )
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( REAL( AB( 1, J ) ) )
-               L = 1 - J
-               DO 90 I = J + 1, MIN( N, J+K )
-                  ABSA = ABS( AB( L+I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         IF( K.GT.0 ) THEN
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 110 J = 2, N
-                  CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
-  110          CONTINUE
-               L = K + 1
-            ELSE
-               DO 120 J = 1, N - 1
-                  CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                         SUM )
-  120          CONTINUE
-               L = 1
-            END IF
-            SUM = 2*SUM
-         ELSE
-            L = 1
-         END IF
-         DO 130 J = 1, N
-            IF( REAL( AB( L, J ) ).NE.ZERO ) THEN
-               ABSA = ABS( REAL( AB( L, J ) ) )
-               IF( SCALE.LT.ABSA ) THEN
-                  SUM = ONE + SUM*( SCALE / ABSA )**2
-                  SCALE = ABSA
-               ELSE
-                  SUM = SUM + ( ABSA / SCALE )**2
-               END IF
-            END IF
-  130    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANHB = VALUE
-      RETURN
-*
-*     End of CLANHB
-*
-      END
diff --git a/blas_fix/clanhe.f b/blas_fix/clanhe.f
deleted file mode 100644
index fba278b11..000000000
--- a/blas_fix/clanhe.f
+++ /dev/null
@@ -1,259 +0,0 @@
-*> \brief \b CLANHE returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANHE + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clanhe.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clanhe.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clanhe.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            LDA, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            A( LDA, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANHE  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> complex hermitian matrix A.
-*> \endverbatim
-*>
-*> \return CLANHE
-*> \verbatim
-*>
-*>    CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANHE as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          hermitian matrix A is to be referenced.
-*>          = 'U':  Upper triangular part of A is referenced
-*>          = 'L':  Lower triangular part of A is referenced
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANHE is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array, dimension (LDA,N)
-*>          The hermitian matrix A.  If UPLO = 'U', the leading n by n
-*>          upper triangular part of A contains the upper triangular part
-*>          of the matrix A, and the strictly lower triangular part of A
-*>          is not referenced.  If UPLO = 'L', the leading n by n lower
-*>          triangular part of A contains the lower triangular part of
-*>          the matrix A, and the strictly upper triangular part of A is
-*>          not referenced. Note that the imaginary parts of the diagonal
-*>          elements need not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(N,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexHEauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            LDA, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            A( LDA, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, REAL, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 20 J = 1, N
-               DO 10 I = 1, J - 1
-                  SUM = ABS( A( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10          CONTINUE
-               SUM = ABS( REAL( A( J, J ) ) )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   20       CONTINUE
-         ELSE
-            DO 40 J = 1, N
-               SUM = ABS( REAL( A( J, J ) ) )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-               DO 30 I = J + 1, N
-                  SUM = ABS( A( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is hermitian).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               DO 50 I = 1, J - 1
-                  ABSA = ABS( A( I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( REAL( A( J, J ) ) )
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( REAL( A( J, J ) ) )
-               DO 90 I = J + 1, N
-                  ABSA = ABS( A( I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 2, N
-               CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
-  110       CONTINUE
-         ELSE
-            DO 120 J = 1, N - 1
-               CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
-  120       CONTINUE
-         END IF
-         SUM = 2*SUM
-         DO 130 I = 1, N
-            IF( REAL( A( I, I ) ).NE.ZERO ) THEN
-               ABSA = ABS( REAL( A( I, I ) ) )
-               IF( SCALE.LT.ABSA ) THEN
-                  SUM = ONE + SUM*( SCALE / ABSA )**2
-                  SCALE = ABSA
-               ELSE
-                  SUM = SUM + ( ABSA / SCALE )**2
-               END IF
-            END IF
-  130    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANHE = VALUE
-      RETURN
-*
-*     End of CLANHE
-*
-      END
diff --git a/blas_fix/clanhf.f b/blas_fix/clanhf.f
deleted file mode 100644
index f7bd4fb55..000000000
--- a/blas_fix/clanhf.f
+++ /dev/null
@@ -1,1573 +0,0 @@
-*> \brief \b CLANHF returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hermitian matrix in RFP format.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANHF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clanhf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clanhf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clanhf.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION CLANHF( NORM, TRANSR, UPLO, N, A, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, TRANSR, UPLO
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( 0: * )
-*       COMPLEX            A( 0: * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANHF  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> complex Hermitian matrix A in RFP format.
-*> \endverbatim
-*>
-*> \return CLANHF
-*> \verbatim
-*>
-*>    CLANHF = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER
-*>            Specifies the value to be returned in CLANHF as described
-*>            above.
-*> \endverbatim
-*>
-*> \param[in] TRANSR
-*> \verbatim
-*>          TRANSR is CHARACTER
-*>            Specifies whether the RFP format of A is normal or
-*>            conjugate-transposed format.
-*>            = 'N':  RFP format is Normal
-*>            = 'C':  RFP format is Conjugate-transposed
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER
-*>            On entry, UPLO specifies whether the RFP matrix A came from
-*>            an upper or lower triangular matrix as follows:
-*>
-*>            UPLO = 'U' or 'u' RFP A came from an upper triangular
-*>            matrix
-*>
-*>            UPLO = 'L' or 'l' RFP A came from a  lower triangular
-*>            matrix
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>            The order of the matrix A.  N >= 0.  When N = 0, CLANHF is
-*>            set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array, dimension ( N*(N+1)/2 );
-*>            On entry, the matrix A in RFP Format.
-*>            RFP Format is described by TRANSR, UPLO and N as follows:
-*>            If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even;
-*>            K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If
-*>            TRANSR = 'C' then RFP is the Conjugate-transpose of RFP A
-*>            as defined when TRANSR = 'N'. The contents of RFP A are
-*>            defined by UPLO as follows: If UPLO = 'U' the RFP A
-*>            contains the ( N*(N+1)/2 ) elements of upper packed A
-*>            either in normal or conjugate-transpose Format. If
-*>            UPLO = 'L' the RFP A contains the ( N*(N+1) /2 ) elements
-*>            of lower packed A either in normal or conjugate-transpose
-*>            Format. The LDA of RFP A is (N+1)/2 when TRANSR = 'C'. When
-*>            TRANSR is 'N' the LDA is N+1 when N is even and is N when
-*>            is odd. See the Note below for more details.
-*>            Unchanged on exit.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (LWORK),
-*>            where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>            WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  We first consider Standard Packed Format when N is even.
-*>  We give an example where N = 6.
-*>
-*>      AP is Upper             AP is Lower
-*>
-*>   00 01 02 03 04 05       00
-*>      11 12 13 14 15       10 11
-*>         22 23 24 25       20 21 22
-*>            33 34 35       30 31 32 33
-*>               44 45       40 41 42 43 44
-*>                  55       50 51 52 53 54 55
-*>
-*>
-*>  Let TRANSR = 'N'. RFP holds AP as follows:
-*>  For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last
-*>  three columns of AP upper. The lower triangle A(4:6,0:2) consists of
-*>  conjugate-transpose of the first three columns of AP upper.
-*>  For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first
-*>  three columns of AP lower. The upper triangle A(0:2,0:2) consists of
-*>  conjugate-transpose of the last three columns of AP lower.
-*>  To denote conjugate we place -- above the element. This covers the
-*>  case N even and TRANSR = 'N'.
-*>
-*>         RFP A                   RFP A
-*>
-*>                                -- -- --
-*>        03 04 05                33 43 53
-*>                                   -- --
-*>        13 14 15                00 44 54
-*>                                      --
-*>        23 24 25                10 11 55
-*>
-*>        33 34 35                20 21 22
-*>        --
-*>        00 44 45                30 31 32
-*>        -- --
-*>        01 11 55                40 41 42
-*>        -- -- --
-*>        02 12 22                50 51 52
-*>
-*>  Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate-
-*>  transpose of RFP A above. One therefore gets:
-*>
-*>
-*>           RFP A                   RFP A
-*>
-*>     -- -- -- --                -- -- -- -- -- --
-*>     03 13 23 33 00 01 02    33 00 10 20 30 40 50
-*>     -- -- -- -- --                -- -- -- -- --
-*>     04 14 24 34 44 11 12    43 44 11 21 31 41 51
-*>     -- -- -- -- -- --                -- -- -- --
-*>     05 15 25 35 45 55 22    53 54 55 22 32 42 52
-*>
-*>
-*>  We next  consider Standard Packed Format when N is odd.
-*>  We give an example where N = 5.
-*>
-*>     AP is Upper                 AP is Lower
-*>
-*>   00 01 02 03 04              00
-*>      11 12 13 14              10 11
-*>         22 23 24              20 21 22
-*>            33 34              30 31 32 33
-*>               44              40 41 42 43 44
-*>
-*>
-*>  Let TRANSR = 'N'. RFP holds AP as follows:
-*>  For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last
-*>  three columns of AP upper. The lower triangle A(3:4,0:1) consists of
-*>  conjugate-transpose of the first two   columns of AP upper.
-*>  For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first
-*>  three columns of AP lower. The upper triangle A(0:1,1:2) consists of
-*>  conjugate-transpose of the last two   columns of AP lower.
-*>  To denote conjugate we place -- above the element. This covers the
-*>  case N odd  and TRANSR = 'N'.
-*>
-*>         RFP A                   RFP A
-*>
-*>                                   -- --
-*>        02 03 04                00 33 43
-*>                                      --
-*>        12 13 14                10 11 44
-*>
-*>        22 23 24                20 21 22
-*>        --
-*>        00 33 34                30 31 32
-*>        -- --
-*>        01 11 44                40 41 42
-*>
-*>  Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate-
-*>  transpose of RFP A above. One therefore gets:
-*>
-*>
-*>           RFP A                   RFP A
-*>
-*>     -- -- --                   -- -- -- -- -- --
-*>     02 12 22 00 01             00 10 20 30 40 50
-*>     -- -- -- --                   -- -- -- -- --
-*>     03 13 23 33 11             33 11 21 31 41 51
-*>     -- -- -- -- --                   -- -- -- --
-*>     04 14 24 34 44             43 44 22 32 42 52
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION CLANHF( NORM, TRANSR, UPLO, N, A, WORK )
-*
-*  -- LAPACK computational routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, TRANSR, UPLO
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( 0: * )
-      COMPLEX            A( 0: * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, IFM, ILU, NOE, N1, K, L, LDA
-      REAL               SCALE, S, VALUE, AA, TEMP
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, REAL, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      IF( N.EQ.0 ) THEN
-         CLANHF = ZERO
-         RETURN
-      ELSE IF( N.EQ.1 ) THEN
-         CLANHF = ABS( A(0) )
-         RETURN
-      END IF
-*
-*     set noe = 1 if n is odd. if n is even set noe=0
-*
-      NOE = 1
-      IF( MOD( N, 2 ).EQ.0 )
-     $   NOE = 0
-*
-*     set ifm = 0 when form='C' or 'c' and 1 otherwise
-*
-      IFM = 1
-      IF( LSAME( TRANSR, 'C' ) )
-     $   IFM = 0
-*
-*     set ilu = 0 when uplo='U or 'u' and 1 otherwise
-*
-      ILU = 1
-      IF( LSAME( UPLO, 'U' ) )
-     $   ILU = 0
-*
-*     set lda = (n+1)/2 when ifm = 0
-*     set lda = n when ifm = 1 and noe = 1
-*     set lda = n+1 when ifm = 1 and noe = 0
-*
-      IF( IFM.EQ.1 ) THEN
-         IF( NOE.EQ.1 ) THEN
-            LDA = N
-         ELSE
-*           noe=0
-            LDA = N + 1
-         END IF
-      ELSE
-*        ifm=0
-         LDA = ( N+1 ) / 2
-      END IF
-*
-      IF( LSAME( NORM, 'M' ) ) THEN
-*
-*       Find max(abs(A(i,j))).
-*
-         K = ( N+1 ) / 2
-         VALUE = ZERO
-         IF( NOE.EQ.1 ) THEN
-*           n is odd & n = k + k - 1
-            IF( IFM.EQ.1 ) THEN
-*              A is n by k
-               IF( ILU.EQ.1 ) THEN
-*                 uplo ='L'
-                  J = 0
-*                 -> L(0,0)
-                  TEMP = ABS( REAL( A( J+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                 VALUE = TEMP
-                  DO I = 1, N - 1
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-                  DO J = 1, K - 1
-                     DO I = 0, J - 2
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                     I = J - 1
-*                    L(k+j,k+j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     I = J
-*                    -> L(j,j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     DO I = J + 1, N - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-               ELSE
-*                 uplo = 'U'
-                  DO J = 0, K - 2
-                     DO I = 0, K + J - 2
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                     I = K + J - 1
-*                    -> U(i,i)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     I = I + 1
-*                    =k+j; i -> U(j,j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     DO I = K + J + 1, N - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-                  DO I = 0, N - 2
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-*                    j=k-1
-                  END DO
-*                 i=n-1 -> U(n-1,n-1)
-                  TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                 VALUE = TEMP
-               END IF
-            ELSE
-*              xpose case; A is k by n
-               IF( ILU.EQ.1 ) THEN
-*                 uplo ='L'
-                  DO J = 0, K - 2
-                     DO I = 0, J - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                     I = J
-*                    L(i,i)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     I = J + 1
-*                    L(j+k,j+k)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     DO I = J + 2, K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-                  J = K - 1
-                  DO I = 0, K - 2
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-                  I = K - 1
-*                 -> L(i,i) is at A(i,j)
-                  TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  DO J = K, N - 1
-                     DO I = 0, K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-               ELSE
-*                 uplo = 'U'
-                  DO J = 0, K - 2
-                     DO I = 0, K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-                  J = K - 1
-*                 -> U(j,j) is at A(0,j)
-                  TEMP = ABS( REAL( A( 0+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  DO I = 1, K - 1
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-                  DO J = K, N - 1
-                     DO I = 0, J - K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                     I = J - K
-*                    -> U(i,i) at A(i,j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     I = J - K + 1
-*                    U(j,j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     DO I = J - K + 2, K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-               END IF
-            END IF
-         ELSE
-*           n is even & k = n/2
-            IF( IFM.EQ.1 ) THEN
-*              A is n+1 by k
-               IF( ILU.EQ.1 ) THEN
-*                 uplo ='L'
-                  J = 0
-*                 -> L(k,k) & j=1 -> L(0,0)
-                  TEMP = ABS( REAL( A( J+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                 VALUE = TEMP
-                  TEMP = ABS( REAL( A( J+1+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                 VALUE = TEMP
-                  DO I = 2, N
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-                  DO J = 1, K - 1
-                     DO I = 0, J - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                     I = J
-*                    L(k+j,k+j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     I = J + 1
-*                    -> L(j,j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     DO I = J + 2, N
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-               ELSE
-*                 uplo = 'U'
-                  DO J = 0, K - 2
-                     DO I = 0, K + J - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                     I = K + J
-*                    -> U(i,i)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     I = I + 1
-*                    =k+j+1; i -> U(j,j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     DO I = K + J + 2, N
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-                  DO I = 0, N - 2
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-*                 j=k-1
-                  END DO
-*                 i=n-1 -> U(n-1,n-1)
-                  TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  I = N
-*                 -> U(k-1,k-1)
-                  TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-               END IF
-            ELSE
-*              xpose case; A is k by n+1
-               IF( ILU.EQ.1 ) THEN
-*                 uplo ='L'
-                  J = 0
-*                 -> L(k,k) at A(0,0)
-                  TEMP = ABS( REAL( A( J+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  DO I = 1, K - 1
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-                  DO J = 1, K - 1
-                     DO I = 0, J - 2
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                     I = J - 1
-*                    L(i,i)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     I = J
-*                    L(j+k,j+k)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     DO I = J + 1, K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-                  J = K
-                  DO I = 0, K - 2
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-                  I = K - 1
-*                 -> L(i,i) is at A(i,j)
-                  TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                 VALUE = TEMP
-                  DO J = K + 1, N
-                     DO I = 0, K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-               ELSE
-*                 uplo = 'U'
-                  DO J = 0, K - 1
-                     DO I = 0, K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-                  J = K
-*                 -> U(j,j) is at A(0,j)
-                  TEMP = ABS( REAL( A( 0+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                 VALUE = TEMP
-                  DO I = 1, K - 1
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-                  DO J = K + 1, N - 1
-                     DO I = 0, J - K - 2
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                     I = J - K - 1
-*                    -> U(i,i) at A(i,j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     I = J - K
-*                    U(j,j)
-                     TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                     DO I = J - K + 1, K - 1
-                        TEMP = ABS( A( I+J*LDA ) )
-                        IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                       VALUE = TEMP
-                     END DO
-                  END DO
-                  J = N
-                  DO I = 0, K - 2
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-                  I = K - 1
-*                 U(k,k) at A(i,j)
-                  TEMP = ABS( REAL( A( I+J*LDA ) ) )
-                  IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                 VALUE = TEMP
-               END IF
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*       Find normI(A) ( = norm1(A), since A is Hermitian).
-*
-         IF( IFM.EQ.1 ) THEN
-*           A is 'N'
-            K = N / 2
-            IF( NOE.EQ.1 ) THEN
-*              n is odd & A is n by (n+1)/2
-               IF( ILU.EQ.0 ) THEN
-*                 uplo = 'U'
-                  DO I = 0, K - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, K
-                     S = ZERO
-                     DO I = 0, K + J - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(i,j+k)
-                        S = S + AA
-                        WORK( I ) = WORK( I ) + AA
-                     END DO
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    -> A(j+k,j+k)
-                     WORK( J+K ) = S + AA
-                     IF( I.EQ.K+K )
-     $                  GO TO 10
-                     I = I + 1
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    -> A(j,j)
-                     WORK( J ) = WORK( J ) + AA
-                     S = ZERO
-                     DO L = J + 1, K - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(l,j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-   10             CONTINUE
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               ELSE
-*                 ilu = 1 & uplo = 'L'
-                  K = K + 1
-*                 k=(n+1)/2 for n odd and ilu=1
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = K - 1, 0, -1
-                     S = ZERO
-                     DO I = 0, J - 2
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(j+k,i+k)
-                        S = S + AA
-                        WORK( I+K ) = WORK( I+K ) + AA
-                     END DO
-                     IF( J.GT.0 ) THEN
-                        AA = ABS( REAL( A( I+J*LDA ) ) )
-*                       -> A(j+k,j+k)
-                        S = S + AA
-                        WORK( I+K ) = WORK( I+K ) + S
-*                       i=j
-                        I = I + 1
-                     END IF
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    -> A(j,j)
-                     WORK( J ) = AA
-                     S = ZERO
-                     DO L = J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(l,j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               END IF
-            ELSE
-*              n is even & A is n+1 by k = n/2
-               IF( ILU.EQ.0 ) THEN
-*                 uplo = 'U'
-                  DO I = 0, K - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, K - 1
-                     S = ZERO
-                     DO I = 0, K + J - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(i,j+k)
-                        S = S + AA
-                        WORK( I ) = WORK( I ) + AA
-                     END DO
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    -> A(j+k,j+k)
-                     WORK( J+K ) = S + AA
-                     I = I + 1
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    -> A(j,j)
-                     WORK( J ) = WORK( J ) + AA
-                     S = ZERO
-                     DO L = J + 1, K - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(l,j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               ELSE
-*                 ilu = 1 & uplo = 'L'
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = K - 1, 0, -1
-                     S = ZERO
-                     DO I = 0, J - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(j+k,i+k)
-                        S = S + AA
-                        WORK( I+K ) = WORK( I+K ) + AA
-                     END DO
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    -> A(j+k,j+k)
-                     S = S + AA
-                     WORK( I+K ) = WORK( I+K ) + S
-*                    i=j
-                     I = I + 1
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    -> A(j,j)
-                     WORK( J ) = AA
-                     S = ZERO
-                     DO L = J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(l,j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               END IF
-            END IF
-         ELSE
-*           ifm=0
-            K = N / 2
-            IF( NOE.EQ.1 ) THEN
-*              n is odd & A is (n+1)/2 by n
-               IF( ILU.EQ.0 ) THEN
-*                 uplo = 'U'
-                  N1 = K
-*                 n/2
-                  K = K + 1
-*                 k is the row size and lda
-                  DO I = N1, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, N1 - 1
-                     S = ZERO
-                     DO I = 0, K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,n1+i)
-                        WORK( I+N1 ) = WORK( I+N1 ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = S
-                  END DO
-*                 j=n1=k-1 is special
-                  S = ABS( REAL( A( 0+J*LDA ) ) )
-*                 A(k-1,k-1)
-                  DO I = 1, K - 1
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(k-1,i+n1)
-                     WORK( I+N1 ) = WORK( I+N1 ) + AA
-                     S = S + AA
-                  END DO
-                  WORK( J ) = WORK( J ) + S
-                  DO J = K, N - 1
-                     S = ZERO
-                     DO I = 0, J - K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(i,j-k)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-*                    i=j-k
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    A(j-k,j-k)
-                     S = S + AA
-                     WORK( J-K ) = WORK( J-K ) + S
-                     I = I + 1
-                     S = ABS( REAL( A( I+J*LDA ) ) )
-*                    A(j,j)
-                     DO L = J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,l)
-                        WORK( L ) = WORK( L ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               ELSE
-*                 ilu=1 & uplo = 'L'
-                  K = K + 1
-*                 k=(n+1)/2 for n odd and ilu=1
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, K - 2
-*                    process
-                     S = ZERO
-                     DO I = 0, J - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,i)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    i=j so process of A(j,j)
-                     S = S + AA
-                     WORK( J ) = S
-*                    is initialised here
-                     I = I + 1
-*                    i=j process A(j+k,j+k)
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-                     S = AA
-                     DO L = K + J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(l,k+j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( K+J ) = WORK( K+J ) + S
-                  END DO
-*                 j=k-1 is special :process col A(k-1,0:k-1)
-                  S = ZERO
-                  DO I = 0, K - 2
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(k,i)
-                     WORK( I ) = WORK( I ) + AA
-                     S = S + AA
-                  END DO
-*                 i=k-1
-                  AA = ABS( REAL( A( I+J*LDA ) ) )
-*                 A(k-1,k-1)
-                  S = S + AA
-                  WORK( I ) = S
-*                 done with col j=k+1
-                  DO J = K, N - 1
-*                    process col j of A = A(j,0:k-1)
-                     S = ZERO
-                     DO I = 0, K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,i)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               END IF
-            ELSE
-*              n is even & A is k=n/2 by n+1
-               IF( ILU.EQ.0 ) THEN
-*                 uplo = 'U'
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, K - 1
-                     S = ZERO
-                     DO I = 0, K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,i+k)
-                        WORK( I+K ) = WORK( I+K ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = S
-                  END DO
-*                 j=k
-                  AA = ABS( REAL( A( 0+J*LDA ) ) )
-*                 A(k,k)
-                  S = AA
-                  DO I = 1, K - 1
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(k,k+i)
-                     WORK( I+K ) = WORK( I+K ) + AA
-                     S = S + AA
-                  END DO
-                  WORK( J ) = WORK( J ) + S
-                  DO J = K + 1, N - 1
-                     S = ZERO
-                     DO I = 0, J - 2 - K
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(i,j-k-1)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-*                    i=j-1-k
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    A(j-k-1,j-k-1)
-                     S = S + AA
-                     WORK( J-K-1 ) = WORK( J-K-1 ) + S
-                     I = I + 1
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    A(j,j)
-                     S = AA
-                     DO L = J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,l)
-                        WORK( L ) = WORK( L ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-*                 j=n
-                  S = ZERO
-                  DO I = 0, K - 2
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(i,k-1)
-                     WORK( I ) = WORK( I ) + AA
-                     S = S + AA
-                  END DO
-*                 i=k-1
-                  AA = ABS( REAL( A( I+J*LDA ) ) )
-*                 A(k-1,k-1)
-                  S = S + AA
-                  WORK( I ) = WORK( I ) + S
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               ELSE
-*                 ilu=1 & uplo = 'L'
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-*                 j=0 is special :process col A(k:n-1,k)
-                  S = ABS( REAL( A( 0 ) ) )
-*                 A(k,k)
-                  DO I = 1, K - 1
-                     AA = ABS( A( I ) )
-*                    A(k+i,k)
-                     WORK( I+K ) = WORK( I+K ) + AA
-                     S = S + AA
-                  END DO
-                  WORK( K ) = WORK( K ) + S
-                  DO J = 1, K - 1
-*                    process
-                     S = ZERO
-                     DO I = 0, J - 2
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j-1,i)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-*                    i=j-1 so process of A(j-1,j-1)
-                     S = S + AA
-                     WORK( J-1 ) = S
-*                    is initialised here
-                     I = I + 1
-*                    i=j process A(j+k,j+k)
-                     AA = ABS( REAL( A( I+J*LDA ) ) )
-                     S = AA
-                     DO L = K + J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(l,k+j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( K+J ) = WORK( K+J ) + S
-                  END DO
-*                 j=k is special :process col A(k,0:k-1)
-                  S = ZERO
-                  DO I = 0, K - 2
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(k,i)
-                     WORK( I ) = WORK( I ) + AA
-                     S = S + AA
-                  END DO
-*
-*                 i=k-1
-                  AA = ABS( REAL( A( I+J*LDA ) ) )
-*                 A(k-1,k-1)
-                  S = S + AA
-                  WORK( I ) = S
-*                 done with col j=k+1
-                  DO J = K + 1, N
-*
-*                    process col j-1 of A = A(j-1,0:k-1)
-                     S = ZERO
-                     DO I = 0, K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j-1,i)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J-1 ) = WORK( J-1 ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               END IF
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*       Find normF(A).
-*
-         K = ( N+1 ) / 2
-         SCALE = ZERO
-         S = ONE
-         IF( NOE.EQ.1 ) THEN
-*           n is odd
-            IF( IFM.EQ.1 ) THEN
-*              A is normal & A is n by k
-               IF( ILU.EQ.0 ) THEN
-*                 A is upper
-                  DO J = 0, K - 3
-                     CALL CLASSQ( K-J-2, A( K+J+1+J*LDA ), 1, SCALE, S )
-*                    L at A(k,0)
-                  END DO
-                  DO J = 0, K - 1
-                     CALL CLASSQ( K+J-1, A( 0+J*LDA ), 1, SCALE, S )
-*                    trap U at A(0,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  L = K - 1
-*                 -> U(k,k) at A(k-1,0)
-                  DO I = 0, K - 2
-                     AA = REAL( A( L ) )
-*                    U(k+i,k+i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     AA = REAL( A( L+1 ) )
-*                    U(i,i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     L = L + LDA + 1
-                  END DO
-                  AA = REAL( A( L ) )
-*                 U(n-1,n-1)
-                  IF( AA.NE.ZERO ) THEN
-                     IF( SCALE.LT.AA ) THEN
-                        S = ONE + S*( SCALE / AA )**2
-                        SCALE = AA
-                     ELSE
-                        S = S + ( AA / SCALE )**2
-                     END IF
-                  END IF
-               ELSE
-*                 ilu=1 & A is lower
-                  DO J = 0, K - 1
-                     CALL CLASSQ( N-J-1, A( J+1+J*LDA ), 1, SCALE, S )
-*                    trap L at A(0,0)
-                  END DO
-                  DO J = 1, K - 2
-                     CALL CLASSQ( J, A( 0+( 1+J )*LDA ), 1, SCALE, S )
-*                    U at A(0,1)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  AA = REAL( A( 0 ) )
-*                 L(0,0) at A(0,0)
-                  IF( AA.NE.ZERO ) THEN
-                     IF( SCALE.LT.AA ) THEN
-                        S = ONE + S*( SCALE / AA )**2
-                        SCALE = AA
-                     ELSE
-                        S = S + ( AA / SCALE )**2
-                     END IF
-                  END IF
-                  L = LDA
-*                 -> L(k,k) at A(0,1)
-                  DO I = 1, K - 1
-                     AA = REAL( A( L ) )
-*                    L(k-1+i,k-1+i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     AA = REAL( A( L+1 ) )
-*                    L(i,i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     L = L + LDA + 1
-                  END DO
-               END IF
-            ELSE
-*              A is xpose & A is k by n
-               IF( ILU.EQ.0 ) THEN
-*                 A**H is upper
-                  DO J = 1, K - 2
-                     CALL CLASSQ( J, A( 0+( K+J )*LDA ), 1, SCALE, S )
-*                    U at A(0,k)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S )
-*                    k by k-1 rect. at A(0,0)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL CLASSQ( K-J-1, A( J+1+( J+K-1 )*LDA ), 1,
-     $                            SCALE, S )
-*                    L at A(0,k-1)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  L = 0 + K*LDA - LDA
-*                 -> U(k-1,k-1) at A(0,k-1)
-                  AA = REAL( A( L ) )
-*                 U(k-1,k-1)
-                  IF( AA.NE.ZERO ) THEN
-                     IF( SCALE.LT.AA ) THEN
-                        S = ONE + S*( SCALE / AA )**2
-                        SCALE = AA
-                     ELSE
-                        S = S + ( AA / SCALE )**2
-                     END IF
-                  END IF
-                  L = L + LDA
-*                 -> U(0,0) at A(0,k)
-                  DO J = K, N - 1
-                     AA = REAL( A( L ) )
-*                    -> U(j-k,j-k)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     AA = REAL( A( L+1 ) )
-*                    -> U(j,j)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     L = L + LDA + 1
-                  END DO
-               ELSE
-*                 A**H is lower
-                  DO J = 1, K - 1
-                     CALL CLASSQ( J, A( 0+J*LDA ), 1, SCALE, S )
-*                    U at A(0,0)
-                  END DO
-                  DO J = K, N - 1
-                     CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S )
-*                    k by k-1 rect. at A(0,k)
-                  END DO
-                  DO J = 0, K - 3
-                     CALL CLASSQ( K-J-2, A( J+2+J*LDA ), 1, SCALE, S )
-*                    L at A(1,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  L = 0
-*                 -> L(0,0) at A(0,0)
-                  DO I = 0, K - 2
-                     AA = REAL( A( L ) )
-*                    L(i,i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     AA = REAL( A( L+1 ) )
-*                    L(k+i,k+i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     L = L + LDA + 1
-                  END DO
-*                 L-> k-1 + (k-1)*lda or L(k-1,k-1) at A(k-1,k-1)
-                  AA = REAL( A( L ) )
-*                 L(k-1,k-1) at A(k-1,k-1)
-                  IF( AA.NE.ZERO ) THEN
-                     IF( SCALE.LT.AA ) THEN
-                        S = ONE + S*( SCALE / AA )**2
-                        SCALE = AA
-                     ELSE
-                        S = S + ( AA / SCALE )**2
-                     END IF
-                  END IF
-               END IF
-            END IF
-         ELSE
-*           n is even
-            IF( IFM.EQ.1 ) THEN
-*              A is normal
-               IF( ILU.EQ.0 ) THEN
-*                 A is upper
-                  DO J = 0, K - 2
-                     CALL CLASSQ( K-J-1, A( K+J+2+J*LDA ), 1, SCALE, S )
-*                 L at A(k+1,0)
-                  END DO
-                  DO J = 0, K - 1
-                     CALL CLASSQ( K+J, A( 0+J*LDA ), 1, SCALE, S )
-*                 trap U at A(0,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  L = K
-*                 -> U(k,k) at A(k,0)
-                  DO I = 0, K - 1
-                     AA = REAL( A( L ) )
-*                    U(k+i,k+i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     AA = REAL( A( L+1 ) )
-*                    U(i,i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     L = L + LDA + 1
-                  END DO
-               ELSE
-*                 ilu=1 & A is lower
-                  DO J = 0, K - 1
-                     CALL CLASSQ( N-J-1, A( J+2+J*LDA ), 1, SCALE, S )
-*                    trap L at A(1,0)
-                  END DO
-                  DO J = 1, K - 1
-                     CALL CLASSQ( J, A( 0+J*LDA ), 1, SCALE, S )
-*                    U at A(0,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  L = 0
-*                 -> L(k,k) at A(0,0)
-                  DO I = 0, K - 1
-                     AA = REAL( A( L ) )
-*                    L(k-1+i,k-1+i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     AA = REAL( A( L+1 ) )
-*                    L(i,i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     L = L + LDA + 1
-                  END DO
-               END IF
-            ELSE
-*              A is xpose
-               IF( ILU.EQ.0 ) THEN
-*                 A**H is upper
-                  DO J = 1, K - 1
-                     CALL CLASSQ( J, A( 0+( K+1+J )*LDA ), 1, SCALE, S )
-*                 U at A(0,k+1)
-                  END DO
-                  DO J = 0, K - 1
-                     CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S )
-*                 k by k rect. at A(0,0)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL CLASSQ( K-J-1, A( J+1+( J+K )*LDA ), 1, SCALE,
-     $                            S )
-*                 L at A(0,k)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  L = 0 + K*LDA
-*                 -> U(k,k) at A(0,k)
-                  AA = REAL( A( L ) )
-*                 U(k,k)
-                  IF( AA.NE.ZERO ) THEN
-                     IF( SCALE.LT.AA ) THEN
-                        S = ONE + S*( SCALE / AA )**2
-                        SCALE = AA
-                     ELSE
-                        S = S + ( AA / SCALE )**2
-                     END IF
-                  END IF
-                  L = L + LDA
-*                 -> U(0,0) at A(0,k+1)
-                  DO J = K + 1, N - 1
-                     AA = REAL( A( L ) )
-*                    -> U(j-k-1,j-k-1)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     AA = REAL( A( L+1 ) )
-*                    -> U(j,j)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     L = L + LDA + 1
-                  END DO
-*                 L=k-1+n*lda
-*                 -> U(k-1,k-1) at A(k-1,n)
-                  AA = REAL( A( L ) )
-*                 U(k,k)
-                  IF( AA.NE.ZERO ) THEN
-                     IF( SCALE.LT.AA ) THEN
-                        S = ONE + S*( SCALE / AA )**2
-                        SCALE = AA
-                     ELSE
-                        S = S + ( AA / SCALE )**2
-                     END IF
-                  END IF
-               ELSE
-*                 A**H is lower
-                  DO J = 1, K - 1
-                     CALL CLASSQ( J, A( 0+( J+1 )*LDA ), 1, SCALE, S )
-*                 U at A(0,1)
-                  END DO
-                  DO J = K + 1, N
-                     CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S )
-*                 k by k rect. at A(0,k+1)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL CLASSQ( K-J-1, A( J+1+J*LDA ), 1, SCALE, S )
-*                 L at A(0,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  L = 0
-*                 -> L(k,k) at A(0,0)
-                  AA = REAL( A( L ) )
-*                 L(k,k) at A(0,0)
-                  IF( AA.NE.ZERO ) THEN
-                     IF( SCALE.LT.AA ) THEN
-                        S = ONE + S*( SCALE / AA )**2
-                        SCALE = AA
-                     ELSE
-                        S = S + ( AA / SCALE )**2
-                     END IF
-                  END IF
-                  L = LDA
-*                 -> L(0,0) at A(0,1)
-                  DO I = 0, K - 2
-                     AA = REAL( A( L ) )
-*                    L(i,i)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     AA = REAL( A( L+1 ) )
-*                    L(k+i+1,k+i+1)
-                     IF( AA.NE.ZERO ) THEN
-                        IF( SCALE.LT.AA ) THEN
-                           S = ONE + S*( SCALE / AA )**2
-                           SCALE = AA
-                        ELSE
-                           S = S + ( AA / SCALE )**2
-                        END IF
-                     END IF
-                     L = L + LDA + 1
-                  END DO
-*                 L-> k - 1 + k*lda or L(k-1,k-1) at A(k-1,k)
-                  AA = REAL( A( L ) )
-*                 L(k-1,k-1) at A(k-1,k)
-                  IF( AA.NE.ZERO ) THEN
-                     IF( SCALE.LT.AA ) THEN
-                        S = ONE + S*( SCALE / AA )**2
-                        SCALE = AA
-                     ELSE
-                        S = S + ( AA / SCALE )**2
-                     END IF
-                  END IF
-               END IF
-            END IF
-         END IF
-         VALUE = SCALE*SQRT( S )
-      END IF
-*
-      CLANHF = VALUE
-      RETURN
-*
-*     End of CLANHF
-*
-      END
diff --git a/blas_fix/clanhp.f b/blas_fix/clanhp.f
deleted file mode 100644
index 2279701ce..000000000
--- a/blas_fix/clanhp.f
+++ /dev/null
@@ -1,270 +0,0 @@
-*> \brief \b CLANHP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix supplied in packed form.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANHP + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clanhp.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clanhp.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clanhp.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANHP( NORM, UPLO, N, AP, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            AP( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANHP  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> complex hermitian matrix A,  supplied in packed form.
-*> \endverbatim
-*>
-*> \return CLANHP
-*> \verbatim
-*>
-*>    CLANHP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANHP as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          hermitian matrix A is supplied.
-*>          = 'U':  Upper triangular part of A is supplied
-*>          = 'L':  Lower triangular part of A is supplied
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANHP is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array, dimension (N*(N+1)/2)
-*>          The upper or lower triangle of the hermitian matrix A, packed
-*>          columnwise in a linear array.  The j-th column of A is stored
-*>          in the array AP as follows:
-*>          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
-*>          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
-*>          Note that the  imaginary parts of the diagonal elements need
-*>          not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANHP( NORM, UPLO, N, AP, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            AP( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, K
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, REAL, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            K = 0
-            DO 20 J = 1, N
-               DO 10 I = K + 1, K + J - 1
-                  SUM = ABS( AP( I ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10          CONTINUE
-               K = K + J
-               SUM = ABS( REAL( AP( K ) ) )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   20       CONTINUE
-         ELSE
-            K = 1
-            DO 40 J = 1, N
-               SUM = ABS( REAL( AP( K ) ) )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-               DO 30 I = K + 1, K + N - J
-                  SUM = ABS( AP( I ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-               K = K + N - J + 1
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is hermitian).
-*
-         VALUE = ZERO
-         K = 1
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               DO 50 I = 1, J - 1
-                  ABSA = ABS( AP( K ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-                  K = K + 1
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( REAL( AP( K ) ) )
-               K = K + 1
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( REAL( AP( K ) ) )
-               K = K + 1
-               DO 90 I = J + 1, N
-                  ABSA = ABS( AP( K ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-                  K = K + 1
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         K = 2
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 2, N
-               CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM )
-               K = K + J
-  110       CONTINUE
-         ELSE
-            DO 120 J = 1, N - 1
-               CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM )
-               K = K + N - J + 1
-  120       CONTINUE
-         END IF
-         SUM = 2*SUM
-         K = 1
-         DO 130 I = 1, N
-            IF( REAL( AP( K ) ).NE.ZERO ) THEN
-               ABSA = ABS( REAL( AP( K ) ) )
-               IF( SCALE.LT.ABSA ) THEN
-                  SUM = ONE + SUM*( SCALE / ABSA )**2
-                  SCALE = ABSA
-               ELSE
-                  SUM = SUM + ( ABSA / SCALE )**2
-               END IF
-            END IF
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               K = K + I + 1
-            ELSE
-               K = K + N - I + 1
-            END IF
-  130    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANHP = VALUE
-      RETURN
-*
-*     End of CLANHP
-*
-      END
diff --git a/blas_fix/clanhs.f b/blas_fix/clanhs.f
deleted file mode 100644
index f0be01791..000000000
--- a/blas_fix/clanhs.f
+++ /dev/null
@@ -1,208 +0,0 @@
-*> \brief \b CLANHS returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of an upper Hessenberg matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANHS + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clanhs.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clanhs.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clanhs.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANHS( NORM, N, A, LDA, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            LDA, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            A( LDA, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANHS  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> Hessenberg matrix A.
-*> \endverbatim
-*>
-*> \return CLANHS
-*> \verbatim
-*>
-*>    CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANHS as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANHS is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array, dimension (LDA,N)
-*>          The n by n upper Hessenberg matrix A; the part of A below the
-*>          first sub-diagonal is not referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(N,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANHS( NORM, N, A, LDA, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            LDA, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            A( LDA, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J
-      REAL               SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         DO 20 J = 1, N
-            DO 10 I = 1, MIN( N, J+1 )
-               SUM = ABS( A( I, J ) )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10       CONTINUE
-   20    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         DO 40 J = 1, N
-            SUM = ZERO
-            DO 30 I = 1, MIN( N, J+1 )
-               SUM = SUM + ABS( A( I, J ) )
-   30       CONTINUE
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   40    CONTINUE
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         DO 50 I = 1, N
-            WORK( I ) = ZERO
-   50    CONTINUE
-         DO 70 J = 1, N
-            DO 60 I = 1, MIN( N, J+1 )
-               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-   60       CONTINUE
-   70    CONTINUE
-         VALUE = ZERO
-         DO 80 I = 1, N
-            SUM = WORK( I )
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   80    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         DO 90 J = 1, N
-            CALL CLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
-   90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANHS = VALUE
-      RETURN
-*
-*     End of CLANHS
-*
-      END
diff --git a/blas_fix/clanht.f b/blas_fix/clanht.f
deleted file mode 100644
index f50d2c883..000000000
--- a/blas_fix/clanht.f
+++ /dev/null
@@ -1,189 +0,0 @@
-*> \brief \b CLANHT returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian tridiagonal matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANHT + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clanht.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clanht.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clanht.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANHT( NORM, N, D, E )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               D( * )
-*       COMPLEX            E( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANHT  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> complex Hermitian tridiagonal matrix A.
-*> \endverbatim
-*>
-*> \return CLANHT
-*> \verbatim
-*>
-*>    CLANHT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANHT as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANHT is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is REAL array, dimension (N)
-*>          The diagonal elements of A.
-*> \endverbatim
-*>
-*> \param[in] E
-*> \verbatim
-*>          E is COMPLEX array, dimension (N-1)
-*>          The (n-1) sub-diagonal or super-diagonal elements of A.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANHT( NORM, N, D, E )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               D( * )
-      COMPLEX            E( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I
-      REAL               ANORM, SCALE, SUM
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ, SLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      ANORM = ZERO
-      IF( N.LE.0 ) THEN
-         ANORM = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         ANORM = ABS( D( N ) )
-         DO 10 I = 1, N - 1
-            SUM = ABS( D( I ) )
-            IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM
-            SUM = ABS( E( I ) )
-            IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM
-   10    CONTINUE
-      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR.
-     $         LSAME( NORM, 'I' ) ) THEN
-*
-*        Find norm1(A).
-*
-         IF( N.EQ.1 ) THEN
-            ANORM = ABS( D( 1 ) )
-         ELSE
-            ANORM = ABS( D( 1 ) )+ABS( E( 1 ) )
-            SUM = ABS( E( N-1 ) )+ABS( D( N ) )
-            IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM
-            DO 20 I = 2, N - 1
-               SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) )
-               IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM
-   20       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         IF( N.GT.1 ) THEN
-            CALL CLASSQ( N-1, E, 1, SCALE, SUM )
-            SUM = 2*SUM
-         END IF
-         CALL SLASSQ( N, D, 1, SCALE, SUM )
-         ANORM = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANHT = ANORM
-      RETURN
-*
-*     End of CLANHT
-*
-      END
diff --git a/blas_fix/clansb.f b/blas_fix/clansb.f
deleted file mode 100644
index d386d7c09..000000000
--- a/blas_fix/clansb.f
+++ /dev/null
@@ -1,261 +0,0 @@
-*> \brief \b CLANSB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric band matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANSB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clansb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clansb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clansb.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANSB( NORM, UPLO, N, K, AB, LDAB,
-*                        WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            K, LDAB, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            AB( LDAB, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANSB  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the element of  largest absolute value  of an
-*> n by n symmetric band matrix A,  with k super-diagonals.
-*> \endverbatim
-*>
-*> \return CLANSB
-*> \verbatim
-*>
-*>    CLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANSB as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          band matrix A is supplied.
-*>          = 'U':  Upper triangular part is supplied
-*>          = 'L':  Lower triangular part is supplied
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANSB is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>          The number of super-diagonals or sub-diagonals of the
-*>          band matrix A.  K >= 0.
-*> \endverbatim
-*>
-*> \param[in] AB
-*> \verbatim
-*>          AB is COMPLEX array, dimension (LDAB,N)
-*>          The upper or lower triangle of the symmetric band matrix A,
-*>          stored in the first K+1 rows of AB.  The j-th column of A is
-*>          stored in the j-th column of the array AB as follows:
-*>          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
-*>          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
-*> \endverbatim
-*>
-*> \param[in] LDAB
-*> \verbatim
-*>          LDAB is INTEGER
-*>          The leading dimension of the array AB.  LDAB >= K+1.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANSB( NORM, UPLO, N, K, AB, LDAB,
-     $                 WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            K, LDAB, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            AB( LDAB, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, L
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 20 J = 1, N
-               DO 10 I = MAX( K+2-J, 1 ), K + 1
-                  SUM = ABS( AB( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10          CONTINUE
-   20       CONTINUE
-         ELSE
-            DO 40 J = 1, N
-               DO 30 I = 1, MIN( N+1-J, K+1 )
-                  SUM = ABS( AB( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is symmetric).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               L = K + 1 - J
-               DO 50 I = MAX( 1, J-K ), J - 1
-                  ABSA = ABS( AB( L+I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( AB( K+1, J ) )
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( AB( 1, J ) )
-               L = 1 - J
-               DO 90 I = J + 1, MIN( N, J+K )
-                  ABSA = ABS( AB( L+I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         IF( K.GT.0 ) THEN
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 110 J = 2, N
-                  CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
-  110          CONTINUE
-               L = K + 1
-            ELSE
-               DO 120 J = 1, N - 1
-                  CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                         SUM )
-  120          CONTINUE
-               L = 1
-            END IF
-            SUM = 2*SUM
-         ELSE
-            L = 1
-         END IF
-         CALL CLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANSB = VALUE
-      RETURN
-*
-*     End of CLANSB
-*
-      END
diff --git a/blas_fix/clansp.f b/blas_fix/clansp.f
deleted file mode 100644
index 19b85d6ce..000000000
--- a/blas_fix/clansp.f
+++ /dev/null
@@ -1,273 +0,0 @@
-*> \brief \b CLANSP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix supplied in packed form.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANSP + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clansp.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clansp.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clansp.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANSP( NORM, UPLO, N, AP, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            AP( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANSP  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> complex symmetric matrix A,  supplied in packed form.
-*> \endverbatim
-*>
-*> \return CLANSP
-*> \verbatim
-*>
-*>    CLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANSP as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          symmetric matrix A is supplied.
-*>          = 'U':  Upper triangular part of A is supplied
-*>          = 'L':  Lower triangular part of A is supplied
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANSP is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array, dimension (N*(N+1)/2)
-*>          The upper or lower triangle of the symmetric matrix A, packed
-*>          columnwise in a linear array.  The j-th column of A is stored
-*>          in the array AP as follows:
-*>          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
-*>          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANSP( NORM, UPLO, N, AP, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            AP( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, K
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, AIMAG, REAL, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            K = 1
-            DO 20 J = 1, N
-               DO 10 I = K, K + J - 1
-                  SUM = ABS( AP( I ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10          CONTINUE
-               K = K + J
-   20       CONTINUE
-         ELSE
-            K = 1
-            DO 40 J = 1, N
-               DO 30 I = K, K + N - J
-                  SUM = ABS( AP( I ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-               K = K + N - J + 1
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is symmetric).
-*
-         VALUE = ZERO
-         K = 1
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               DO 50 I = 1, J - 1
-                  ABSA = ABS( AP( K ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-                  K = K + 1
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( AP( K ) )
-               K = K + 1
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( AP( K ) )
-               K = K + 1
-               DO 90 I = J + 1, N
-                  ABSA = ABS( AP( K ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-                  K = K + 1
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         K = 2
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 2, N
-               CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM )
-               K = K + J
-  110       CONTINUE
-         ELSE
-            DO 120 J = 1, N - 1
-               CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM )
-               K = K + N - J + 1
-  120       CONTINUE
-         END IF
-         SUM = 2*SUM
-         K = 1
-         DO 130 I = 1, N
-            IF( REAL( AP( K ) ).NE.ZERO ) THEN
-               ABSA = ABS( REAL( AP( K ) ) )
-               IF( SCALE.LT.ABSA ) THEN
-                  SUM = ONE + SUM*( SCALE / ABSA )**2
-                  SCALE = ABSA
-               ELSE
-                  SUM = SUM + ( ABSA / SCALE )**2
-               END IF
-            END IF
-            IF( AIMAG( AP( K ) ).NE.ZERO ) THEN
-               ABSA = ABS( AIMAG( AP( K ) ) )
-               IF( SCALE.LT.ABSA ) THEN
-                  SUM = ONE + SUM*( SCALE / ABSA )**2
-                  SCALE = ABSA
-               ELSE
-                  SUM = SUM + ( ABSA / SCALE )**2
-               END IF
-            END IF
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               K = K + I + 1
-            ELSE
-               K = K + N - I + 1
-            END IF
-  130    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANSP = VALUE
-      RETURN
-*
-*     End of CLANSP
-*
-      END
diff --git a/blas_fix/clansy.f b/blas_fix/clansy.f
deleted file mode 100644
index 18cb018ff..000000000
--- a/blas_fix/clansy.f
+++ /dev/null
@@ -1,244 +0,0 @@
-*> \brief \b CLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex symmetric matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANSY + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clansy.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clansy.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clansy.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANSY( NORM, UPLO, N, A, LDA, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            LDA, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            A( LDA, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANSY  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> complex symmetric matrix A.
-*> \endverbatim
-*>
-*> \return CLANSY
-*> \verbatim
-*>
-*>    CLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANSY as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          symmetric matrix A is to be referenced.
-*>          = 'U':  Upper triangular part of A is referenced
-*>          = 'L':  Lower triangular part of A is referenced
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANSY is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array, dimension (LDA,N)
-*>          The symmetric matrix A.  If UPLO = 'U', the leading n by n
-*>          upper triangular part of A contains the upper triangular part
-*>          of the matrix A, and the strictly lower triangular part of A
-*>          is not referenced.  If UPLO = 'L', the leading n by n lower
-*>          triangular part of A contains the lower triangular part of
-*>          the matrix A, and the strictly upper triangular part of A is
-*>          not referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(N,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexSYauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANSY( NORM, UPLO, N, A, LDA, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            LDA, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            A( LDA, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 20 J = 1, N
-               DO 10 I = 1, J
-                  SUM = ABS( A( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10          CONTINUE
-   20       CONTINUE
-         ELSE
-            DO 40 J = 1, N
-               DO 30 I = J, N
-                  SUM = ABS( A( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is symmetric).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               DO 50 I = 1, J - 1
-                  ABSA = ABS( A( I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( A( J, J ) )
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( A( J, J ) )
-               DO 90 I = J + 1, N
-                  ABSA = ABS( A( I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 2, N
-               CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
-  110       CONTINUE
-         ELSE
-            DO 120 J = 1, N - 1
-               CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
-  120       CONTINUE
-         END IF
-         SUM = 2*SUM
-         CALL CLASSQ( N, A, LDA+1, SCALE, SUM )
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANSY = VALUE
-      RETURN
-*
-*     End of CLANSY
-*
-      END
diff --git a/blas_fix/clantb.f b/blas_fix/clantb.f
deleted file mode 100644
index 6104f8ef7..000000000
--- a/blas_fix/clantb.f
+++ /dev/null
@@ -1,364 +0,0 @@
-*> \brief \b CLANTB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular band matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANTB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clantb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clantb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clantb.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANTB( NORM, UPLO, DIAG, N, K, AB,
-*                        LDAB, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          DIAG, NORM, UPLO
-*       INTEGER            K, LDAB, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            AB( LDAB, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANTB  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the element of  largest absolute value  of an
-*> n by n triangular band matrix A,  with ( k + 1 ) diagonals.
-*> \endverbatim
-*>
-*> \return CLANTB
-*> \verbatim
-*>
-*>    CLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANTB as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the matrix A is upper or lower triangular.
-*>          = 'U':  Upper triangular
-*>          = 'L':  Lower triangular
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>          Specifies whether or not the matrix A is unit triangular.
-*>          = 'N':  Non-unit triangular
-*>          = 'U':  Unit triangular
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANTB is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>          The number of super-diagonals of the matrix A if UPLO = 'U',
-*>          or the number of sub-diagonals of the matrix A if UPLO = 'L'.
-*>          K >= 0.
-*> \endverbatim
-*>
-*> \param[in] AB
-*> \verbatim
-*>          AB is COMPLEX array, dimension (LDAB,N)
-*>          The upper or lower triangular band matrix A, stored in the
-*>          first k+1 rows of AB.  The j-th column of A is stored
-*>          in the j-th column of the array AB as follows:
-*>          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
-*>          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
-*>          Note that when DIAG = 'U', the elements of the array AB
-*>          corresponding to the diagonal elements of the matrix A are
-*>          not referenced, but are assumed to be one.
-*> \endverbatim
-*>
-*> \param[in] LDAB
-*> \verbatim
-*>          LDAB is INTEGER
-*>          The leading dimension of the array AB.  LDAB >= K+1.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANTB( NORM, UPLO, DIAG, N, K, AB,
-     $                 LDAB, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          DIAG, NORM, UPLO
-      INTEGER            K, LDAB, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            AB( LDAB, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            UDIAG
-      INTEGER            I, J, L
-      REAL               SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         IF( LSAME( DIAG, 'U' ) ) THEN
-            VALUE = ONE
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 20 J = 1, N
-                  DO 10 I = MAX( K+2-J, 1 ), K
-                     SUM = ABS( AB( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10             CONTINUE
-   20          CONTINUE
-            ELSE
-               DO 40 J = 1, N
-                  DO 30 I = 2, MIN( N+1-J, K+1 )
-                     SUM = ABS( AB( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30             CONTINUE
-   40          CONTINUE
-            END IF
-         ELSE
-            VALUE = ZERO
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 60 J = 1, N
-                  DO 50 I = MAX( K+2-J, 1 ), K + 1
-                     SUM = ABS( AB( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   50             CONTINUE
-   60          CONTINUE
-            ELSE
-               DO 80 J = 1, N
-                  DO 70 I = 1, MIN( N+1-J, K+1 )
-                     SUM = ABS( AB( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70             CONTINUE
-   80          CONTINUE
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         UDIAG = LSAME( DIAG, 'U' )
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 90 I = MAX( K+2-J, 1 ), K
-                     SUM = SUM + ABS( AB( I, J ) )
-   90             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 100 I = MAX( K+2-J, 1 ), K + 1
-                     SUM = SUM + ABS( AB( I, J ) )
-  100             CONTINUE
-               END IF
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM               
-  110       CONTINUE
-         ELSE
-            DO 140 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 120 I = 2, MIN( N+1-J, K+1 )
-                     SUM = SUM + ABS( AB( I, J ) )
-  120             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 130 I = 1, MIN( N+1-J, K+1 )
-                     SUM = SUM + ABS( AB( I, J ) )
-  130             CONTINUE
-               END IF
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  140       CONTINUE
-         END IF
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 150 I = 1, N
-                  WORK( I ) = ONE
-  150          CONTINUE
-               DO 170 J = 1, N
-                  L = K + 1 - J
-                  DO 160 I = MAX( 1, J-K ), J - 1
-                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
-  160             CONTINUE
-  170          CONTINUE
-            ELSE
-               DO 180 I = 1, N
-                  WORK( I ) = ZERO
-  180          CONTINUE
-               DO 200 J = 1, N
-                  L = K + 1 - J
-                  DO 190 I = MAX( 1, J-K ), J
-                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
-  190             CONTINUE
-  200          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 210 I = 1, N
-                  WORK( I ) = ONE
-  210          CONTINUE
-               DO 230 J = 1, N
-                  L = 1 - J
-                  DO 220 I = J + 1, MIN( N, J+K )
-                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
-  220             CONTINUE
-  230          CONTINUE
-            ELSE
-               DO 240 I = 1, N
-                  WORK( I ) = ZERO
-  240          CONTINUE
-               DO 260 J = 1, N
-                  L = 1 - J
-                  DO 250 I = J, MIN( N, J+K )
-                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
-  250             CONTINUE
-  260          CONTINUE
-            END IF
-         END IF
-         DO 270 I = 1, N
-            SUM = WORK( I )
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  270    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
-               IF( K.GT.0 ) THEN
-                  DO 280 J = 2, N
-                     CALL CLASSQ( MIN( J-1, K ),
-     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
-     $                            SUM )
-  280             CONTINUE
-               END IF
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               DO 290 J = 1, N
-                  CALL CLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
-  290          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
-               IF( K.GT.0 ) THEN
-                  DO 300 J = 1, N - 1
-                     CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                            SUM )
-  300             CONTINUE
-               END IF
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               DO 310 J = 1, N
-                  CALL CLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
-     $                         SUM )
-  310          CONTINUE
-            END IF
-         END IF
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANTB = VALUE
-      RETURN
-*
-*     End of CLANTB
-*
-      END
diff --git a/blas_fix/clantp.f b/blas_fix/clantp.f
deleted file mode 100644
index 7a67253b8..000000000
--- a/blas_fix/clantp.f
+++ /dev/null
@@ -1,358 +0,0 @@
-*> \brief \b CLANTP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular matrix supplied in packed form.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANTP + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clantp.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clantp.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clantp.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANTP( NORM, UPLO, DIAG, N, AP, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          DIAG, NORM, UPLO
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            AP( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANTP  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> triangular matrix A, supplied in packed form.
-*> \endverbatim
-*>
-*> \return CLANTP
-*> \verbatim
-*>
-*>    CLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANTP as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the matrix A is upper or lower triangular.
-*>          = 'U':  Upper triangular
-*>          = 'L':  Lower triangular
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>          Specifies whether or not the matrix A is unit triangular.
-*>          = 'N':  Non-unit triangular
-*>          = 'U':  Unit triangular
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, CLANTP is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array, dimension (N*(N+1)/2)
-*>          The upper or lower triangular matrix A, packed columnwise in
-*>          a linear array.  The j-th column of A is stored in the array
-*>          AP as follows:
-*>          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
-*>          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
-*>          Note that when DIAG = 'U', the elements of the array AP
-*>          corresponding to the diagonal elements of the matrix A are
-*>          not referenced, but are assumed to be one.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANTP( NORM, UPLO, DIAG, N, AP, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          DIAG, NORM, UPLO
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            AP( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            UDIAG
-      INTEGER            I, J, K
-      REAL               SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         K = 1
-         IF( LSAME( DIAG, 'U' ) ) THEN
-            VALUE = ONE
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 20 J = 1, N
-                  DO 10 I = K, K + J - 2
-                     SUM = ABS( AP( I ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10             CONTINUE
-                  K = K + J
-   20          CONTINUE
-            ELSE
-               DO 40 J = 1, N
-                  DO 30 I = K + 1, K + N - J
-                     SUM = ABS( AP( I ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30             CONTINUE
-                  K = K + N - J + 1
-   40          CONTINUE
-            END IF
-         ELSE
-            VALUE = ZERO
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 60 J = 1, N
-                  DO 50 I = K, K + J - 1
-                     SUM = ABS( AP( I ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   50             CONTINUE
-                  K = K + J
-   60          CONTINUE
-            ELSE
-               DO 80 J = 1, N
-                  DO 70 I = K, K + N - J
-                     SUM = ABS( AP( I ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70             CONTINUE
-                  K = K + N - J + 1
-   80          CONTINUE
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         K = 1
-         UDIAG = LSAME( DIAG, 'U' )
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 90 I = K, K + J - 2
-                     SUM = SUM + ABS( AP( I ) )
-   90             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 100 I = K, K + J - 1
-                     SUM = SUM + ABS( AP( I ) )
-  100             CONTINUE
-               END IF
-               K = K + J
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  110       CONTINUE
-         ELSE
-            DO 140 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 120 I = K + 1, K + N - J
-                     SUM = SUM + ABS( AP( I ) )
-  120             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 130 I = K, K + N - J
-                     SUM = SUM + ABS( AP( I ) )
-  130             CONTINUE
-               END IF
-               K = K + N - J + 1
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  140       CONTINUE
-         END IF
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         K = 1
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 150 I = 1, N
-                  WORK( I ) = ONE
-  150          CONTINUE
-               DO 170 J = 1, N
-                  DO 160 I = 1, J - 1
-                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
-                     K = K + 1
-  160             CONTINUE
-                  K = K + 1
-  170          CONTINUE
-            ELSE
-               DO 180 I = 1, N
-                  WORK( I ) = ZERO
-  180          CONTINUE
-               DO 200 J = 1, N
-                  DO 190 I = 1, J
-                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
-                     K = K + 1
-  190             CONTINUE
-  200          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 210 I = 1, N
-                  WORK( I ) = ONE
-  210          CONTINUE
-               DO 230 J = 1, N
-                  K = K + 1
-                  DO 220 I = J + 1, N
-                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
-                     K = K + 1
-  220             CONTINUE
-  230          CONTINUE
-            ELSE
-               DO 240 I = 1, N
-                  WORK( I ) = ZERO
-  240          CONTINUE
-               DO 260 J = 1, N
-                  DO 250 I = J, N
-                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
-                     K = K + 1
-  250             CONTINUE
-  260          CONTINUE
-            END IF
-         END IF
-         VALUE = ZERO
-         DO 270 I = 1, N
-            SUM = WORK( I )
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  270    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
-               K = 2
-               DO 280 J = 2, N
-                  CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM )
-                  K = K + J
-  280          CONTINUE
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               K = 1
-               DO 290 J = 1, N
-                  CALL CLASSQ( J, AP( K ), 1, SCALE, SUM )
-                  K = K + J
-  290          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
-               K = 2
-               DO 300 J = 1, N - 1
-                  CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM )
-                  K = K + N - J + 1
-  300          CONTINUE
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               K = 1
-               DO 310 J = 1, N
-                  CALL CLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
-                  K = K + N - J + 1
-  310          CONTINUE
-            END IF
-         END IF
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANTP = VALUE
-      RETURN
-*
-*     End of CLANTP
-*
-      END
diff --git a/blas_fix/clantr.f b/blas_fix/clantr.f
deleted file mode 100644
index f5ad18ce3..000000000
--- a/blas_fix/clantr.f
+++ /dev/null
@@ -1,356 +0,0 @@
-*> \brief \b CLANTR returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download CLANTR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clantr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clantr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clantr.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
-*                        WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          DIAG, NORM, UPLO
-*       INTEGER            LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               WORK( * )
-*       COMPLEX            A( LDA, * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CLANTR  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> trapezoidal or triangular matrix A.
-*> \endverbatim
-*>
-*> \return CLANTR
-*> \verbatim
-*>
-*>    CLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in CLANTR as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the matrix A is upper or lower trapezoidal.
-*>          = 'U':  Upper trapezoidal
-*>          = 'L':  Lower trapezoidal
-*>          Note that A is triangular instead of trapezoidal if M = N.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>          Specifies whether or not the matrix A has unit diagonal.
-*>          = 'N':  Non-unit diagonal
-*>          = 'U':  Unit diagonal
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0, and if
-*>          UPLO = 'U', M <= N.  When M = 0, CLANTR is set to zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0, and if
-*>          UPLO = 'L', N <= M.  When N = 0, CLANTR is set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array, dimension (LDA,N)
-*>          The trapezoidal matrix A (A is triangular if M = N).
-*>          If UPLO = 'U', the leading m by n upper trapezoidal part of
-*>          the array A contains the upper trapezoidal matrix, and the
-*>          strictly lower triangular part of A is not referenced.
-*>          If UPLO = 'L', the leading m by n lower trapezoidal part of
-*>          the array A contains the lower trapezoidal matrix, and the
-*>          strictly upper triangular part of A is not referenced.  Note
-*>          that when DIAG = 'U', the diagonal elements of A are not
-*>          referenced and are assumed to be one.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(M,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
-     $                 WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          DIAG, NORM, UPLO
-      INTEGER            LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      REAL               WORK( * )
-      COMPLEX            A( LDA, * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            UDIAG
-      INTEGER            I, J
-      REAL               SCALE, SUM, VALUE
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           CLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( MIN( M, N ).EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         IF( LSAME( DIAG, 'U' ) ) THEN
-            VALUE = ONE
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 20 J = 1, N
-                  DO 10 I = 1, MIN( M, J-1 )
-                     SUM = ABS( A( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10             CONTINUE
-   20          CONTINUE
-            ELSE
-               DO 40 J = 1, N
-                  DO 30 I = J + 1, M
-                     SUM = ABS( A( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30             CONTINUE
-   40          CONTINUE
-            END IF
-         ELSE
-            VALUE = ZERO
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 60 J = 1, N
-                  DO 50 I = 1, MIN( M, J )
-                     SUM = ABS( A( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   50             CONTINUE
-   60          CONTINUE
-            ELSE
-               DO 80 J = 1, N
-                  DO 70 I = J, M
-                     SUM = ABS( A( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70             CONTINUE
-   80          CONTINUE
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         UDIAG = LSAME( DIAG, 'U' )
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 1, N
-               IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN
-                  SUM = ONE
-                  DO 90 I = 1, J - 1
-                     SUM = SUM + ABS( A( I, J ) )
-   90             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 100 I = 1, MIN( M, J )
-                     SUM = SUM + ABS( A( I, J ) )
-  100             CONTINUE
-               END IF
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  110       CONTINUE
-         ELSE
-            DO 140 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 120 I = J + 1, M
-                     SUM = SUM + ABS( A( I, J ) )
-  120             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 130 I = J, M
-                     SUM = SUM + ABS( A( I, J ) )
-  130             CONTINUE
-               END IF
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  140       CONTINUE
-         END IF
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 150 I = 1, M
-                  WORK( I ) = ONE
-  150          CONTINUE
-               DO 170 J = 1, N
-                  DO 160 I = 1, MIN( M, J-1 )
-                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-  160             CONTINUE
-  170          CONTINUE
-            ELSE
-               DO 180 I = 1, M
-                  WORK( I ) = ZERO
-  180          CONTINUE
-               DO 200 J = 1, N
-                  DO 190 I = 1, MIN( M, J )
-                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-  190             CONTINUE
-  200          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 210 I = 1, N
-                  WORK( I ) = ONE
-  210          CONTINUE
-               DO 220 I = N + 1, M
-                  WORK( I ) = ZERO
-  220          CONTINUE
-               DO 240 J = 1, N
-                  DO 230 I = J + 1, M
-                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-  230             CONTINUE
-  240          CONTINUE
-            ELSE
-               DO 250 I = 1, M
-                  WORK( I ) = ZERO
-  250          CONTINUE
-               DO 270 J = 1, N
-                  DO 260 I = J, M
-                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-  260             CONTINUE
-  270          CONTINUE
-            END IF
-         END IF
-         VALUE = ZERO
-         DO 280 I = 1, M
-            SUM = WORK( I )
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  280    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = MIN( M, N )
-               DO 290 J = 2, N
-                  CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
-  290          CONTINUE
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               DO 300 J = 1, N
-                  CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
-  300          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = MIN( M, N )
-               DO 310 J = 1, N
-                  CALL CLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
-     $                         SUM )
-  310          CONTINUE
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               DO 320 J = 1, N
-                  CALL CLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
-  320          CONTINUE
-            END IF
-         END IF
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      CLANTR = VALUE
-      RETURN
-*
-*     End of CLANTR
-*
-      END
diff --git a/blas_fix/sasum.f b/blas_fix/sasum.f
deleted file mode 100644
index 46a4ecc1b..000000000
--- a/blas_fix/sasum.f
+++ /dev/null
@@ -1,112 +0,0 @@
-*> \brief \b SASUM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SASUM(N,SX,INCX)
-* 
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*)
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SASUM takes the sum of the absolute values.
-*>    uses unrolled loops for increment equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SASUM(N,SX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL STEMP
-      INTEGER I,M,MP1,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,MOD
-*     ..
-      SASUM = 0.0e0
-      STEMP = 0.0e0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,6)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               STEMP = STEMP + ABS(SX(I))
-            END DO
-            IF (N.LT.6) THEN
-               SASUM = STEMP
-               RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,6
-            STEMP = STEMP + ABS(SX(I)) + ABS(SX(I+1)) +
-     $              ABS(SX(I+2)) + ABS(SX(I+3)) +
-     $              ABS(SX(I+4)) + ABS(SX(I+5))
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            STEMP = STEMP + ABS(SX(I))
-         END DO
-      END IF
-      SASUM = STEMP
-      RETURN
-      END
diff --git a/blas_fix/scabs1.f b/blas_fix/scabs1.f
deleted file mode 100644
index cdb5c0b9a..000000000
--- a/blas_fix/scabs1.f
+++ /dev/null
@@ -1,57 +0,0 @@
-*> \brief \b SCABS1
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SCABS1(Z)
-* 
-*       .. Scalar Arguments ..
-*       COMPLEX Z
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SCABS1 computes absolute value of a complex number
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup single_blas_level1
-*
-*  =====================================================================
-      REAL FUNCTION SCABS1(Z)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      COMPLEX Z
-*     ..
-*
-*  =====================================================================
-*
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,AIMAG,REAL
-*     ..
-      SCABS1 = ABS(REAL(Z)) + ABS(AIMAG(Z))
-      RETURN
-      END
diff --git a/blas_fix/scasum.f b/blas_fix/scasum.f
deleted file mode 100644
index 03154eb58..000000000
--- a/blas_fix/scasum.f
+++ /dev/null
@@ -1,97 +0,0 @@
-*> \brief \b SCASUM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SCASUM(N,CX,INCX)
-* 
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SCASUM takes the sum of the absolute values of a complex vector and
-*>    returns a single precision result.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SCASUM(N,CX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL STEMP
-      INTEGER I,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,AIMAG,REAL
-*     ..
-      SCASUM = 0.0e0
-      STEMP = 0.0e0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DO I = 1,N
-            STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I)))
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I)))
-         END DO
-      END IF
-      SCASUM = STEMP
-      RETURN
-      END
diff --git a/blas_fix/scnrm2.f b/blas_fix/scnrm2.f
deleted file mode 100644
index 4a581e8e1..000000000
--- a/blas_fix/scnrm2.f
+++ /dev/null
@@ -1,119 +0,0 @@
-*> \brief \b SCNRM2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SCNRM2(N,X,INCX)
-* 
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX X(*)
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SCNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>
-*>    SCNRM2 := sqrt( x**H*x )
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to CLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SCNRM2(N,X,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL NORM,SCALE,SSQ,TEMP
-      INTEGER IX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,AIMAG,REAL,SQRT
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-*        CALL CLASSQ( N, X, INCX, SCALE, SSQ )
-*
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (REAL(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(REAL(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-              IF (AIMAG(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(AIMAG(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-*
-      SCNRM2 = NORM
-      RETURN
-*
-*     End of SCNRM2.
-*
-      END
diff --git a/blas_fix/scsum1.f b/blas_fix/scsum1.f
deleted file mode 100644
index 2fbb911b9..000000000
--- a/blas_fix/scsum1.f
+++ /dev/null
@@ -1,140 +0,0 @@
-*> \brief \b SCSUM1 forms the 1-norm of the complex vector using the true absolute value.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SCSUM1 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/scsum1.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/scsum1.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/scsum1.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SCSUM1( N, CX, INCX )
-* 
-*       .. Scalar Arguments ..
-*       INTEGER            INCX, N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX            CX( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SCSUM1 takes the sum of the absolute values of a complex
-*> vector and returns a single precision result.
-*>
-*> Based on SCASUM from the Level 1 BLAS.
-*> The change is to use the 'genuine' absolute value.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of elements in the vector CX.
-*> \endverbatim
-*>
-*> \param[in] CX
-*> \verbatim
-*>          CX is COMPLEX array, dimension (N)
-*>          The vector whose elements will be summed.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>          The spacing between successive values of CX.  INCX > 0.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup complexOTHERauxiliary
-*
-*> \par Contributors:
-*  ==================
-*>
-*> Nick Higham for use with CLACON.
-*
-*  =====================================================================
-      REAL             FUNCTION SCSUM1( N, CX, INCX )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      INTEGER            INCX, N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX            CX( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER            I, NINCX
-      REAL               STEMP
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS
-*     ..
-*     .. Executable Statements ..
-*
-      SCSUM1 = 0.0E0
-      STEMP = 0.0E0
-      IF( N.LE.0 )
-     $   RETURN
-      IF( INCX.EQ.1 )
-     $   GO TO 20
-*
-*     CODE FOR INCREMENT NOT EQUAL TO 1
-*
-      NINCX = N*INCX
-      DO 10 I = 1, NINCX, INCX
-*
-*        NEXT LINE MODIFIED.
-*
-         STEMP = STEMP + ABS( CX( I ) )
-   10 CONTINUE
-      SCSUM1 = STEMP
-      RETURN
-*
-*     CODE FOR INCREMENT EQUAL TO 1
-*
-   20 CONTINUE
-      DO 30 I = 1, N
-*
-*        NEXT LINE MODIFIED.
-*
-         STEMP = STEMP + ABS( CX( I ) )
-   30 CONTINUE
-      SCSUM1 = STEMP
-      RETURN
-*
-*     End of SCSUM1
-*
-      END
diff --git a/blas_fix/sdot.f b/blas_fix/sdot.f
deleted file mode 100644
index 68555aad8..000000000
--- a/blas_fix/sdot.f
+++ /dev/null
@@ -1,117 +0,0 @@
-*> \brief \b SDOT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SDOT(N,SX,INCX,SY,INCY)
-* 
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SDOT forms the dot product of two vectors.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SDOT(N,SX,INCX,SY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL STEMP
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      STEMP = 0.0e0
-      SDOT = 0.0e0
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               STEMP = STEMP + SX(I)*SY(I)
-            END DO
-            IF (N.LT.5) THEN
-               SDOT=STEMP
-            RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-          STEMP = STEMP + SX(I)*SY(I) + SX(I+1)*SY(I+1) +
-     $            SX(I+2)*SY(I+2) + SX(I+3)*SY(I+3) + SX(I+4)*SY(I+4)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            STEMP = STEMP + SX(IX)*SY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      SDOT = STEMP
-      RETURN
-      END
diff --git a/blas_fix/sdsdot.f b/blas_fix/sdsdot.f
deleted file mode 100644
index 736ba0a0f..000000000
--- a/blas_fix/sdsdot.f
+++ /dev/null
@@ -1,255 +0,0 @@
-*> \brief \b SDSDOT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY)
-* 
-*       .. Scalar Arguments ..
-*       REAL SB
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*  
-*    PURPOSE
-*    =======
-*  
-*    Compute the inner product of two vectors with extended
-*    precision accumulation.
-*  
-*    Returns S.P. result with dot product accumulated in D.P.
-*    SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
-*    where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
-*    defined in a similar way using INCY.
-*  
-*    AUTHOR
-*    ======
-*    Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*    Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*  
-*    ARGUMENTS 
-*    =========
-*  
-*    N      (input) INTEGER
-*           number of elements in input vector(s)
-*  
-*    SB     (input) REAL
-*           single precision scalar to be added to inner product
-*  
-*    SX     (input) REAL array, dimension (N)
-*           single precision vector with N elements
-*  
-*    INCX   (input) INTEGER
-*           storage spacing between elements of SX
-*  
-*    SY     (input) REAL array, dimension (N)
-*           single precision vector with N elements
-*  
-*    INCY   (input) INTEGER
-*           storage spacing between elements of SY
-*  
-*    SDSDOT (output) REAL
-*           single precision dot product (SB if N .LE. 0)
-*  
-*    Further Details
-*    ===============
-*  
-*    REFERENCES
-*  
-*    C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
-*    Krogh, Basic linear algebra subprograms for Fortran
-*    usage, Algorithm No. 539, Transactions on Mathematical
-*    Software 5, 3 (September 1979), pp. 308-323.
-*  
-*    REVISION HISTORY  (YYMMDD)
-*        
-*    791001  DATE WRITTEN
-*    890531  Changed all specific intrinsics to generic.  (WRB)
-*    890831  Modified array declarations.  (WRB)
-*    890831  REVISION DATE from Version 3.2
-*    891214  Prologue converted to Version 4.0 format.  (BAB)
-*    920310  Corrected definition of LX in DESCRIPTION.  (WRB)
-*    920501  Reformatted the REFERENCES section.  (WRB)
-*    070118  Reformat to LAPACK coding style
-*  
-*    =====================================================================
-*  
-*       .. Local Scalars ..
-*       DOUBLE PRECISION DSDOT
-*       INTEGER I,KX,KY,NS
-*       ..
-*       .. Intrinsic Functions ..
-*       INTRINSIC DBLE
-*       ..
-*       DSDOT = SB
-*       IF (N.LE.0) THEN
-*          SDSDOT = DSDOT
-*          RETURN
-*       END IF   
-*       IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
-*  
-*       Code for equal and positive increments.
-*  
-*          NS = N*INCX
-*          DO I = 1,NS,INCX
-*             DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
-*          END DO
-*       ELSE
-*  
-*       Code for unequal or nonpositive increments.
-*  
-*          KX = 1
-*          KY = 1
-*          IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-*          IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-*          DO I = 1,N
-*             DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
-*             KX = KX + INCX
-*             KY = KY + INCY
-*          END DO
-*       END IF
-*       SDSDOT = DSDOT
-*       RETURN
-*       END
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup single_blas_level1
-*
-*  =====================================================================
-      REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      REAL SB
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  PURPOSE
-*  =======
-*
-*  Compute the inner product of two vectors with extended
-*  precision accumulation.
-*
-*  Returns S.P. result with dot product accumulated in D.P.
-*  SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
-*  where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
-*  defined in a similar way using INCY.
-*
-*  AUTHOR
-*  ======
-*  Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*  Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*
-*  ARGUMENTS 
-*  =========
-*
-*  N      (input) INTEGER
-*         number of elements in input vector(s)
-*
-*  SB     (input) REAL
-*         single precision scalar to be added to inner product
-*
-*  SX     (input) REAL array, dimension (N)
-*         single precision vector with N elements
-*
-*  INCX   (input) INTEGER
-*         storage spacing between elements of SX
-*
-*  SY     (input) REAL array, dimension (N)
-*         single precision vector with N elements
-*
-*  INCY   (input) INTEGER
-*         storage spacing between elements of SY
-*
-*  SDSDOT (output) REAL
-*         single precision dot product (SB if N .LE. 0)
-*
-*  Further Details
-*  ===============
-*
-*  REFERENCES
-*
-*  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
-*  Krogh, Basic linear algebra subprograms for Fortran
-*  usage, Algorithm No. 539, Transactions on Mathematical
-*  Software 5, 3 (September 1979), pp. 308-323.
-*
-*  REVISION HISTORY  (YYMMDD)
-*      
-*  791001  DATE WRITTEN
-*  890531  Changed all specific intrinsics to generic.  (WRB)
-*  890831  Modified array declarations.  (WRB)
-*  890831  REVISION DATE from Version 3.2
-*  891214  Prologue converted to Version 4.0 format.  (BAB)
-*  920310  Corrected definition of LX in DESCRIPTION.  (WRB)
-*  920501  Reformatted the REFERENCES section.  (WRB)
-*  070118  Reformat to LAPACK coding style
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DSDOT
-      INTEGER I,KX,KY,NS
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE
-*     ..
-      DSDOT = SB
-      IF (N.LE.0) THEN
-         SDSDOT = REAL(DSDOT)
-         RETURN
-      END IF   
-      IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
-*
-*     Code for equal and positive increments.
-*
-         NS = N*INCX
-         DO I = 1,NS,INCX
-            DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
-         END DO
-      ELSE
-*
-*     Code for unequal or nonpositive increments.
-*
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-         DO I = 1,N
-            DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
-            KX = KX + INCX
-            KY = KY + INCY
-         END DO
-      END IF
-      SDSDOT = REAL(DSDOT)
-      RETURN
-      END
diff --git a/blas_fix/sladiv.f b/blas_fix/sladiv.f
deleted file mode 100644
index 6d26da20c..000000000
--- a/blas_fix/sladiv.f
+++ /dev/null
@@ -1,253 +0,0 @@
-*> \brief \b SLADIV performs complex division in real arithmetic, avoiding unnecessary overflow.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLADIV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sladiv.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sladiv.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sladiv.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SLADIV( A, B, C, D, P, Q )
-* 
-*       .. Scalar Arguments ..
-*       REAL               A, B, C, D, P, Q
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLADIV performs complex division in  real arithmetic
-*>
-*>                       a + i*b
-*>            p + i*q = ---------
-*>                       c + i*d
-*>
-*> The algorithm is due to Michael Baudin and Robert L. Smith
-*> and can be found in the paper
-*> "A Robust Complex Division in Scilab"
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] A
-*> \verbatim
-*>          A is REAL
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is REAL
-*> \endverbatim
-*>
-*> \param[in] C
-*> \verbatim
-*>          C is REAL
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is REAL
-*>          The scalars a, b, c, and d in the above expression.
-*> \endverbatim
-*>
-*> \param[out] P
-*> \verbatim
-*>          P is REAL
-*> \endverbatim
-*>
-*> \param[out] Q
-*> \verbatim
-*>          Q is REAL
-*>          The scalars p and q in the above expression.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date January 2013
-*
-*> \ingroup auxOTHERauxiliary
-*
-*  =====================================================================
-      SUBROUTINE SLADIV( A, B, C, D, P, Q )
-*
-*  -- LAPACK auxiliary routine (version 3.5.0) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     January 2013
-*
-*     .. Scalar Arguments ..
-      REAL               A, B, C, D, P, Q
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               BS
-      PARAMETER          ( BS = 2.0E0 )
-      REAL               HALF
-      PARAMETER          ( HALF = 0.5E0 )
-      REAL               TWO
-      PARAMETER          ( TWO = 2.0E0 )
-*
-*     .. Local Scalars ..
-      REAL               AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS
-*     ..
-*     .. External Functions ..
-      REAL               SLAMCH
-      EXTERNAL           SLAMCH
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLADIV1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX
-*     ..
-*     .. Executable Statements ..
-*
-      AA = A
-      BB = B
-      CC = C
-      DD = D
-      AB = MAX( ABS(A), ABS(B) )
-      CD = MAX( ABS(C), ABS(D) )
-      S = 1.0E0
-      
-      OV = SLAMCH( 'Overflow threshold' )
-      UN = SLAMCH( 'Safe minimum' )
-      EPS = SLAMCH( 'Epsilon' )
-      BE = BS / (EPS*EPS)
-      
-      IF( AB >= HALF*OV ) THEN
-         AA = HALF * AA
-         BB = HALF * BB
-         S  = TWO * S
-      END IF
-      IF( CD >= HALF*OV ) THEN
-         CC = HALF * CC
-         DD = HALF * DD
-         S  = HALF * S
-      END IF
-      IF( AB <= UN*BS/EPS ) THEN
-         AA = AA * BE
-         BB = BB * BE
-         S  = S / BE
-      END IF
-      IF( CD <= UN*BS/EPS ) THEN
-         CC = CC * BE
-         DD = DD * BE
-         S  = S * BE
-      END IF
-      IF( ABS( D ).LE.ABS( C ) ) THEN
-         CALL SLADIV1(AA, BB, CC, DD, P, Q)
-      ELSE
-         CALL SLADIV1(BB, AA, DD, CC, P, Q)
-         Q = -Q
-      END IF
-      P = P * S
-      Q = Q * S
-*
-      RETURN
-*
-*     End of SLADIV
-*
-      END
-
-      
-
-      SUBROUTINE SLADIV1( A, B, C, D, P, Q )
-*
-*  -- LAPACK auxiliary routine (version 3.5.0) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     January 2013
-*
-*     .. Scalar Arguments ..
-      REAL               A, B, C, D, P, Q
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE
-      PARAMETER          ( ONE = 1.0E0 )
-*
-*     .. Local Scalars ..
-      REAL               R, T
-*     ..
-*     .. External Functions ..
-      REAL               SLADIV2
-      EXTERNAL           SLADIV2
-*     ..
-*     .. Executable Statements ..
-*
-      R = D / C
-      T = ONE / (C + D * R)
-      P = SLADIV2(A, B, C, D, R, T)
-      A = -A
-      Q = SLADIV2(B, A, C, D, R, T)
-*
-      RETURN
-*
-*     End of SLADIV1
-*
-      END
-
-      REAL FUNCTION SLADIV2( A, B, C, D, R, T )
-*
-*  -- LAPACK auxiliary routine (version 3.5.0) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     January 2013
-*
-*     .. Scalar Arguments ..
-      REAL               A, B, C, D, R, T
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ZERO
-      PARAMETER          ( ZERO = 0.0E0 )
-*
-*     .. Local Scalars ..
-      REAL               BR
-*     ..
-*     .. Executable Statements ..
-*
-      IF( R.NE.ZERO ) THEN
-         BR = B * R
-         if( BR.NE.ZERO ) THEN
-            SLADIV2 = (A + BR) * T
-         ELSE
-            SLADIV2 = A * T + (B * T) * R
-         END IF
-      ELSE
-         SLADIV2 = (A + D * (B / C)) * T
-      END IF
-*
-      RETURN
-*
-*     End of SLADIV
-*
-      END
diff --git a/blas_fix/slamch.f b/blas_fix/slamch.f
deleted file mode 100644
index 8653c66e7..000000000
--- a/blas_fix/slamch.f
+++ /dev/null
@@ -1,192 +0,0 @@
-*> \brief \b SLAMCH
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*      REAL             FUNCTION SLAMCH( CMACH )
-*
-*     .. Scalar Arguments ..
-*      CHARACTER          CMACH
-*     ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLAMCH determines single precision machine parameters.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] CMACH
-*> \verbatim
-*>          Specifies the value to be returned by SLAMCH:
-*>          = 'E' or 'e',   SLAMCH := eps
-*>          = 'S' or 's ,   SLAMCH := sfmin
-*>          = 'B' or 'b',   SLAMCH := base
-*>          = 'P' or 'p',   SLAMCH := eps*base
-*>          = 'N' or 'n',   SLAMCH := t
-*>          = 'R' or 'r',   SLAMCH := rnd
-*>          = 'M' or 'm',   SLAMCH := emin
-*>          = 'U' or 'u',   SLAMCH := rmin
-*>          = 'L' or 'l',   SLAMCH := emax
-*>          = 'O' or 'o',   SLAMCH := rmax
-*>          where
-*>          eps   = relative machine precision
-*>          sfmin = safe minimum, such that 1/sfmin does not overflow
-*>          base  = base of the machine
-*>          prec  = eps*base
-*>          t     = number of (base) digits in the mantissa
-*>          rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
-*>          emin  = minimum exponent before (gradual) underflow
-*>          rmin  = underflow threshold - base**(emin-1)
-*>          emax  = largest exponent before overflow
-*>          rmax  = overflow threshold  - (base**emax)*(1-eps)
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup auxOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLAMCH( CMACH )
-*
-*  -- LAPACK auxiliary routine (version 3.4.0) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      CHARACTER          CMACH
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      REAL               RND, EPS, SFMIN, SMALL, RMACH
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME
-      EXTERNAL           LSAME
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          DIGITS, EPSILON, HUGE, MAXEXPONENT,
-     $                   MINEXPONENT, RADIX, TINY
-*     ..
-*     .. Executable Statements ..
-*
-*
-*     Assume rounding, not chopping. Always.
-*
-      RND = ONE
-*
-      IF( ONE.EQ.RND ) THEN
-         EPS = EPSILON(ZERO) * 0.5
-      ELSE
-         EPS = EPSILON(ZERO)
-      END IF
-*
-      IF( LSAME( CMACH, 'E' ) ) THEN
-         RMACH = EPS
-      ELSE IF( LSAME( CMACH, 'S' ) ) THEN
-         SFMIN = TINY(ZERO)
-         SMALL = ONE / HUGE(ZERO)
-         IF( SMALL.GE.SFMIN ) THEN
-*
-*           Use SMALL plus a bit, to avoid the possibility of rounding
-*           causing overflow when computing  1/sfmin.
-*
-            SFMIN = SMALL*( ONE+EPS )
-         END IF
-         RMACH = SFMIN
-      ELSE IF( LSAME( CMACH, 'B' ) ) THEN
-         RMACH = RADIX(ZERO)
-      ELSE IF( LSAME( CMACH, 'P' ) ) THEN
-         RMACH = EPS * RADIX(ZERO)
-      ELSE IF( LSAME( CMACH, 'N' ) ) THEN
-         RMACH = DIGITS(ZERO)
-      ELSE IF( LSAME( CMACH, 'R' ) ) THEN
-         RMACH = RND
-      ELSE IF( LSAME( CMACH, 'M' ) ) THEN
-         RMACH = MINEXPONENT(ZERO)
-      ELSE IF( LSAME( CMACH, 'U' ) ) THEN
-         RMACH = tiny(zero)
-      ELSE IF( LSAME( CMACH, 'L' ) ) THEN
-         RMACH = MAXEXPONENT(ZERO)
-      ELSE IF( LSAME( CMACH, 'O' ) ) THEN
-         RMACH = HUGE(ZERO)
-      ELSE
-         RMACH = ZERO
-      END IF
-*
-      SLAMCH = RMACH
-      RETURN
-*
-*     End of SLAMCH
-*
-      END
-* =====================================================================
-*> \brief \b SLAMC3
-*> \details
-*> \b Purpose:
-*> \verbatim
-*> SLAMC3  is intended to force  A  and  B  to be stored prior to doing
-*> the addition of  A  and  B ,  for use in situations where optimizers
-*> might hold one of these in a register.
-*> \endverbatim
-*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
-*> \date November 2011
-*> \ingroup auxOTHERauxiliary
-*>
-*> \param[in] A
-*> \verbatim
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          The values A and B.
-*> \endverbatim
-*>
-*
-      REAL             FUNCTION SLAMC3( A, B )
-*
-*  -- LAPACK auxiliary routine (version 3.4.0) --
-*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
-*     November 2010
-*
-*     .. Scalar Arguments ..
-      REAL               A, B
-*     ..
-* =====================================================================
-*
-*     .. Executable Statements ..
-*
-      SLAMC3 = A + B
-*
-      RETURN
-*
-*     End of SLAMC3
-*
-      END
-*
-* =====================================================================
diff --git a/blas_fix/slangb.f b/blas_fix/slangb.f
deleted file mode 100644
index c543372ef..000000000
--- a/blas_fix/slangb.f
+++ /dev/null
@@ -1,226 +0,0 @@
-*> \brief \b SLANGB returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of general band matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANGB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slangb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slangb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slangb.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANGB( NORM, N, KL, KU, AB, LDAB,
-*                        WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            KL, KU, LDAB, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               AB( LDAB, * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANGB  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the element of  largest absolute value  of an
-*> n by n band matrix  A,  with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*>
-*> \return SLANGB
-*> \verbatim
-*>
-*>    SLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANGB as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANGB is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>          The number of sub-diagonals of the matrix A.  KL >= 0.
-*> \endverbatim
-*>
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>          The number of super-diagonals of the matrix A.  KU >= 0.
-*> \endverbatim
-*>
-*> \param[in] AB
-*> \verbatim
-*>          AB is REAL array, dimension (LDAB,N)
-*>          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
-*>          column of A is stored in the j-th column of the array AB as
-*>          follows:
-*>          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
-*> \endverbatim
-*>
-*> \param[in] LDAB
-*> \verbatim
-*>          LDAB is INTEGER
-*>          The leading dimension of the array AB.  LDAB >= KL+KU+1.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realGBauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANGB( NORM, N, KL, KU, AB, LDAB,
-     $                 WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            KL, KU, LDAB, N
-*     ..
-*     .. Array Arguments ..
-      REAL               AB( LDAB, * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, K, L
-      REAL               SCALE, SUM, VALUE, TEMP
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         DO 20 J = 1, N
-            DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
-               TEMP = ABS( AB( I, J ) ) 
-               IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP
-   10       CONTINUE
-   20    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         DO 40 J = 1, N
-            SUM = ZERO
-            DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
-               SUM = SUM + ABS( AB( I, J ) )
-   30       CONTINUE
-            IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   40    CONTINUE
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         DO 50 I = 1, N
-            WORK( I ) = ZERO
-   50    CONTINUE
-         DO 70 J = 1, N
-            K = KU + 1 - J
-            DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL )
-               WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) )
-   60       CONTINUE
-   70    CONTINUE
-         VALUE = ZERO
-         DO 80 I = 1, N
-            TEMP = WORK( I )
-            IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP
-   80    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         DO 90 J = 1, N
-            L = MAX( 1, J-KU )
-            K = KU + 1 - J + L
-            CALL SLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
-   90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANGB = VALUE
-      RETURN
-*
-*     End of SLANGB
-*
-      END
diff --git a/blas_fix/slange.f b/blas_fix/slange.f
deleted file mode 100644
index 2ba10ac5f..000000000
--- a/blas_fix/slange.f
+++ /dev/null
@@ -1,212 +0,0 @@
-*> \brief \b SLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general rectangular matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANGE + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slange.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slange.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slange.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANGE( NORM, M, N, A, LDA, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               A( LDA, * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANGE  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> real matrix A.
-*> \endverbatim
-*>
-*> \return SLANGE
-*> \verbatim
-*>
-*>    SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANGE as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0.  When M = 0,
-*>          SLANGE is set to zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0.  When N = 0,
-*>          SLANGE is set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array, dimension (LDA,N)
-*>          The m by n matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(M,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realGEauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANGE( NORM, M, N, A, LDA, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      REAL               A( LDA, * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J
-      REAL               SCALE, SUM, VALUE, TEMP
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( MIN( M, N ).EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         DO 20 J = 1, N
-            DO 10 I = 1, M
-               TEMP = ABS( A( I, J ) ) 
-               IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP
-   10       CONTINUE
-   20    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         DO 40 J = 1, N
-            SUM = ZERO
-            DO 30 I = 1, M
-               SUM = SUM + ABS( A( I, J ) )
-   30       CONTINUE
-            IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   40    CONTINUE
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         DO 50 I = 1, M
-            WORK( I ) = ZERO
-   50    CONTINUE
-         DO 70 J = 1, N
-            DO 60 I = 1, M
-               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-   60       CONTINUE
-   70    CONTINUE
-         VALUE = ZERO
-         DO 80 I = 1, M
-            TEMP = WORK( I )
-            IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP
-   80    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         DO 90 J = 1, N
-            CALL SLASSQ( M, A( 1, J ), 1, SCALE, SUM )
-   90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANGE = VALUE
-      RETURN
-*
-*     End of SLANGE
-*
-      END
diff --git a/blas_fix/slangt.f b/blas_fix/slangt.f
deleted file mode 100644
index 3a9310680..000000000
--- a/blas_fix/slangt.f
+++ /dev/null
@@ -1,209 +0,0 @@
-*> \brief \b SLANGT returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general tridiagonal matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANGT + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slangt.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slangt.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slangt.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANGT( NORM, N, DL, D, DU )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               D( * ), DL( * ), DU( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANGT  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> real tridiagonal matrix A.
-*> \endverbatim
-*>
-*> \return SLANGT
-*> \verbatim
-*>
-*>    SLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANGT as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANGT is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] DL
-*> \verbatim
-*>          DL is REAL array, dimension (N-1)
-*>          The (n-1) sub-diagonal elements of A.
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is REAL array, dimension (N)
-*>          The diagonal elements of A.
-*> \endverbatim
-*>
-*> \param[in] DU
-*> \verbatim
-*>          DU is REAL array, dimension (N-1)
-*>          The (n-1) super-diagonal elements of A.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANGT( NORM, N, DL, D, DU )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               D( * ), DL( * ), DU( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I
-      REAL               ANORM, SCALE, SUM, TEMP
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      ANORM = ZERO
-      IF( N.LE.0 ) THEN
-         ANORM = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         ANORM = ABS( D( N ) )
-         DO 10 I = 1, N - 1
-            IF( ANORM.LT.ABS( DL( I ) ) .OR. SISNAN( ABS( DL( I ) ) ) ) 
-     $           ANORM = ABS(DL(I))
-            IF( ANORM.LT.ABS( D( I ) ) .OR. SISNAN( ABS( D( I ) ) ) ) 
-     $           ANORM = ABS(D(I))
-            IF( ANORM.LT.ABS( DU( I ) ) .OR. SISNAN (ABS( DU( I ) ) ) ) 
-     $           ANORM = ABS(DU(I))
-   10    CONTINUE
-      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN
-*
-*        Find norm1(A).
-*
-         IF( N.EQ.1 ) THEN
-            ANORM = ABS( D( 1 ) )
-         ELSE
-            ANORM = ABS( D( 1 ) )+ABS( DL( 1 ) )
-            TEMP = ABS( D( N ) )+ABS( DU( N-1 ) ) 
-            IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP
-            DO 20 I = 2, N - 1
-               TEMP = ABS( D( I ) )+ABS( DL( I ) )+ABS( DU( I-1 ) )
-               IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP
-   20       CONTINUE
-         END IF
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         IF( N.EQ.1 ) THEN
-            ANORM = ABS( D( 1 ) )
-         ELSE
-            ANORM = ABS( D( 1 ) )+ABS( DU( 1 ) )
-            TEMP = ABS( D( N ) )+ABS( DL( N-1 ) )
-            IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP
-            DO 30 I = 2, N - 1
-               TEMP = ABS( D( I ) )+ABS( DU( I ) )+ABS( DL( I-1 ) )
-               IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP
-   30       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         CALL SLASSQ( N, D, 1, SCALE, SUM )
-         IF( N.GT.1 ) THEN
-            CALL SLASSQ( N-1, DL, 1, SCALE, SUM )
-            CALL SLASSQ( N-1, DU, 1, SCALE, SUM )
-         END IF
-         ANORM = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANGT = ANORM
-      RETURN
-*
-*     End of SLANGT
-*
-      END
diff --git a/blas_fix/slanhs.f b/blas_fix/slanhs.f
deleted file mode 100644
index fbf4d993c..000000000
--- a/blas_fix/slanhs.f
+++ /dev/null
@@ -1,206 +0,0 @@
-*> \brief \b SLANHS returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of an upper Hessenberg matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANHS + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slanhs.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slanhs.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slanhs.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANHS( NORM, N, A, LDA, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            LDA, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               A( LDA, * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANHS  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> Hessenberg matrix A.
-*> \endverbatim
-*>
-*> \return SLANHS
-*> \verbatim
-*>
-*>    SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANHS as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANHS is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array, dimension (LDA,N)
-*>          The n by n upper Hessenberg matrix A; the part of A below the
-*>          first sub-diagonal is not referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(N,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANHS( NORM, N, A, LDA, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            LDA, N
-*     ..
-*     .. Array Arguments ..
-      REAL               A( LDA, * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J
-      REAL               SCALE, SUM, VALUE
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         DO 20 J = 1, N
-            DO 10 I = 1, MIN( N, J+1 )
-               SUM = ABS( A( I, J ) )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10       CONTINUE
-   20    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         DO 40 J = 1, N
-            SUM = ZERO
-            DO 30 I = 1, MIN( N, J+1 )
-               SUM = SUM + ABS( A( I, J ) )
-   30       CONTINUE
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   40    CONTINUE
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         DO 50 I = 1, N
-            WORK( I ) = ZERO
-   50    CONTINUE
-         DO 70 J = 1, N
-            DO 60 I = 1, MIN( N, J+1 )
-               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-   60       CONTINUE
-   70    CONTINUE
-         VALUE = ZERO
-         DO 80 I = 1, N
-            SUM = WORK( I )
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   80    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         DO 90 J = 1, N
-            CALL SLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
-   90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANHS = VALUE
-      RETURN
-*
-*     End of SLANHS
-*
-      END
diff --git a/blas_fix/slansb.f b/blas_fix/slansb.f
deleted file mode 100644
index 9ac026464..000000000
--- a/blas_fix/slansb.f
+++ /dev/null
@@ -1,259 +0,0 @@
-*> \brief \b SLANSB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric band matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANSB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slansb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slansb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slansb.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANSB( NORM, UPLO, N, K, AB, LDAB,
-*                        WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            K, LDAB, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               AB( LDAB, * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANSB  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the element of  largest absolute value  of an
-*> n by n symmetric band matrix A,  with k super-diagonals.
-*> \endverbatim
-*>
-*> \return SLANSB
-*> \verbatim
-*>
-*>    SLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANSB as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          band matrix A is supplied.
-*>          = 'U':  Upper triangular part is supplied
-*>          = 'L':  Lower triangular part is supplied
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANSB is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>          The number of super-diagonals or sub-diagonals of the
-*>          band matrix A.  K >= 0.
-*> \endverbatim
-*>
-*> \param[in] AB
-*> \verbatim
-*>          AB is REAL array, dimension (LDAB,N)
-*>          The upper or lower triangle of the symmetric band matrix A,
-*>          stored in the first K+1 rows of AB.  The j-th column of A is
-*>          stored in the j-th column of the array AB as follows:
-*>          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
-*>          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
-*> \endverbatim
-*>
-*> \param[in] LDAB
-*> \verbatim
-*>          LDAB is INTEGER
-*>          The leading dimension of the array AB.  LDAB >= K+1.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANSB( NORM, UPLO, N, K, AB, LDAB,
-     $                 WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            K, LDAB, N
-*     ..
-*     .. Array Arguments ..
-      REAL               AB( LDAB, * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, L
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 20 J = 1, N
-               DO 10 I = MAX( K+2-J, 1 ), K + 1
-                  SUM = ABS( AB( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10          CONTINUE
-   20       CONTINUE
-         ELSE
-            DO 40 J = 1, N
-               DO 30 I = 1, MIN( N+1-J, K+1 )
-                  SUM = ABS( AB( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is symmetric).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               L = K + 1 - J
-               DO 50 I = MAX( 1, J-K ), J - 1
-                  ABSA = ABS( AB( L+I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( AB( K+1, J ) )
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( AB( 1, J ) )
-               L = 1 - J
-               DO 90 I = J + 1, MIN( N, J+K )
-                  ABSA = ABS( AB( L+I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         IF( K.GT.0 ) THEN
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 110 J = 2, N
-                  CALL SLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
-  110          CONTINUE
-               L = K + 1
-            ELSE
-               DO 120 J = 1, N - 1
-                  CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                         SUM )
-  120          CONTINUE
-               L = 1
-            END IF
-            SUM = 2*SUM
-         ELSE
-            L = 1
-         END IF
-         CALL SLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANSB = VALUE
-      RETURN
-*
-*     End of SLANSB
-*
-      END
diff --git a/blas_fix/slansf.f b/blas_fix/slansf.f
deleted file mode 100644
index a9f6c58a1..000000000
--- a/blas_fix/slansf.f
+++ /dev/null
@@ -1,965 +0,0 @@
-*> \brief \b SLANSF
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANSF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slansf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slansf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slansf.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SLANSF( NORM, TRANSR, UPLO, N, A, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, TRANSR, UPLO
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               A( 0: * ), WORK( 0: * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANSF returns the value of the one norm, or the Frobenius norm, or
-*> the infinity norm, or the element of largest absolute value of a
-*> real symmetric matrix A in RFP format.
-*> \endverbatim
-*>
-*> \return SLANSF
-*> \verbatim
-*>
-*>    SLANSF = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANSF as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] TRANSR
-*> \verbatim
-*>          TRANSR is CHARACTER*1
-*>          Specifies whether the RFP format of A is normal or
-*>          transposed format.
-*>          = 'N':  RFP format is Normal;
-*>          = 'T':  RFP format is Transpose.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the RFP matrix A came from
-*>           an upper or lower triangular matrix as follows:
-*>           = 'U': RFP A came from an upper triangular matrix;
-*>           = 'L': RFP A came from a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A. N >= 0. When N = 0, SLANSF is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array, dimension ( N*(N+1)/2 );
-*>          On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L')
-*>          part of the symmetric matrix A stored in RFP format. See the
-*>          "Notes" below for more details.
-*>          Unchanged on exit.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realOTHERcomputational
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  We first consider Rectangular Full Packed (RFP) Format when N is
-*>  even. We give an example where N = 6.
-*>
-*>      AP is Upper             AP is Lower
-*>
-*>   00 01 02 03 04 05       00
-*>      11 12 13 14 15       10 11
-*>         22 23 24 25       20 21 22
-*>            33 34 35       30 31 32 33
-*>               44 45       40 41 42 43 44
-*>                  55       50 51 52 53 54 55
-*>
-*>
-*>  Let TRANSR = 'N'. RFP holds AP as follows:
-*>  For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last
-*>  three columns of AP upper. The lower triangle A(4:6,0:2) consists of
-*>  the transpose of the first three columns of AP upper.
-*>  For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first
-*>  three columns of AP lower. The upper triangle A(0:2,0:2) consists of
-*>  the transpose of the last three columns of AP lower.
-*>  This covers the case N even and TRANSR = 'N'.
-*>
-*>         RFP A                   RFP A
-*>
-*>        03 04 05                33 43 53
-*>        13 14 15                00 44 54
-*>        23 24 25                10 11 55
-*>        33 34 35                20 21 22
-*>        00 44 45                30 31 32
-*>        01 11 55                40 41 42
-*>        02 12 22                50 51 52
-*>
-*>  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the
-*>  transpose of RFP A above. One therefore gets:
-*>
-*>
-*>           RFP A                   RFP A
-*>
-*>     03 13 23 33 00 01 02    33 00 10 20 30 40 50
-*>     04 14 24 34 44 11 12    43 44 11 21 31 41 51
-*>     05 15 25 35 45 55 22    53 54 55 22 32 42 52
-*>
-*>
-*>  We then consider Rectangular Full Packed (RFP) Format when N is
-*>  odd. We give an example where N = 5.
-*>
-*>     AP is Upper                 AP is Lower
-*>
-*>   00 01 02 03 04              00
-*>      11 12 13 14              10 11
-*>         22 23 24              20 21 22
-*>            33 34              30 31 32 33
-*>               44              40 41 42 43 44
-*>
-*>
-*>  Let TRANSR = 'N'. RFP holds AP as follows:
-*>  For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last
-*>  three columns of AP upper. The lower triangle A(3:4,0:1) consists of
-*>  the transpose of the first two columns of AP upper.
-*>  For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first
-*>  three columns of AP lower. The upper triangle A(0:1,1:2) consists of
-*>  the transpose of the last two columns of AP lower.
-*>  This covers the case N odd and TRANSR = 'N'.
-*>
-*>         RFP A                   RFP A
-*>
-*>        02 03 04                00 33 43
-*>        12 13 14                10 11 44
-*>        22 23 24                20 21 22
-*>        00 33 34                30 31 32
-*>        01 11 44                40 41 42
-*>
-*>  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the
-*>  transpose of RFP A above. One therefore gets:
-*>
-*>           RFP A                   RFP A
-*>
-*>     02 12 22 00 01             00 10 20 30 40 50
-*>     03 13 23 33 11             33 11 21 31 41 51
-*>     04 14 24 34 44             43 44 22 32 42 52
-*> \endverbatim
-*
-*  =====================================================================
-      REAL FUNCTION SLANSF( NORM, TRANSR, UPLO, N, A, WORK )
-*
-*  -- LAPACK computational routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, TRANSR, UPLO
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               A( 0: * ), WORK( 0: * )
-*     ..
-*
-*  =====================================================================
-*
-*     ..
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, IFM, ILU, NOE, N1, K, L, LDA
-      REAL               SCALE, S, VALUE, AA, TEMP
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         SLANSF = ZERO
-         RETURN
-      ELSE IF( N.EQ.1 ) THEN
-         SLANSF = ABS( A(0) )
-         RETURN
-      END IF
-*
-*     set noe = 1 if n is odd. if n is even set noe=0
-*
-      NOE = 1
-      IF( MOD( N, 2 ).EQ.0 )
-     $   NOE = 0
-*
-*     set ifm = 0 when form='T or 't' and 1 otherwise
-*
-      IFM = 1
-      IF( LSAME( TRANSR, 'T' ) )
-     $   IFM = 0
-*
-*     set ilu = 0 when uplo='U or 'u' and 1 otherwise
-*
-      ILU = 1
-      IF( LSAME( UPLO, 'U' ) )
-     $   ILU = 0
-*
-*     set lda = (n+1)/2 when ifm = 0
-*     set lda = n when ifm = 1 and noe = 1
-*     set lda = n+1 when ifm = 1 and noe = 0
-*
-      IF( IFM.EQ.1 ) THEN
-         IF( NOE.EQ.1 ) THEN
-            LDA = N
-         ELSE
-*           noe=0
-            LDA = N + 1
-         END IF
-      ELSE
-*        ifm=0
-         LDA = ( N+1 ) / 2
-      END IF
-*
-      IF( LSAME( NORM, 'M' ) ) THEN
-*
-*       Find max(abs(A(i,j))).
-*
-         K = ( N+1 ) / 2
-         VALUE = ZERO
-         IF( NOE.EQ.1 ) THEN
-*           n is odd
-            IF( IFM.EQ.1 ) THEN
-*           A is n by k
-               DO J = 0, K - 1
-                  DO I = 0, N - 1
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               END DO
-            ELSE
-*              xpose case; A is k by n
-               DO J = 0, N - 1
-                  DO I = 0, K - 1
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               END DO
-            END IF
-         ELSE
-*           n is even
-            IF( IFM.EQ.1 ) THEN
-*              A is n+1 by k
-               DO J = 0, K - 1
-                  DO I = 0, N
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               END DO
-            ELSE
-*              xpose case; A is k by n+1
-               DO J = 0, N
-                  DO I = 0, K - 1
-                     TEMP = ABS( A( I+J*LDA ) )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               END DO
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is symmetric).
-*
-         IF( IFM.EQ.1 ) THEN
-            K = N / 2
-            IF( NOE.EQ.1 ) THEN
-*              n is odd
-               IF( ILU.EQ.0 ) THEN
-                  DO I = 0, K - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, K
-                     S = ZERO
-                     DO I = 0, K + J - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(i,j+k)
-                        S = S + AA
-                        WORK( I ) = WORK( I ) + AA
-                     END DO
-                     AA = ABS( A( I+J*LDA ) )
-*                    -> A(j+k,j+k)
-                     WORK( J+K ) = S + AA
-                     IF( I.EQ.K+K )
-     $                  GO TO 10
-                     I = I + 1
-                     AA = ABS( A( I+J*LDA ) )
-*                    -> A(j,j)
-                     WORK( J ) = WORK( J ) + AA
-                     S = ZERO
-                     DO L = J + 1, K - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(l,j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-   10             CONTINUE
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               ELSE
-*                 ilu = 1
-                  K = K + 1
-*                 k=(n+1)/2 for n odd and ilu=1
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = K - 1, 0, -1
-                     S = ZERO
-                     DO I = 0, J - 2
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(j+k,i+k)
-                        S = S + AA
-                        WORK( I+K ) = WORK( I+K ) + AA
-                     END DO
-                     IF( J.GT.0 ) THEN
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(j+k,j+k)
-                        S = S + AA
-                        WORK( I+K ) = WORK( I+K ) + S
-*                       i=j
-                        I = I + 1
-                     END IF
-                     AA = ABS( A( I+J*LDA ) )
-*                    -> A(j,j)
-                     WORK( J ) = AA
-                     S = ZERO
-                     DO L = J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(l,j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               END IF
-            ELSE
-*              n is even
-               IF( ILU.EQ.0 ) THEN
-                  DO I = 0, K - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, K - 1
-                     S = ZERO
-                     DO I = 0, K + J - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(i,j+k)
-                        S = S + AA
-                        WORK( I ) = WORK( I ) + AA
-                     END DO
-                     AA = ABS( A( I+J*LDA ) )
-*                    -> A(j+k,j+k)
-                     WORK( J+K ) = S + AA
-                     I = I + 1
-                     AA = ABS( A( I+J*LDA ) )
-*                    -> A(j,j)
-                     WORK( J ) = WORK( J ) + AA
-                     S = ZERO
-                     DO L = J + 1, K - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(l,j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               ELSE
-*                 ilu = 1
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = K - 1, 0, -1
-                     S = ZERO
-                     DO I = 0, J - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(j+k,i+k)
-                        S = S + AA
-                        WORK( I+K ) = WORK( I+K ) + AA
-                     END DO
-                     AA = ABS( A( I+J*LDA ) )
-*                    -> A(j+k,j+k)
-                     S = S + AA
-                     WORK( I+K ) = WORK( I+K ) + S
-*                    i=j
-                     I = I + 1
-                     AA = ABS( A( I+J*LDA ) )
-*                    -> A(j,j)
-                     WORK( J ) = AA
-                     S = ZERO
-                     DO L = J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       -> A(l,j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               END IF
-            END IF
-         ELSE
-*           ifm=0
-            K = N / 2
-            IF( NOE.EQ.1 ) THEN
-*              n is odd
-               IF( ILU.EQ.0 ) THEN
-                  N1 = K
-*                 n/2
-                  K = K + 1
-*                 k is the row size and lda
-                  DO I = N1, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, N1 - 1
-                     S = ZERO
-                     DO I = 0, K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,n1+i)
-                        WORK( I+N1 ) = WORK( I+N1 ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = S
-                  END DO
-*                 j=n1=k-1 is special
-                  S = ABS( A( 0+J*LDA ) )
-*                 A(k-1,k-1)
-                  DO I = 1, K - 1
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(k-1,i+n1)
-                     WORK( I+N1 ) = WORK( I+N1 ) + AA
-                     S = S + AA
-                  END DO
-                  WORK( J ) = WORK( J ) + S
-                  DO J = K, N - 1
-                     S = ZERO
-                     DO I = 0, J - K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(i,j-k)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-*                    i=j-k
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(j-k,j-k)
-                     S = S + AA
-                     WORK( J-K ) = WORK( J-K ) + S
-                     I = I + 1
-                     S = ABS( A( I+J*LDA ) )
-*                    A(j,j)
-                     DO L = J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,l)
-                        WORK( L ) = WORK( L ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               ELSE
-*                 ilu=1
-                  K = K + 1
-*                 k=(n+1)/2 for n odd and ilu=1
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, K - 2
-*                    process
-                     S = ZERO
-                     DO I = 0, J - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,i)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-                     AA = ABS( A( I+J*LDA ) )
-*                    i=j so process of A(j,j)
-                     S = S + AA
-                     WORK( J ) = S
-*                    is initialised here
-                     I = I + 1
-*                    i=j process A(j+k,j+k)
-                     AA = ABS( A( I+J*LDA ) )
-                     S = AA
-                     DO L = K + J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(l,k+j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( K+J ) = WORK( K+J ) + S
-                  END DO
-*                 j=k-1 is special :process col A(k-1,0:k-1)
-                  S = ZERO
-                  DO I = 0, K - 2
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(k,i)
-                     WORK( I ) = WORK( I ) + AA
-                     S = S + AA
-                  END DO
-*                 i=k-1
-                  AA = ABS( A( I+J*LDA ) )
-*                 A(k-1,k-1)
-                  S = S + AA
-                  WORK( I ) = S
-*                 done with col j=k+1
-                  DO J = K, N - 1
-*                    process col j of A = A(j,0:k-1)
-                     S = ZERO
-                     DO I = 0, K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,i)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) 
-     $                    VALUE = TEMP
-                  END DO
-               END IF
-            ELSE
-*              n is even
-               IF( ILU.EQ.0 ) THEN
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-                  DO J = 0, K - 1
-                     S = ZERO
-                     DO I = 0, K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,i+k)
-                        WORK( I+K ) = WORK( I+K ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = S
-                  END DO
-*                 j=k
-                  AA = ABS( A( 0+J*LDA ) )
-*                 A(k,k)
-                  S = AA
-                  DO I = 1, K - 1
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(k,k+i)
-                     WORK( I+K ) = WORK( I+K ) + AA
-                     S = S + AA
-                  END DO
-                  WORK( J ) = WORK( J ) + S
-                  DO J = K + 1, N - 1
-                     S = ZERO
-                     DO I = 0, J - 2 - K
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(i,j-k-1)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-*                     i=j-1-k
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(j-k-1,j-k-1)
-                     S = S + AA
-                     WORK( J-K-1 ) = WORK( J-K-1 ) + S
-                     I = I + 1
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(j,j)
-                     S = AA
-                     DO L = J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j,l)
-                        WORK( L ) = WORK( L ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J ) = WORK( J ) + S
-                  END DO
-*                 j=n
-                  S = ZERO
-                  DO I = 0, K - 2
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(i,k-1)
-                     WORK( I ) = WORK( I ) + AA
-                     S = S + AA
-                  END DO
-*                 i=k-1
-                  AA = ABS( A( I+J*LDA ) )
-*                 A(k-1,k-1)
-                  S = S + AA
-                  WORK( I ) = WORK( I ) + S
-                  VALUE = WORK ( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               ELSE
-*                 ilu=1
-                  DO I = K, N - 1
-                     WORK( I ) = ZERO
-                  END DO
-*                 j=0 is special :process col A(k:n-1,k)
-                  S = ABS( A( 0 ) )
-*                 A(k,k)
-                  DO I = 1, K - 1
-                     AA = ABS( A( I ) )
-*                    A(k+i,k)
-                     WORK( I+K ) = WORK( I+K ) + AA
-                     S = S + AA
-                  END DO
-                  WORK( K ) = WORK( K ) + S
-                  DO J = 1, K - 1
-*                    process
-                     S = ZERO
-                     DO I = 0, J - 2
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j-1,i)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-                     AA = ABS( A( I+J*LDA ) )
-*                    i=j-1 so process of A(j-1,j-1)
-                     S = S + AA
-                     WORK( J-1 ) = S
-*                    is initialised here
-                     I = I + 1
-*                    i=j process A(j+k,j+k)
-                     AA = ABS( A( I+J*LDA ) )
-                     S = AA
-                     DO L = K + J + 1, N - 1
-                        I = I + 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(l,k+j)
-                        S = S + AA
-                        WORK( L ) = WORK( L ) + AA
-                     END DO
-                     WORK( K+J ) = WORK( K+J ) + S
-                  END DO
-*                 j=k is special :process col A(k,0:k-1)
-                  S = ZERO
-                  DO I = 0, K - 2
-                     AA = ABS( A( I+J*LDA ) )
-*                    A(k,i)
-                     WORK( I ) = WORK( I ) + AA
-                     S = S + AA
-                  END DO
-*                 i=k-1
-                  AA = ABS( A( I+J*LDA ) )
-*                 A(k-1,k-1)
-                  S = S + AA
-                  WORK( I ) = S
-*                 done with col j=k+1
-                  DO J = K + 1, N
-*                    process col j-1 of A = A(j-1,0:k-1)
-                     S = ZERO
-                     DO I = 0, K - 1
-                        AA = ABS( A( I+J*LDA ) )
-*                       A(j-1,i)
-                        WORK( I ) = WORK( I ) + AA
-                        S = S + AA
-                     END DO
-                     WORK( J-1 ) = WORK( J-1 ) + S
-                  END DO
-                  VALUE = WORK( 0 )
-                  DO I = 1, N-1
-                     TEMP = WORK( I )
-                     IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) )
-     $                    VALUE = TEMP
-                  END DO
-               END IF
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*       Find normF(A).
-*
-         K = ( N+1 ) / 2
-         SCALE = ZERO
-         S = ONE
-         IF( NOE.EQ.1 ) THEN
-*           n is odd
-            IF( IFM.EQ.1 ) THEN
-*              A is normal
-               IF( ILU.EQ.0 ) THEN
-*                 A is upper
-                  DO J = 0, K - 3
-                     CALL SLASSQ( K-J-2, A( K+J+1+J*LDA ), 1, SCALE, S )
-*                    L at A(k,0)
-                  END DO
-                  DO J = 0, K - 1
-                     CALL SLASSQ( K+J-1, A( 0+J*LDA ), 1, SCALE, S )
-*                    trap U at A(0,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  CALL SLASSQ( K-1, A( K ), LDA+1, SCALE, S )
-*                 tri L at A(k,0)
-                  CALL SLASSQ( K, A( K-1 ), LDA+1, SCALE, S )
-*                 tri U at A(k-1,0)
-               ELSE
-*                 ilu=1 & A is lower
-                  DO J = 0, K - 1
-                     CALL SLASSQ( N-J-1, A( J+1+J*LDA ), 1, SCALE, S )
-*                    trap L at A(0,0)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL SLASSQ( J, A( 0+( 1+J )*LDA ), 1, SCALE, S )
-*                    U at A(0,1)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S )
-*                 tri L at A(0,0)
-                  CALL SLASSQ( K-1, A( 0+LDA ), LDA+1, SCALE, S )
-*                 tri U at A(0,1)
-               END IF
-            ELSE
-*              A is xpose
-               IF( ILU.EQ.0 ) THEN
-*                 A**T is upper
-                  DO J = 1, K - 2
-                     CALL SLASSQ( J, A( 0+( K+J )*LDA ), 1, SCALE, S )
-*                    U at A(0,k)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S )
-*                    k by k-1 rect. at A(0,0)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL SLASSQ( K-J-1, A( J+1+( J+K-1 )*LDA ), 1,
-     $                            SCALE, S )
-*                    L at A(0,k-1)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  CALL SLASSQ( K-1, A( 0+K*LDA ), LDA+1, SCALE, S )
-*                 tri U at A(0,k)
-                  CALL SLASSQ( K, A( 0+( K-1 )*LDA ), LDA+1, SCALE, S )
-*                 tri L at A(0,k-1)
-               ELSE
-*                 A**T is lower
-                  DO J = 1, K - 1
-                     CALL SLASSQ( J, A( 0+J*LDA ), 1, SCALE, S )
-*                    U at A(0,0)
-                  END DO
-                  DO J = K, N - 1
-                     CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S )
-*                    k by k-1 rect. at A(0,k)
-                  END DO
-                  DO J = 0, K - 3
-                     CALL SLASSQ( K-J-2, A( J+2+J*LDA ), 1, SCALE, S )
-*                    L at A(1,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S )
-*                 tri U at A(0,0)
-                  CALL SLASSQ( K-1, A( 1 ), LDA+1, SCALE, S )
-*                 tri L at A(1,0)
-               END IF
-            END IF
-         ELSE
-*           n is even
-            IF( IFM.EQ.1 ) THEN
-*              A is normal
-               IF( ILU.EQ.0 ) THEN
-*                 A is upper
-                  DO J = 0, K - 2
-                     CALL SLASSQ( K-J-1, A( K+J+2+J*LDA ), 1, SCALE, S )
-*                    L at A(k+1,0)
-                  END DO
-                  DO J = 0, K - 1
-                     CALL SLASSQ( K+J, A( 0+J*LDA ), 1, SCALE, S )
-*                    trap U at A(0,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  CALL SLASSQ( K, A( K+1 ), LDA+1, SCALE, S )
-*                 tri L at A(k+1,0)
-                  CALL SLASSQ( K, A( K ), LDA+1, SCALE, S )
-*                 tri U at A(k,0)
-               ELSE
-*                 ilu=1 & A is lower
-                  DO J = 0, K - 1
-                     CALL SLASSQ( N-J-1, A( J+2+J*LDA ), 1, SCALE, S )
-*                    trap L at A(1,0)
-                  END DO
-                  DO J = 1, K - 1
-                     CALL SLASSQ( J, A( 0+J*LDA ), 1, SCALE, S )
-*                    U at A(0,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  CALL SLASSQ( K, A( 1 ), LDA+1, SCALE, S )
-*                 tri L at A(1,0)
-                  CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S )
-*                 tri U at A(0,0)
-               END IF
-            ELSE
-*              A is xpose
-               IF( ILU.EQ.0 ) THEN
-*                 A**T is upper
-                  DO J = 1, K - 1
-                     CALL SLASSQ( J, A( 0+( K+1+J )*LDA ), 1, SCALE, S )
-*                    U at A(0,k+1)
-                  END DO
-                  DO J = 0, K - 1
-                     CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S )
-*                    k by k rect. at A(0,0)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL SLASSQ( K-J-1, A( J+1+( J+K )*LDA ), 1, SCALE,
-     $                            S )
-*                    L at A(0,k)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  CALL SLASSQ( K, A( 0+( K+1 )*LDA ), LDA+1, SCALE, S )
-*                 tri U at A(0,k+1)
-                  CALL SLASSQ( K, A( 0+K*LDA ), LDA+1, SCALE, S )
-*                 tri L at A(0,k)
-               ELSE
-*                 A**T is lower
-                  DO J = 1, K - 1
-                     CALL SLASSQ( J, A( 0+( J+1 )*LDA ), 1, SCALE, S )
-*                    U at A(0,1)
-                  END DO
-                  DO J = K + 1, N
-                     CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S )
-*                    k by k rect. at A(0,k+1)
-                  END DO
-                  DO J = 0, K - 2
-                     CALL SLASSQ( K-J-1, A( J+1+J*LDA ), 1, SCALE, S )
-*                    L at A(0,0)
-                  END DO
-                  S = S + S
-*                 double s for the off diagonal elements
-                  CALL SLASSQ( K, A( LDA ), LDA+1, SCALE, S )
-*                 tri L at A(0,1)
-                  CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S )
-*                 tri U at A(0,0)
-               END IF
-            END IF
-         END IF
-         VALUE = SCALE*SQRT( S )
-      END IF
-*
-      SLANSF = VALUE
-      RETURN
-*
-*     End of SLANSF
-*
-      END
diff --git a/blas_fix/slansp.f b/blas_fix/slansp.f
deleted file mode 100644
index 28b0e70c8..000000000
--- a/blas_fix/slansp.f
+++ /dev/null
@@ -1,262 +0,0 @@
-*> \brief \b SLANSP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix supplied in packed form.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANSP + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slansp.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slansp.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slansp.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANSP( NORM, UPLO, N, AP, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               AP( * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANSP  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> real symmetric matrix A,  supplied in packed form.
-*> \endverbatim
-*>
-*> \return SLANSP
-*> \verbatim
-*>
-*>    SLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANSP as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          symmetric matrix A is supplied.
-*>          = 'U':  Upper triangular part of A is supplied
-*>          = 'L':  Lower triangular part of A is supplied
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANSP is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is REAL array, dimension (N*(N+1)/2)
-*>          The upper or lower triangle of the symmetric matrix A, packed
-*>          columnwise in a linear array.  The j-th column of A is stored
-*>          in the array AP as follows:
-*>          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
-*>          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANSP( NORM, UPLO, N, AP, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               AP( * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J, K
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            K = 1
-            DO 20 J = 1, N
-               DO 10 I = K, K + J - 1
-                  SUM = ABS( AP( I ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10          CONTINUE
-               K = K + J
-   20       CONTINUE
-         ELSE
-            K = 1
-            DO 40 J = 1, N
-               DO 30 I = K, K + N - J
-                  SUM = ABS( AP( I ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-               K = K + N - J + 1
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is symmetric).
-*
-         VALUE = ZERO
-         K = 1
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               DO 50 I = 1, J - 1
-                  ABSA = ABS( AP( K ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-                  K = K + 1
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( AP( K ) )
-               K = K + 1
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM               
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( AP( K ) )
-               K = K + 1
-               DO 90 I = J + 1, N
-                  ABSA = ABS( AP( K ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-                  K = K + 1
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM               
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         K = 2
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 2, N
-               CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM )
-               K = K + J
-  110       CONTINUE
-         ELSE
-            DO 120 J = 1, N - 1
-               CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM )
-               K = K + N - J + 1
-  120       CONTINUE
-         END IF
-         SUM = 2*SUM
-         K = 1
-         DO 130 I = 1, N
-            IF( AP( K ).NE.ZERO ) THEN
-               ABSA = ABS( AP( K ) )
-               IF( SCALE.LT.ABSA ) THEN
-                  SUM = ONE + SUM*( SCALE / ABSA )**2
-                  SCALE = ABSA
-               ELSE
-                  SUM = SUM + ( ABSA / SCALE )**2
-               END IF
-            END IF
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               K = K + I + 1
-            ELSE
-               K = K + N - I + 1
-            END IF
-  130    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANSP = VALUE
-      RETURN
-*
-*     End of SLANSP
-*
-      END
diff --git a/blas_fix/slanst.f b/blas_fix/slanst.f
deleted file mode 100644
index 92c9227b7..000000000
--- a/blas_fix/slanst.f
+++ /dev/null
@@ -1,187 +0,0 @@
-*> \brief \b SLANST returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric tridiagonal matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANST + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slanst.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slanst.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slanst.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANST( NORM, N, D, E )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               D( * ), E( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANST  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> real symmetric tridiagonal matrix A.
-*> \endverbatim
-*>
-*> \return SLANST
-*> \verbatim
-*>
-*>    SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANST as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANST is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] D
-*> \verbatim
-*>          D is REAL array, dimension (N)
-*>          The diagonal elements of A.
-*> \endverbatim
-*>
-*> \param[in] E
-*> \verbatim
-*>          E is REAL array, dimension (N-1)
-*>          The (n-1) sub-diagonal or super-diagonal elements of A.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup auxOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANST( NORM, N, D, E )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               D( * ), E( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I
-      REAL               ANORM, SCALE, SUM
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      ANORM = ZERO
-      IF( N.LE.0 ) THEN
-         ANORM = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         ANORM = ABS( D( N ) )
-         DO 10 I = 1, N - 1
-            SUM = ABS( D( I ) )
-            IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM
-            SUM = ABS( E( I ) )
-            IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM
-   10    CONTINUE
-      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR.
-     $         LSAME( NORM, 'I' ) ) THEN
-*
-*        Find norm1(A).
-*
-         IF( N.EQ.1 ) THEN
-            ANORM = ABS( D( 1 ) )
-         ELSE
-            ANORM = ABS( D( 1 ) )+ABS( E( 1 ) )
-            SUM = ABS( E( N-1 ) )+ABS( D( N ) )
-            IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM
-            DO 20 I = 2, N - 1
-               SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) )
-               IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM
-   20       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         IF( N.GT.1 ) THEN
-            CALL SLASSQ( N-1, E, 1, SCALE, SUM )
-            SUM = 2*SUM
-         END IF
-         CALL SLASSQ( N, D, 1, SCALE, SUM )
-         ANORM = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANST = ANORM
-      RETURN
-*
-*     End of SLANST
-*
-      END
diff --git a/blas_fix/slansy.f b/blas_fix/slansy.f
deleted file mode 100644
index 54c28a25a..000000000
--- a/blas_fix/slansy.f
+++ /dev/null
@@ -1,242 +0,0 @@
-*> \brief \b SLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANSY + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slansy.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slansy.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slansy.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          NORM, UPLO
-*       INTEGER            LDA, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               A( LDA, * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANSY  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> real symmetric matrix A.
-*> \endverbatim
-*>
-*> \return SLANSY
-*> \verbatim
-*>
-*>    SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANSY as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the upper or lower triangular part of the
-*>          symmetric matrix A is to be referenced.
-*>          = 'U':  Upper triangular part of A is referenced
-*>          = 'L':  Lower triangular part of A is referenced
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANSY is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array, dimension (LDA,N)
-*>          The symmetric matrix A.  If UPLO = 'U', the leading n by n
-*>          upper triangular part of A contains the upper triangular part
-*>          of the matrix A, and the strictly lower triangular part of A
-*>          is not referenced.  If UPLO = 'L', the leading n by n lower
-*>          triangular part of A contains the lower triangular part of
-*>          the matrix A, and the strictly upper triangular part of A is
-*>          not referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(N,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
-*>          WORK is not referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realSYauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          NORM, UPLO
-      INTEGER            LDA, N
-*     ..
-*     .. Array Arguments ..
-      REAL               A( LDA, * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            I, J
-      REAL               ABSA, SCALE, SUM, VALUE
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 20 J = 1, N
-               DO 10 I = 1, J
-                  SUM = ABS( A( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10          CONTINUE
-   20       CONTINUE
-         ELSE
-            DO 40 J = 1, N
-               DO 30 I = J, N
-                  SUM = ABS( A( I, J ) )
-                  IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30          CONTINUE
-   40       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
-     $         ( NORM.EQ.'1' ) ) THEN
-*
-*        Find normI(A) ( = norm1(A), since A is symmetric).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 60 J = 1, N
-               SUM = ZERO
-               DO 50 I = 1, J - 1
-                  ABSA = ABS( A( I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   50          CONTINUE
-               WORK( J ) = SUM + ABS( A( J, J ) )
-   60       CONTINUE
-            DO 70 I = 1, N
-               SUM = WORK( I )
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70       CONTINUE
-         ELSE
-            DO 80 I = 1, N
-               WORK( I ) = ZERO
-   80       CONTINUE
-            DO 100 J = 1, N
-               SUM = WORK( J ) + ABS( A( J, J ) )
-               DO 90 I = J + 1, N
-                  ABSA = ABS( A( I, J ) )
-                  SUM = SUM + ABSA
-                  WORK( I ) = WORK( I ) + ABSA
-   90          CONTINUE
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  100       CONTINUE
-         END IF
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         SCALE = ZERO
-         SUM = ONE
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 2, N
-               CALL SLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
-  110       CONTINUE
-         ELSE
-            DO 120 J = 1, N - 1
-               CALL SLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
-  120       CONTINUE
-         END IF
-         SUM = 2*SUM
-         CALL SLASSQ( N, A, LDA+1, SCALE, SUM )
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANSY = VALUE
-      RETURN
-*
-*     End of SLANSY
-*
-      END
diff --git a/blas_fix/slantb.f b/blas_fix/slantb.f
deleted file mode 100644
index 5135bd625..000000000
--- a/blas_fix/slantb.f
+++ /dev/null
@@ -1,362 +0,0 @@
-*> \brief \b SLANTB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular band matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANTB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slantb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slantb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slantb.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANTB( NORM, UPLO, DIAG, N, K, AB,
-*                        LDAB, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          DIAG, NORM, UPLO
-*       INTEGER            K, LDAB, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               AB( LDAB, * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANTB  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the element of  largest absolute value  of an
-*> n by n triangular band matrix A,  with ( k + 1 ) diagonals.
-*> \endverbatim
-*>
-*> \return SLANTB
-*> \verbatim
-*>
-*>    SLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANTB as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the matrix A is upper or lower triangular.
-*>          = 'U':  Upper triangular
-*>          = 'L':  Lower triangular
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>          Specifies whether or not the matrix A is unit triangular.
-*>          = 'N':  Non-unit triangular
-*>          = 'U':  Unit triangular
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANTB is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>          The number of super-diagonals of the matrix A if UPLO = 'U',
-*>          or the number of sub-diagonals of the matrix A if UPLO = 'L'.
-*>          K >= 0.
-*> \endverbatim
-*>
-*> \param[in] AB
-*> \verbatim
-*>          AB is REAL array, dimension (LDAB,N)
-*>          The upper or lower triangular band matrix A, stored in the
-*>          first k+1 rows of AB.  The j-th column of A is stored
-*>          in the j-th column of the array AB as follows:
-*>          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
-*>          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
-*>          Note that when DIAG = 'U', the elements of the array AB
-*>          corresponding to the diagonal elements of the matrix A are
-*>          not referenced, but are assumed to be one.
-*> \endverbatim
-*>
-*> \param[in] LDAB
-*> \verbatim
-*>          LDAB is INTEGER
-*>          The leading dimension of the array AB.  LDAB >= K+1.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANTB( NORM, UPLO, DIAG, N, K, AB,
-     $                 LDAB, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          DIAG, NORM, UPLO
-      INTEGER            K, LDAB, N
-*     ..
-*     .. Array Arguments ..
-      REAL               AB( LDAB, * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            UDIAG
-      INTEGER            I, J, L
-      REAL               SCALE, SUM, VALUE
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         IF( LSAME( DIAG, 'U' ) ) THEN
-            VALUE = ONE
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 20 J = 1, N
-                  DO 10 I = MAX( K+2-J, 1 ), K
-                     SUM = ABS( AB( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10             CONTINUE
-   20          CONTINUE
-            ELSE
-               DO 40 J = 1, N
-                  DO 30 I = 2, MIN( N+1-J, K+1 )
-                     SUM = ABS( AB( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30             CONTINUE
-   40          CONTINUE
-            END IF
-         ELSE
-            VALUE = ZERO
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 60 J = 1, N
-                  DO 50 I = MAX( K+2-J, 1 ), K + 1
-                     SUM = ABS( AB( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   50             CONTINUE
-   60          CONTINUE
-            ELSE
-               DO 80 J = 1, N
-                  DO 70 I = 1, MIN( N+1-J, K+1 )
-                     SUM = ABS( AB( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70             CONTINUE
-   80          CONTINUE
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         UDIAG = LSAME( DIAG, 'U' )
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 90 I = MAX( K+2-J, 1 ), K
-                     SUM = SUM + ABS( AB( I, J ) )
-   90             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 100 I = MAX( K+2-J, 1 ), K + 1
-                     SUM = SUM + ABS( AB( I, J ) )
-  100             CONTINUE
-               END IF
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  110       CONTINUE
-         ELSE
-            DO 140 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 120 I = 2, MIN( N+1-J, K+1 )
-                     SUM = SUM + ABS( AB( I, J ) )
-  120             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 130 I = 1, MIN( N+1-J, K+1 )
-                     SUM = SUM + ABS( AB( I, J ) )
-  130             CONTINUE
-               END IF
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  140       CONTINUE
-         END IF
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         VALUE = ZERO
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 150 I = 1, N
-                  WORK( I ) = ONE
-  150          CONTINUE
-               DO 170 J = 1, N
-                  L = K + 1 - J
-                  DO 160 I = MAX( 1, J-K ), J - 1
-                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
-  160             CONTINUE
-  170          CONTINUE
-            ELSE
-               DO 180 I = 1, N
-                  WORK( I ) = ZERO
-  180          CONTINUE
-               DO 200 J = 1, N
-                  L = K + 1 - J
-                  DO 190 I = MAX( 1, J-K ), J
-                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
-  190             CONTINUE
-  200          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 210 I = 1, N
-                  WORK( I ) = ONE
-  210          CONTINUE
-               DO 230 J = 1, N
-                  L = 1 - J
-                  DO 220 I = J + 1, MIN( N, J+K )
-                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
-  220             CONTINUE
-  230          CONTINUE
-            ELSE
-               DO 240 I = 1, N
-                  WORK( I ) = ZERO
-  240          CONTINUE
-               DO 260 J = 1, N
-                  L = 1 - J
-                  DO 250 I = J, MIN( N, J+K )
-                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
-  250             CONTINUE
-  260          CONTINUE
-            END IF
-         END IF
-         DO 270 I = 1, N
-            SUM = WORK( I )
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  270    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
-               IF( K.GT.0 ) THEN
-                  DO 280 J = 2, N
-                     CALL SLASSQ( MIN( J-1, K ),
-     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
-     $                            SUM )
-  280             CONTINUE
-               END IF
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               DO 290 J = 1, N
-                  CALL SLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
-  290          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
-               IF( K.GT.0 ) THEN
-                  DO 300 J = 1, N - 1
-                     CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                            SUM )
-  300             CONTINUE
-               END IF
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               DO 310 J = 1, N
-                  CALL SLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
-     $                         SUM )
-  310          CONTINUE
-            END IF
-         END IF
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANTB = VALUE
-      RETURN
-*
-*     End of SLANTB
-*
-      END
diff --git a/blas_fix/slantp.f b/blas_fix/slantp.f
deleted file mode 100644
index e27d62c0e..000000000
--- a/blas_fix/slantp.f
+++ /dev/null
@@ -1,356 +0,0 @@
-*> \brief \b SLANTP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular matrix supplied in packed form.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANTP + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slantp.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slantp.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slantp.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANTP( NORM, UPLO, DIAG, N, AP, WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          DIAG, NORM, UPLO
-*       INTEGER            N
-*       ..
-*       .. Array Arguments ..
-*       REAL               AP( * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANTP  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> triangular matrix A, supplied in packed form.
-*> \endverbatim
-*>
-*> \return SLANTP
-*> \verbatim
-*>
-*>    SLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANTP as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the matrix A is upper or lower triangular.
-*>          = 'U':  Upper triangular
-*>          = 'L':  Lower triangular
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>          Specifies whether or not the matrix A is unit triangular.
-*>          = 'N':  Non-unit triangular
-*>          = 'U':  Unit triangular
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The order of the matrix A.  N >= 0.  When N = 0, SLANTP is
-*>          set to zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is REAL array, dimension (N*(N+1)/2)
-*>          The upper or lower triangular matrix A, packed columnwise in
-*>          a linear array.  The j-th column of A is stored in the array
-*>          AP as follows:
-*>          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
-*>          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
-*>          Note that when DIAG = 'U', the elements of the array AP
-*>          corresponding to the diagonal elements of the matrix A are
-*>          not referenced, but are assumed to be one.
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANTP( NORM, UPLO, DIAG, N, AP, WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          DIAG, NORM, UPLO
-      INTEGER            N
-*     ..
-*     .. Array Arguments ..
-      REAL               AP( * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            UDIAG
-      INTEGER            I, J, K
-      REAL               SCALE, SUM, VALUE
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( N.EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         K = 1
-         IF( LSAME( DIAG, 'U' ) ) THEN
-            VALUE = ONE
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 20 J = 1, N
-                  DO 10 I = K, K + J - 2
-                     SUM = ABS( AP( I ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10             CONTINUE
-                  K = K + J
-   20          CONTINUE
-            ELSE
-               DO 40 J = 1, N
-                  DO 30 I = K + 1, K + N - J
-                     SUM = ABS( AP( I ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30             CONTINUE
-                  K = K + N - J + 1
-   40          CONTINUE
-            END IF
-         ELSE
-            VALUE = ZERO
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 60 J = 1, N
-                  DO 50 I = K, K + J - 1
-                     SUM = ABS( AP( I ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   50             CONTINUE
-                  K = K + J
-   60          CONTINUE
-            ELSE
-               DO 80 J = 1, N
-                  DO 70 I = K, K + N - J
-                     SUM = ABS( AP( I ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70             CONTINUE
-                  K = K + N - J + 1
-   80          CONTINUE
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         K = 1
-         UDIAG = LSAME( DIAG, 'U' )
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 90 I = K, K + J - 2
-                     SUM = SUM + ABS( AP( I ) )
-   90             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 100 I = K, K + J - 1
-                     SUM = SUM + ABS( AP( I ) )
-  100             CONTINUE
-               END IF
-               K = K + J
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  110       CONTINUE
-         ELSE
-            DO 140 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 120 I = K + 1, K + N - J
-                     SUM = SUM + ABS( AP( I ) )
-  120             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 130 I = K, K + N - J
-                     SUM = SUM + ABS( AP( I ) )
-  130             CONTINUE
-               END IF
-               K = K + N - J + 1
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  140       CONTINUE
-         END IF
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         K = 1
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 150 I = 1, N
-                  WORK( I ) = ONE
-  150          CONTINUE
-               DO 170 J = 1, N
-                  DO 160 I = 1, J - 1
-                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
-                     K = K + 1
-  160             CONTINUE
-                  K = K + 1
-  170          CONTINUE
-            ELSE
-               DO 180 I = 1, N
-                  WORK( I ) = ZERO
-  180          CONTINUE
-               DO 200 J = 1, N
-                  DO 190 I = 1, J
-                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
-                     K = K + 1
-  190             CONTINUE
-  200          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 210 I = 1, N
-                  WORK( I ) = ONE
-  210          CONTINUE
-               DO 230 J = 1, N
-                  K = K + 1
-                  DO 220 I = J + 1, N
-                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
-                     K = K + 1
-  220             CONTINUE
-  230          CONTINUE
-            ELSE
-               DO 240 I = 1, N
-                  WORK( I ) = ZERO
-  240          CONTINUE
-               DO 260 J = 1, N
-                  DO 250 I = J, N
-                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
-                     K = K + 1
-  250             CONTINUE
-  260          CONTINUE
-            END IF
-         END IF
-         VALUE = ZERO
-         DO 270 I = 1, N
-            SUM = WORK( I )
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  270    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
-               K = 2
-               DO 280 J = 2, N
-                  CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM )
-                  K = K + J
-  280          CONTINUE
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               K = 1
-               DO 290 J = 1, N
-                  CALL SLASSQ( J, AP( K ), 1, SCALE, SUM )
-                  K = K + J
-  290          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
-               K = 2
-               DO 300 J = 1, N - 1
-                  CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM )
-                  K = K + N - J + 1
-  300          CONTINUE
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               K = 1
-               DO 310 J = 1, N
-                  CALL SLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
-                  K = K + N - J + 1
-  310          CONTINUE
-            END IF
-         END IF
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANTP = VALUE
-      RETURN
-*
-*     End of SLANTP
-*
-      END
diff --git a/blas_fix/slantr.f b/blas_fix/slantr.f
deleted file mode 100644
index 8ce04ea0f..000000000
--- a/blas_fix/slantr.f
+++ /dev/null
@@ -1,354 +0,0 @@
-*> \brief \b SLANTR returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix.
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLANTR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slantr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slantr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slantr.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
-*                        WORK )
-* 
-*       .. Scalar Arguments ..
-*       CHARACTER          DIAG, NORM, UPLO
-*       INTEGER            LDA, M, N
-*       ..
-*       .. Array Arguments ..
-*       REAL               A( LDA, * ), WORK( * )
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLANTR  returns the value of the one norm,  or the Frobenius norm, or
-*> the  infinity norm,  or the  element of  largest absolute value  of a
-*> trapezoidal or triangular matrix A.
-*> \endverbatim
-*>
-*> \return SLANTR
-*> \verbatim
-*>
-*>    SLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm'
-*>             (
-*>             ( norm1(A),         NORM = '1', 'O' or 'o'
-*>             (
-*>             ( normI(A),         NORM = 'I' or 'i'
-*>             (
-*>             ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
-*>
-*> where  norm1  denotes the  one norm of a matrix (maximum column sum),
-*> normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
-*> normF  denotes the  Frobenius norm of a matrix (square root of sum of
-*> squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] NORM
-*> \verbatim
-*>          NORM is CHARACTER*1
-*>          Specifies the value to be returned in SLANTR as described
-*>          above.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>          Specifies whether the matrix A is upper or lower trapezoidal.
-*>          = 'U':  Upper trapezoidal
-*>          = 'L':  Lower trapezoidal
-*>          Note that A is triangular instead of trapezoidal if M = N.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>          Specifies whether or not the matrix A has unit diagonal.
-*>          = 'N':  Non-unit diagonal
-*>          = 'U':  Unit diagonal
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>          The number of rows of the matrix A.  M >= 0, and if
-*>          UPLO = 'U', M <= N.  When M = 0, SLANTR is set to zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>          The number of columns of the matrix A.  N >= 0, and if
-*>          UPLO = 'L', N <= M.  When N = 0, SLANTR is set to zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array, dimension (LDA,N)
-*>          The trapezoidal matrix A (A is triangular if M = N).
-*>          If UPLO = 'U', the leading m by n upper trapezoidal part of
-*>          the array A contains the upper trapezoidal matrix, and the
-*>          strictly lower triangular part of A is not referenced.
-*>          If UPLO = 'L', the leading m by n lower trapezoidal part of
-*>          the array A contains the lower trapezoidal matrix, and the
-*>          strictly upper triangular part of A is not referenced.  Note
-*>          that when DIAG = 'U', the diagonal elements of A are not
-*>          referenced and are assumed to be one.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(M,1).
-*> \endverbatim
-*>
-*> \param[out] WORK
-*> \verbatim
-*>          WORK is REAL array, dimension (MAX(1,LWORK)),
-*>          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
-*>          referenced.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup realOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
-     $                 WORK )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      CHARACTER          DIAG, NORM, UPLO
-      INTEGER            LDA, M, N
-*     ..
-*     .. Array Arguments ..
-      REAL               A( LDA, * ), WORK( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      REAL               ONE, ZERO
-      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
-*     ..
-*     .. Local Scalars ..
-      LOGICAL            UDIAG
-      INTEGER            I, J
-      REAL               SCALE, SUM, VALUE
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
-*     ..
-*     .. External Functions ..
-      LOGICAL            LSAME, SISNAN
-      EXTERNAL           LSAME, SISNAN
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      VALUE = ZERO
-      IF( MIN( M, N ).EQ.0 ) THEN
-         VALUE = ZERO
-      ELSE IF( LSAME( NORM, 'M' ) ) THEN
-*
-*        Find max(abs(A(i,j))).
-*
-         IF( LSAME( DIAG, 'U' ) ) THEN
-            VALUE = ONE
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 20 J = 1, N
-                  DO 10 I = 1, MIN( M, J-1 )
-                     SUM = ABS( A( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   10             CONTINUE
-   20          CONTINUE
-            ELSE
-               DO 40 J = 1, N
-                  DO 30 I = J + 1, M
-                     SUM = ABS( A( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   30             CONTINUE
-   40          CONTINUE
-            END IF
-         ELSE
-            VALUE = ZERO
-            IF( LSAME( UPLO, 'U' ) ) THEN
-               DO 60 J = 1, N
-                  DO 50 I = 1, MIN( M, J )
-                     SUM = ABS( A( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   50             CONTINUE
-   60          CONTINUE
-            ELSE
-               DO 80 J = 1, N
-                  DO 70 I = J, M
-                     SUM = ABS( A( I, J ) )
-                     IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-   70             CONTINUE
-   80          CONTINUE
-            END IF
-         END IF
-      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
-*
-*        Find norm1(A).
-*
-         VALUE = ZERO
-         UDIAG = LSAME( DIAG, 'U' )
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            DO 110 J = 1, N
-               IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN
-                  SUM = ONE
-                  DO 90 I = 1, J - 1
-                     SUM = SUM + ABS( A( I, J ) )
-   90             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 100 I = 1, MIN( M, J )
-                     SUM = SUM + ABS( A( I, J ) )
-  100             CONTINUE
-               END IF
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  110       CONTINUE
-         ELSE
-            DO 140 J = 1, N
-               IF( UDIAG ) THEN
-                  SUM = ONE
-                  DO 120 I = J + 1, M
-                     SUM = SUM + ABS( A( I, J ) )
-  120             CONTINUE
-               ELSE
-                  SUM = ZERO
-                  DO 130 I = J, M
-                     SUM = SUM + ABS( A( I, J ) )
-  130             CONTINUE
-               END IF
-               IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  140       CONTINUE
-         END IF
-      ELSE IF( LSAME( NORM, 'I' ) ) THEN
-*
-*        Find normI(A).
-*
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 150 I = 1, M
-                  WORK( I ) = ONE
-  150          CONTINUE
-               DO 170 J = 1, N
-                  DO 160 I = 1, MIN( M, J-1 )
-                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-  160             CONTINUE
-  170          CONTINUE
-            ELSE
-               DO 180 I = 1, M
-                  WORK( I ) = ZERO
-  180          CONTINUE
-               DO 200 J = 1, N
-                  DO 190 I = 1, MIN( M, J )
-                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-  190             CONTINUE
-  200          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 210 I = 1, N
-                  WORK( I ) = ONE
-  210          CONTINUE
-               DO 220 I = N + 1, M
-                  WORK( I ) = ZERO
-  220          CONTINUE
-               DO 240 J = 1, N
-                  DO 230 I = J + 1, M
-                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-  230             CONTINUE
-  240          CONTINUE
-            ELSE
-               DO 250 I = 1, M
-                  WORK( I ) = ZERO
-  250          CONTINUE
-               DO 270 J = 1, N
-                  DO 260 I = J, M
-                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
-  260             CONTINUE
-  270          CONTINUE
-            END IF
-         END IF
-         VALUE = ZERO
-         DO 280 I = 1, M
-            SUM = WORK( I )
-            IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
-  280    CONTINUE
-      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
-*
-*        Find normF(A).
-*
-         IF( LSAME( UPLO, 'U' ) ) THEN
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = MIN( M, N )
-               DO 290 J = 2, N
-                  CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
-  290          CONTINUE
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               DO 300 J = 1, N
-                  CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
-  300          CONTINUE
-            END IF
-         ELSE
-            IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = MIN( M, N )
-               DO 310 J = 1, N
-                  CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
-     $                         SUM )
-  310          CONTINUE
-            ELSE
-               SCALE = ZERO
-               SUM = ONE
-               DO 320 J = 1, N
-                  CALL SLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
-  320          CONTINUE
-            END IF
-         END IF
-         VALUE = SCALE*SQRT( SUM )
-      END IF
-*
-      SLANTR = VALUE
-      RETURN
-*
-*     End of SLANTR
-*
-      END
diff --git a/blas_fix/slapy2.f b/blas_fix/slapy2.f
deleted file mode 100644
index 85f502bd1..000000000
--- a/blas_fix/slapy2.f
+++ /dev/null
@@ -1,104 +0,0 @@
-*> \brief \b SLAPY2 returns sqrt(x2+y2).
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLAPY2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slapy2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slapy2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slapy2.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLAPY2( X, Y )
-* 
-*       .. Scalar Arguments ..
-*       REAL               X, Y
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
-*> overflow.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] X
-*> \verbatim
-*>          X is REAL
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is REAL
-*>          X and Y specify the values x and y.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup auxOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLAPY2( X, Y )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      REAL               X, Y
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ZERO
-      PARAMETER          ( ZERO = 0.0E0 )
-      REAL               ONE
-      PARAMETER          ( ONE = 1.0E0 )
-*     ..
-*     .. Local Scalars ..
-      REAL               W, XABS, YABS, Z
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      XABS = ABS( X )
-      YABS = ABS( Y )
-      W = MAX( XABS, YABS )
-      Z = MIN( XABS, YABS )
-      IF( Z.EQ.ZERO ) THEN
-         SLAPY2 = W
-      ELSE
-         SLAPY2 = W*SQRT( ONE+( Z / W )**2 )
-      END IF
-      RETURN
-*
-*     End of SLAPY2
-*
-      END
diff --git a/blas_fix/slapy3.f b/blas_fix/slapy3.f
deleted file mode 100644
index 34bf5e15e..000000000
--- a/blas_fix/slapy3.f
+++ /dev/null
@@ -1,111 +0,0 @@
-*> \brief \b SLAPY3 returns sqrt(x2+y2+z2).
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*> \htmlonly
-*> Download SLAPY3 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slapy3.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slapy3.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slapy3.f"> 
-*> [TXT]</a>
-*> \endhtmlonly 
-*
-*  Definition:
-*  ===========
-*
-*       REAL             FUNCTION SLAPY3( X, Y, Z )
-* 
-*       .. Scalar Arguments ..
-*       REAL               X, Y, Z
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause
-*> unnecessary overflow.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] X
-*> \verbatim
-*>          X is REAL
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is REAL
-*> \endverbatim
-*>
-*> \param[in] Z
-*> \verbatim
-*>          Z is REAL
-*>          X, Y and Z specify the values x, y and z.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date September 2012
-*
-*> \ingroup auxOTHERauxiliary
-*
-*  =====================================================================
-      REAL             FUNCTION SLAPY3( X, Y, Z )
-*
-*  -- LAPACK auxiliary routine (version 3.4.2) --
-*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
-*
-*     .. Scalar Arguments ..
-      REAL               X, Y, Z
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL               ZERO
-      PARAMETER          ( ZERO = 0.0E0 )
-*     ..
-*     .. Local Scalars ..
-      REAL               W, XABS, YABS, ZABS
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, SQRT
-*     ..
-*     .. Executable Statements ..
-*
-      XABS = ABS( X )
-      YABS = ABS( Y )
-      ZABS = ABS( Z )
-      W = MAX( XABS, YABS, ZABS )
-      IF( W.EQ.ZERO ) THEN
-*     W can be zero for max(0,nan,0)
-*     adding all three entries together will make sure
-*     NaN will not disappear.
-         SLAPY3 =  XABS + YABS + ZABS
-      ELSE
-         SLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+
-     $            ( ZABS / W )**2 )
-      END IF
-      RETURN
-*
-*     End of SLAPY3
-*
-      END
diff --git a/blas_fix/snrm2.f b/blas_fix/snrm2.f
deleted file mode 100644
index a3674a6d7..000000000
--- a/blas_fix/snrm2.f
+++ /dev/null
@@ -1,112 +0,0 @@
-*> \brief \b SNRM2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SNRM2(N,X,INCX)
-* 
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       REAL X(*)
-*       ..
-*  
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>
-*>    SNRM2 := sqrt( x'*x ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
-*
-*> \date November 2011
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to SLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SNRM2(N,X,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.4.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      REAL X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL ABSXI,NORM,SCALE,SSQ
-      INTEGER IX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,SQRT
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE IF (N.EQ.1) THEN
-          NORM = ABS(X(1))
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-*        CALL SLASSQ( N, X, INCX, SCALE, SSQ )
-*
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (X(IX).NE.ZERO) THEN
-                  ABSXI = ABS(X(IX))
-                  IF (SCALE.LT.ABSXI) THEN
-                      SSQ = ONE + SSQ* (SCALE/ABSXI)**2
-                      SCALE = ABSXI
-                  ELSE
-                      SSQ = SSQ + (ABSXI/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-*
-      SNRM2 = NORM
-      RETURN
-*
-*     End of SNRM2.
-*
-      END

From 1b5739dc70ebf6793fad0a29d926d4c0aee44c4f Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Fri, 14 Feb 2025 13:35:46 +0100
Subject: [PATCH 06/27] remove macOS support

---
 CMakeLists.txt              |  53 +-------
 Makefile                    |  47 +------
 Makefile.internal           |   3 -
 Makefile.subdir             |   1 -
 README                      |  12 +-
 ReleaseChecklist            |   7 +-
 control/magma_internal.h    |   8 +-
 docs/contributors-guide.txt |  39 +++---
 docs/documentation.txt      |  13 +-
 fortran/Makefile            |   3 +-
 testing/Makefile.src        |   2 -
 testing/testing_veclib.cpp  | 241 ------------------------------------
 tools/build.sh              |   2 +-
 tools/checklist.csh         |   2 +-
 14 files changed, 44 insertions(+), 389 deletions(-)
 delete mode 100644 testing/testing_veclib.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9324be3eb..3e051965b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -327,13 +327,13 @@ set(BLA_VENDOR "" CACHE STRING
 
 # List from CMake 3.17, minus some obsolete ones:
 # PhiPACK, Compaq CXML, DEC Alpha DXML, SunPerf, SGI SCSL, SGIMATH,
-# Intel, NAS (Apple veclib).
+# Intel, NAS.
+# macOS Accelerate is not supported, since Apple dropped CUDA.
 # FLAME is BLIS.
 set_property(CACHE BLA_VENDOR PROPERTY STRINGS
     "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL"
     "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq"
     "ACML" "ACML_MP" "ACML_GPU"
-    "Apple"
     "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp"
     "Generic")
 
@@ -407,38 +407,6 @@ else()
     set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function" )
 endif()
 
-if (CMAKE_HOST_APPLE)
-    # Use rpaths, which is on by default in CMake 3.
-    set( CMAKE_MACOSX_RPATH 1 )
-
-    # 64-bit veclib (Accelerate) has issues; substitute correct functions from LAPACK.
-    # (The issue is single precision functions that return doubles;
-    # if a consistent prototype is used, the problem goes away in C,
-    # but this is not feasible in Fortran.)
-    if (LAPACK_LIBRARIES MATCHES "Accelerate")
-        if (USE_FORTRAN)
-            message( STATUS "MacOS X: adding blas_fix library" )
-            add_library( blas_fix ${libblas_fix_src} )
-            target_link_libraries( blas_fix
-                ${LAPACK_LIBRARIES}
-            )
-            set( blas_fix blas_fix )
-            set( blas_fix_lib -lblas_fix )
-        else()
-            message( WARNING "\n      Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n" )
-        endif()
-    endif()
-
-    set( CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY" )
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY" )
-
-    # previously, just compile as 32-bit, but CUDA 6.5 no longer has 32-bit FAT libraries
-    ## set( CMAKE_C_FLAGS       "${CMAKE_C_FLAGS} -m32" )
-    ## set( CMAKE_CXX_FLAGS     "${CMAKE_CXX_FLAGS} -m32" )
-    ## set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -m32" )
-    ## set( CUDA_64_BIT_DEVICE_CODE OFF )
-endif()
-
 include_directories( "${CMAKE_BINARY_DIR}/include" )
 
 include_directories( include )
@@ -506,7 +474,6 @@ else()
     #message(FATAL_ERROR "${libmagma_all}")
       add_library( magma ${libmagma_all} )
       target_link_libraries( magma
-        ${blas_fix}
         ${LAPACK_LIBRARIES}
         CUDA::cudart
         CUDA::cublas
@@ -525,7 +492,6 @@ else()
       add_library( magma ${libmagma_all} )
       target_link_libraries( magma
         hip::host
-        ${blas_fix}
         ${LAPACK_LIBRARIES}
         hip::device
         roc::hipblas
@@ -568,7 +534,6 @@ else()
     add_library( lapacktest ${liblapacktest_all_cpp} )
 endif()
 target_link_libraries( lapacktest
-    ${blas_fix}
     ${LAPACK_LIBRARIES}
 )
 
@@ -579,7 +544,6 @@ add_library( tester ${libtest_all} )
 target_link_libraries( tester
     magma
     lapacktest
-    ${blas_fix}
     ${LAPACK_LIBRARIES}
 )
 
@@ -602,7 +566,6 @@ if (MAGMA_ENABLE_CUDA)
   set_property(TARGET magma_sparse PROPERTY CUDA_STANDARD 14)
   target_link_libraries( magma_sparse
     magma
-    ${blas_fix}
     ${LAPACK_LIBRARIES}
     CUDA::cudart
     CUDA::cublas
@@ -613,7 +576,6 @@ else()
   add_library( magma_sparse ${libsparse_all} )
   target_link_libraries( magma_sparse
     magma
-    ${blas_fix}
     ${LAPACK_LIBRARIES}
     hip::device
     roc::hipblas
@@ -673,7 +635,7 @@ add_custom_target( sparse-testing DEPENDS ${sparse-testing} )
 
 # ----------------------------------------
 # what to install
-install( TARGETS magma magma_sparse ${blas_fix}
+install( TARGETS magma magma_sparse
          RUNTIME DESTINATION bin
          LIBRARY DESTINATION lib
          ARCHIVE DESTINATION lib )
@@ -701,15 +663,13 @@ message( STATUS "pkgconfig ${pkgconfig}" )
 set( INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" )
 set( CFLAGS "${CMAKE_C_FLAGS}" )
 set( CXXFLAGS "${CMAKE_CXX_FLAGS}" )
-# CMake finds the Accelerate directory; we want -framework Accelerate for linking.
-string( REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}" )
 if (MAGMA_ENABLE_CUDA)
   string( REPLACE ";" " " LIBS
-    "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse")
+          "${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse")
 else()
   string( REPLACE ";" " " LIBS
-     "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}" )
-#    "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" )
+          "${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}" )
+#         "${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" )
 endif()
 set( MAGMA_REQUIRED "" )
 configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY )
@@ -730,7 +690,6 @@ else()
 endif()
 message( STATUS "    FFLAGS:                ${CMAKE_Fortran_FLAGS}" )
 message( STATUS "    LIBS:                  ${LIBS}" )
-message( STATUS "    blas_fix:              ${blas_fix}  (MacOS Accelerate only)" )
 message( STATUS "    LAPACK_LIBRARIES:      ${LAPACK_LIBRARIES}"      )
 message( STATUS "    INCLUDE_DIRECTORIES:   ${MAGMA_INCLUDE}"   )
 if (MAGMA_ENABLE_CUDA)
diff --git a/Makefile b/Makefile
index 1fabf54ca..df8238d96 100644
--- a/Makefile
+++ b/Makefile
@@ -106,11 +106,6 @@ prefix     ?= /usr/local/magma
 # ------------------------------------------------------------------------------
 # MAGMA-specific programs & flags
 
-ifeq ($(blas_fix),1)
-    # prepend -lblas_fix to LIB (it must come before LAPACK library/framework)
-    LIB := -lblas_fix $(LIB)
-endif
-
 LIBS       = $(LIBDIR) $(LIB)
 
 # preprocessor flags. See below for MAGMA_INC
@@ -323,7 +318,6 @@ libsparse_dynamic_src:=
 sparse_testing_src   :=
 
 subdirs := \
-	blas_fix            \
 	control             \
 	include             \
 	src                 \
@@ -384,7 +378,6 @@ ifeq ($(FORT),)
 endif
 
 libmagma_obj       := $(addsuffix .$(o_ext), $(basename $(libmagma_all)))
-libblas_fix_obj    := $(addsuffix .$(o_ext), $(basename $(libblas_fix_src)))
 libtest_obj        := $(addsuffix .$(o_ext), $(basename $(libtest_all)))
 liblapacktest_obj  := $(addsuffix .$(o_ext), $(basename $(liblapacktest_all2)))
 testing_obj        := $(addsuffix .$(o_ext), $(basename $(testing_all)))
@@ -418,7 +411,6 @@ endif
 
 deps :=
 deps += $(addsuffix .d, $(basename $(libmagma_all)))
-deps += $(addsuffix .d, $(basename $(libblas_fix_src)))
 deps += $(addsuffix .d, $(basename $(libtest_all)))
 deps += $(addsuffix .d, $(basename $(lapacktest_all2)))
 deps += $(addsuffix .d, $(basename $(testing_all)))
@@ -441,7 +433,6 @@ $(sparse_testing_obj): testing/testings.h
 
 # this allows "make force=force" to force re-compiling
 $(libmagma_obj):       $(force)
-$(libblas_fix_obj):    $(force)
 $(libtest_obj):        $(force)
 $(liblapacktest_obj):  $(force)
 $(testing_obj):        $(force)
@@ -520,7 +511,6 @@ test_headers: $(header_gch)
 # ----- libraries
 libmagma_a      := lib/libmagma.a
 libmagma_so     := lib/libmagma.so
-libblas_fix_a   := lib/libblas_fix.a
 libtest_a       := testing/libtest.a
 liblapacktest_a := testing/lin/liblapacktest.a
 libsparse_a     := lib/libmagma_sparse.a
@@ -532,7 +522,6 @@ libs_a := \
 	$(libmagma_a)		\
 	$(libtest_a)		\
 	$(liblapacktest_a)	\
-	$(libblas_fix_a)	\
 	$(libsparse_a)		\
 
 # shared libraries
@@ -545,7 +534,6 @@ libs_so := \
 # add objects to libraries
 $(libmagma_a):      $(libmagma_obj)
 $(libmagma_so):     $(libmagma_obj)
-$(libblas_fix_a):   $(libblas_fix_obj)
 $(libtest_a):       $(libtest_obj)
 $(liblapacktest_a): $(liblapacktest_obj)
 $(libsparse_a):     $(libsparse_obj)
@@ -571,26 +559,6 @@ $(testers):        $(libtest_a) $(liblapacktest_a)
 $(testers_f):      $(libtest_a) $(liblapacktest_a)
 $(sparse_testers): $(libtest_a)  # doesn't use liblapacktest
 
-# ----- blas_fix
-# if using blas_fix (e.g., on MacOS), libmagma requires libblas_fix
-ifeq ($(blas_fix),1)
-    $(libmagma_a):     | $(libblas_fix_a)
-    $(libmagma_so):    | $(libblas_fix_a)
-    $(testers):        | $(libblas_fix_a)
-    $(testers_f):      | $(libblas_fix_a)
-    $(sparse_testers): | $(libblas_fix_a)
-endif
-
-
-# ------------------------------------------------------------------------------
-# MacOS (darwin) needs shared library's path set
-# $OSTYPE may not be exported from the shell, so echo it
-ostype = ${shell echo $${OSTYPE}}
-ifneq ($(findstring darwin, ${ostype}),)
-    $(libmagma_so):  LDFLAGS += -install_name @rpath/$(notdir $(libmagma_so))
-    $(libsparse_so): LDFLAGS += -install_name @rpath/$(notdir $(libsparse_so))
-endif
-
 
 # ------------------------------------------------------------------------------
 # targets
@@ -683,10 +651,6 @@ else
 endif
 # --------------------
 
-ifeq ($(blas_fix),1)
-    libs += $(libblas_fix_a)
-endif
-
 
 # ------------------------------------------------------------------------------
 # static libraries
@@ -721,8 +685,6 @@ endif
 # sub-directory builds
 include:             $(header_all)
 
-blas_fix:            $(libblas_fix_a)
-
 control:             $(control_obj)
 
 
@@ -757,9 +719,6 @@ run_test: test
 include/clean:
 	-rm -f $(shdr) $(dhdr) $(chdr)
 
-blas_fix/clean:
-	-rm -f $(libblas_fix_a) $(libblas_fix_obj)
-
 control/clean:
 	-rm -f $(control_obj) include/*.mod control/*.mod
 
@@ -794,7 +753,7 @@ testing/lin/clean:
 	-rm -f $(liblapacktest_a) $(liblapacktest_obj)
 
 # hmm... what should lib/clean do? just the libraries, not objects?
-lib/clean: blas_fix/clean sparse/clean
+lib/clean: sparse/clean
 	-rm -f $(libmagma_a) $(libmagma_so) $(libmagma_obj)
 
 sparse/clean: sparse/testing/clean
@@ -1066,10 +1025,6 @@ echo:
 	@echo "libsparse_a        $(libsparse_a)"
 	@echo "libsparse_so       $(libsparse_so)"
 	@echo "====="
-	@echo "blas_fix           $(blas_fix)"
-	@echo "libblas_fix_src    $(libblas_fix_src)"
-	@echo "libblas_fix_a      $(libblas_fix_a)"
-	@echo "====="
 	@echo "libtest_src        $(libtest_src)\n"
 	@echo "libtest_all        $(libtest_all)\n"
 	@echo "libtest_obj        $(libtest_obj)\n"
diff --git a/Makefile.internal b/Makefile.internal
index 55e12bfa0..afd5d25f6 100644
--- a/Makefile.internal
+++ b/Makefile.internal
@@ -27,9 +27,6 @@ $(CMAKESRC): Makefile.gen.$(BACKEND)
 	echo "set( libmagma_all"                           >> $@
 	echo    "$(libmagma_all) )"          | $(newlines) >> $@
 	echo                                               >> $@
-	echo "set( libblas_fix_src"                        >> $@
-	echo    "$(libblas_fix_src) )"       | $(newlines) >> $@
-	echo                                               >> $@
 	echo "set( libtest_all"                            >> $@
 	echo    "$(libtest_all) )"           | $(newlines) >> $@
 	echo                                               >> $@
diff --git a/Makefile.subdir b/Makefile.subdir
index 05d30bd2b..419e72dcb 100644
--- a/Makefile.subdir
+++ b/Makefile.subdir
@@ -13,7 +13,6 @@
 # alphabetic order
 targets := \
 	all                  \
-	blas_fix             \
 	cleanall             \
 	cleandep             \
 	cleangen             \
diff --git a/README b/README
index ff1720593..c9f72b77e 100644
--- a/README
+++ b/README
@@ -22,14 +22,14 @@ MAGMA README FILE
         make install prefix=/usr/local/magma
 
     MAGMA uses HIP code generation to compile for AMD GPUs.  MAGMA HIP sources will be
-    generated from interface_cuda, magmablas, and sparse into interface_hip, magmablas_hip, 
+    generated from interface_cuda, magmablas, and sparse into interface_hip, magmablas_hip,
     and sparse_hip, respectively.
 
 * Quick start (CMake)
 
     There is also a CMake option to configure and build MAGMA.
     For more Windows-specific instructions, see README-Windows.
-    On Unix & MacOS:
+    On Unix:
 
     Step 0: setup
     If you downloaded an official release (e.g., magma-2.6.0.tar.gz), you can
@@ -57,8 +57,8 @@ MAGMA README FILE
         make install
 
     Options include:
-        -DMAGMA_ENABLE_CUDA=ON to install MAGMA for CUDA. This is default. 
-        -DMAGMA_ENABLE_HIP=ON to install MAGMA for HIP. This option requires 
+        -DMAGMA_ENABLE_CUDA=ON to install MAGMA for CUDA. This is default.
+        -DMAGMA_ENABLE_HIP=ON to install MAGMA for HIP. This option requires
             to specify the hipcc compiler, e.g.,
             cmake -DMAGMA_ENABLE_HIP=ON -DCMAKE_CXX_COMPILER=hipcc ..
 
@@ -69,11 +69,11 @@ MAGMA README FILE
             Kepler, Maxwell, Pascal, Volta, Turing, Ampere
             or valid sm_[0-9][0-9] for NVIDIA GPUs.
             For AMD GPUs include one or more valid GPU gfx numbers
-            (https://llvm.org/docs/AMDGPUUsage.html#target-triples). 
+            (https://llvm.org/docs/AMDGPUUsage.html#target-triples).
 
         -DBLA_VENDOR=vendor, where vendor is one of:
             Intel10_64lp, Intel10_64lp_seq, Intel10_64ilp, Intel10_64ilp_seq,
-            Intel10_32, OpenBLAS, FLAME, ACML, Apple, NAS, Generic.
+            Intel10_32, OpenBLAS, FLAME, Generic, etc.
             See https://cmake.org/cmake/help/latest/module/FindLAPACK.html
                 https://cmake.org/cmake/help/latest/module/FindBLAS.html
 
diff --git a/ReleaseChecklist b/ReleaseChecklist
index 09c652347..c8f6d58f5 100644
--- a/ReleaseChecklist
+++ b/ReleaseChecklist
@@ -34,7 +34,7 @@ Compilation checks
 Documentation
 	cd docs && ./groups.sh -- any groups unused or undefined?
 	(currently QUARK undefined; geqrf_tile unused)
-	
+
 	cd docs && make
 	check errors in output_err
 	open docs/html/index.html and browse through Modules to see that things look right
@@ -52,15 +52,14 @@ Compile & run on:
 		MKL ILP64
 		ACML 5 or 6
 		OpenBLAS
-	MacOS using gcc & veclib
 	Windows using CMake & MKL
-	
+
 	CUDA 5.0
 	CUDA 5.5
 	CUDA 6.0
 	CUDA 6.5
 	CUDA 7.0
-	
+
 	pgi compilers?
 
 Any compiler warnings?
diff --git a/control/magma_internal.h b/control/magma_internal.h
index e4e533403..0097b23d3 100644
--- a/control/magma_internal.h
+++ b/control/magma_internal.h
@@ -52,12 +52,12 @@
     #include <inttypes.h>
 
     // our magma_winthread doesn't have pthread_key;
-    // assume other platforms (Linux, MacOS, etc.) do.
+    // assume other platforms (Linux, etc.) do.
     #define HAVE_PTHREAD_KEY
 
 #endif
 
-// provide our own support for pthread_barrier on MacOS and Windows
+// provide our own support for pthread_barrier on Windows
 #include "pthread_barrier.h"
 
 #include "magma_v2.h"
@@ -118,9 +118,9 @@ struct magma_queue
             dCarray__ = dBarray__ + maxbatch__;
         }
     }
-    
+
     #ifdef MAGMA_HAVE_HIP
-    
+
     hipStream_t      hip_stream()      { return stream__; };
 
     hipblasHandle_t  hipblas_handle()  { return hipblas__; };
diff --git a/docs/contributors-guide.txt b/docs/contributors-guide.txt
index 8e4ba2171..a42dfa378 100644
--- a/docs/contributors-guide.txt
+++ b/docs/contributors-guide.txt
@@ -24,9 +24,10 @@ This guide is based on the PLASMA coding style guide, though slightly different.
       gcc -std=c99   -Wall -pedantic -c magma.c
 
   This can be done by setting CFLAGS and CXXFLAGS appropriately in make.inc.
-  MAGMA code needs to be portable and work on Unix, MacOS, and Windows. Most
-  MAGMA code is compiled as C++, but uses essentially C style conventions. For
-  example, all externally visible functions are declared `extern "C"` and
+  MAGMA code needs to be portable and work on Unix and Windows.
+  (Since Apple dropped CUDA, macOS support has been dropped.)
+  Most MAGMA code is compiled as C++, but uses essentially C style conventions.
+  For example, all externally visible functions are declared `extern "C"` and
   C-style malloc routines are used, instead of the C++ `new` operator.
 
 - **No Trailing Whitespace:**
@@ -80,49 +81,49 @@ This guide is based on the PLASMA coding style guide, though slightly different.
         {
             if (m <= 0 || n <= 0 || k <= 0) return;  // Avoid
             code_not_inside_if();
-        
+
             if (m <= 0 || n <= 0 || k <= 0)          // Okay
                 return;
-        
+
             if (m <= 0 || n <= 0 || k <= 0) {        // Better
                 return;
             }
-        
+
             if (condition)      // Avoid single statement, multi-line if and loops!
                 magma_zherk( uplo, trans, n, k,
                              alpha, dA(0,0), ldda,
                              beta,  dB(0,0), lddb );
-        
+
             if (condition) {    // Better
                 magma_zherk( uplo, trans, n, k,
                              alpha, dA(0,0), ldda,
                              beta,  dB(0,0), lddb );
             }
-        
+
             if (condition)      // Avoid single block, multi-statement if and loops!
                 for (i=0; i < n; ++i) {
                     A(i,i) = 0;
                     cnt += 1;
                 }
-        
+
             if (condition) {    // Avoid inconsistent indentation! Always use 4 spaces.
               for (i=0; i < n; ++i) {
                     A(i,i) = 0;
                      cnt += 1;
                }
             }
-        
+
             if (condition) {    // Better
                 for (i=0; i < n; ++i) {
                     A(i,i) = 0;
                     cnt += 1;
                 }
             }
-        
+
             if (condition) {    // Avoid excessive newlines
-        
+
                 norm = magma_dnrm2( n, dx(0), incx );
-        
+
             }
         }
   \endcode
@@ -139,7 +140,7 @@ This guide is based on the PLASMA coding style guide, though slightly different.
   \code
         // host pointers are the same for clBLAS, CUDA, and Xeon Phi.
         #define  A(i_,j_)  (A + (i_) + (j_)*lda)
-        
+
         // for clBLAS, return cl_mem object and offset (2 values);
         // for others, return (pointer + offset) (1 value).
         #ifdef MAGMA_HAVE_OPENCL
@@ -147,12 +148,12 @@ This guide is based on the PLASMA coding style guide, though slightly different.
         #else
             #define dA(i_,j_) (dA + (i_) + ((size_t) j_)*lda)
         #endif
-        
+
         // for host pointers, A == A(0,0).
         blasf77_zherk( lapack_uplo_const(uplo), lapack_trans_const(trans), &n, &k,
                        &alpha, A,      &lda,
                        &beta,  B(i,j), &ldb );
-        
+
         // for device pointers, use dA(0,0) instead of dA, to aid porting to OpenCL.
         magma_zherk( uplo, trans, n, k,
                      alpha, dA(0,0), ldda,
@@ -211,17 +212,17 @@ This guide is based on the PLASMA coding style guide, though slightly different.
   fine. Clarity is paramount. For multi-line function calls it is recommended
   that new lines start either after the left paranthesis or indented 4 spaces,
   always aligned with the first argument.
-  
+
   \code
         magma_zherk(                           // Okay; preferred style for prototypes
             uplo, trans, n, k,
             alpha, dA(0,0), ldda,
             beta,  dB(i,j), lddb );
-        
+
         magma_zherk( uplo, trans, n, k,        // Okay
                      alpha, dA(0,0), ldda,
                      beta,  dB(i,j), lddb );
-        
+
         magma_zherk( uplo, trans, n, k,        // Avoid
             alpha, dA(0,0), ldda,
             beta,  dB(i,j), lddb );
diff --git a/docs/documentation.txt b/docs/documentation.txt
index 200198361..68172dc78 100644
--- a/docs/documentation.txt
+++ b/docs/documentation.txt
@@ -140,16 +140,6 @@ For csh/tcsh, put in ~/.cshrc:
 
 Some bugs exist with OpenBLAS 0.2.19; see BUGS.txt.
 
-#### MacOS Accelerate (previously Veclib)
-
-Unfortunately, the MacOS Accelerate framework uses an old ABI for BLAS and
-LAPACK, where single precision functions -- such as `sdot`, `cdot`, `slange`,
-and `clange` -- return a double precision result. This makes them incompatibile
-with our C/C++ headers and with the Fortran code used in our testers. The fix is
-to substitute reference implementations of these functions, found in
-`magma/blas_fix`. Setting `blas_fix = 1` in `make.inc` will compile these into
-`magma/lib/libblas_fix.a`, with which your application should link.
-
 
 Linking to BLAS
 --------------------------------------------------------------------------------
@@ -221,10 +211,9 @@ For multi-GPU functions, set `\$MAGMA_NUM_GPUS` to the number of GPUs to use.
 
 - `\$OMP_NUM_THREADS`
 - `\$MKL_NUM_THREADS`
-- `\$VECLIB_MAXIMUM_THREADS`
 
     For multi-core BLAS libraries, set `\$OMP_NUM_THREADS` or `\$MKL_NUM_THREADS`
-    or `\$VECLIB_MAXIMUM_THREADS` to the number of CPU threads, depending on your
+    to the number of CPU threads, depending on your
     BLAS library. See the documentation for your BLAS and LAPACK libraries.
 
 
diff --git a/fortran/Makefile b/fortran/Makefile
index 2017e4e42..018408495 100644
--- a/fortran/Makefile
+++ b/fortran/Makefile
@@ -17,8 +17,7 @@ LDFLAGS    =
 LIBS       = -L$(MAGMADIR)/lib -lmagma -Wl,-rpath,$(MAGMADIR)/lib
 
 # BLAS and LAPACK libraries
-# framework Accelerate is for MacOS
-LIBS      += -framework Accelerate
+LIBS      += -lopenblas
 
 obj = \
 	test.o			\
diff --git a/testing/Makefile.src b/testing/Makefile.src
index 6ce84e7d9..e6bdbd283 100644
--- a/testing/Makefile.src
+++ b/testing/Makefile.src
@@ -63,8 +63,6 @@ testing_src += \
 	$(cdir)/testing_parse_opts.cpp	\
 	$(cdir)/testing_zgenerate.cpp	\
 
-	#$(cdir)/testing_veclib.cpp	\
-
 # ----------
 # Cholesky, GPU interface
 testing_src += \
diff --git a/testing/testing_veclib.cpp b/testing/testing_veclib.cpp
deleted file mode 100644
index a908dc910..000000000
--- a/testing/testing_veclib.cpp
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
-    -- MAGMA (version 2.0) --
-       Univ. of Tennessee, Knoxville
-       Univ. of California, Berkeley
-       Univ. of Colorado, Denver
-       @date
-
-       @author Mark Gates
-*/
-
-// Checks vector and matrix norms, for -m32 and -m64, with return as float or as double.
-// In MacOS, -m64 must return double for both single and double precision
-// functions, e.g., {s,d}dot, {s,d}nrm2, {s,d}lange, {s,d}lansy.
-// Oddly, with -m32 both return float and double for single precision functions works.
-// This is essentially a bug from an old f2c version of lapack (clapack).
-//
-// We work around this bug by putting replacement routines in the magma/lib/libblas_fix.a library.
-// These correctly return float for the single precision functions that had issues.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-
-// hack to NOT include magma_lapack.h, by pre-defining MAGMA_LAPACK_H.
-// we re-define the lapack prototypes below, so can't include that header.
-#define MAGMA_LAPACK_H
-#include "testings.h"
-#include "magma_mangling.h"
-
-// ------------------------------------------------------------
-//#define LAPACK_RETURN_DOUBLE
-
-#ifdef LAPACK_RETURN_DOUBLE
-typedef double RETURN_FLOAT;
-#else
-typedef float  RETURN_FLOAT;
-#endif
-
-
-// ------------------------------------------------------------
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define blasf77_sdot     FORTRAN_NAME( sdot,   SDOT   )
-#define blasf77_snrm2    FORTRAN_NAME( snrm2,  SNRM2  )
-#define lapackf77_slange FORTRAN_NAME( slange, SLANGE )
-#define lapackf77_slansy FORTRAN_NAME( slansy, SLANSY )
-
-#define blasf77_ddot     FORTRAN_NAME( ddot,   DDOT   )
-#define blasf77_dnrm2    FORTRAN_NAME( dnrm2,  DNRM2  )
-#define lapackf77_dlange FORTRAN_NAME( dlange, DLANGE )
-#define lapackf77_dlansy FORTRAN_NAME( dlansy, DLANSY )
-
-RETURN_FLOAT
-blasf77_sdot(   const magma_int_t *n,
-                const float *x, const magma_int_t *incx,
-                const float *y, const magma_int_t *incy );
-                
-RETURN_FLOAT
-blasf77_snrm2(  const magma_int_t *n,
-                const float *x, const magma_int_t *incx );
-
-RETURN_FLOAT
-lapackf77_slange( const char *norm,
-                const magma_int_t *m, const magma_int_t *n,
-                const float *A, const magma_int_t *lda,
-                float *work );
-
-RETURN_FLOAT
-lapackf77_slansy( const char *norm, const char* uplo,
-                const magma_int_t *n,
-                const float *A, const magma_int_t *lda,
-                float *work );
-
-double
-blasf77_ddot(   const magma_int_t *n,
-                const double *x, const magma_int_t *incx,
-                const double *y, const magma_int_t *incy );
-
-double
-blasf77_dnrm2(  const magma_int_t *n,
-                const double *x, const magma_int_t *incx );
-
-double
-lapackf77_dlange( const char *norm,
-                const magma_int_t *m, const magma_int_t *n,
-                const double *A, const magma_int_t *lda,
-                double *work );
-
-double
-lapackf77_dlansy( const char *norm, const char* uplo,
-                const magma_int_t *n,
-                const double *A, const magma_int_t *lda,
-                double *work );
-
-#ifdef __cplusplus
-}
-#endif
-
-// ------------------------------------------------------------
-// call matrix norms {s,d}lan{ge,sy}.
-// return value, to check that the call stack isn't messed up.
-float test( magma_int_t m, magma_int_t n )
-{
-#define sA(i,j) (sA + (i) + (j)*lda)
-#define dA(i,j) (dA + (i) + (j)*lda)
-    
-    float  *sA, *swork;
-    float  snorm_one, snorm_inf, snorm_fro, snorm_max;
-    
-    double *dA, *dwork;
-    double dnorm_one, dnorm_inf, dnorm_fro, dnorm_max;
-    
-    const magma_int_t ione = 1;
-    magma_int_t lda = max(m,n);
-    
-    TESTING_CHECK( magma_smalloc_cpu( &&sA,    lda*n ));
-    TESTING_CHECK( magma_dmalloc_cpu( &&dA,    lda*n ));
-    TESTING_CHECK( magma_smalloc_cpu( &&swork, m     ));
-    TESTING_CHECK( magma_dmalloc_cpu( &&dwork, m     ));
-    
-    for( magma_int_t j = 0; j < n; ++j ) {
-        for( magma_int_t i = 0; i < lda; ++i ) {
-            double tmp = rand() / (double)(RAND_MAX);
-            *sA(i,j) = tmp;
-            *dA(i,j) = tmp;
-        }
-    }
-    
-    double error;
-    int status = 0;
-    
-    // can repeat multiple times, but shows same results every time
-    status = 0;
-    for( magma_int_t i=0; i < 1; ++i ) {
-        snorm_one = blasf77_sdot(  &m, sA, &ione, sA, &ione );
-        dnorm_one = blasf77_ddot(  &m, dA, &ione, dA, &ione );
-        snorm_fro = blasf77_snrm2( &m, sA, &ione );
-        dnorm_fro = blasf77_dnrm2( &m, dA, &ione );
-        printf( "m %lld, sdot %12.8f, snrm2 %12.8f\n", (long long) m, snorm_one, snorm_fro );
-        printf( "m %lld, ddot %12.8f, dnrm2 %12.8f\n", (long long) m, dnorm_one, dnorm_fro );
-        error = fabs(snorm_one - dnorm_one) / dnorm_one;
-        status |= ! (error < 1e-6);
-    }
-    if ( status ) {
-        printf( "**** failed ****\n" );
-    }
-    else {
-        printf( "ok\n" );
-    }
-    printf( "\n" );
-    
-    status = 0;
-    for( magma_int_t i=0; i < 1; ++i ) {
-        snorm_one = lapackf77_slange( "one", &m, &n, sA, &lda, swork );
-        snorm_inf = lapackf77_slange( "inf", &m, &n, sA, &lda, swork );
-        snorm_max = lapackf77_slange( "max", &m, &n, sA, &lda, swork );
-        snorm_fro = lapackf77_slange( "fro", &m, &n, sA, &lda, swork );
-        
-        dnorm_one = lapackf77_dlange( "one", &m, &n, dA, &lda, dwork );
-        dnorm_inf = lapackf77_dlange( "inf", &m, &n, dA, &lda, dwork );
-        dnorm_max = lapackf77_dlange( "max", &m, &n, dA, &lda, dwork );
-        dnorm_fro = lapackf77_dlange( "fro", &m, &n, dA, &lda, dwork );
-        
-        printf( "m %lld, n %lld, slange norm one %12.8f,  inf %12.8f,  max %12.8f,  fro %12.8f\n",
-                (long long) m, (long long) n, snorm_one, snorm_inf, snorm_max, snorm_fro );
-        
-        printf( "m %lld, n %lld, dlange norm one %12.8f,  inf %12.8f,  max %12.8f,  fro %12.8f\n",
-                (long long) m, (long long) n, dnorm_one, dnorm_inf, dnorm_max, dnorm_fro );
-        error = fabs(snorm_one - dnorm_one) / dnorm_one;
-        status |= ! (error < 1e-6);
-    }
-    if ( status ) {
-        printf( "**** failed ****\n" );
-    }
-    else {
-        printf( "ok\n" );
-    }
-    printf( "\n" );
-    
-    status = 0;
-    for( magma_int_t i=0; i < 1; ++i ) {
-        snorm_one = lapackf77_slansy( "one", "up", &n, sA, &lda, swork );
-        snorm_inf = lapackf77_slansy( "inf", "up", &n, sA, &lda, swork );
-        snorm_max = lapackf77_slansy( "max", "up", &n, sA, &lda, swork );
-        snorm_fro = lapackf77_slansy( "fro", "up", &n, sA, &lda, swork );
-        
-        dnorm_one = lapackf77_dlansy( "one", "up", &n, dA, &lda, dwork );
-        dnorm_inf = lapackf77_dlansy( "inf", "up", &n, dA, &lda, dwork );
-        dnorm_max = lapackf77_dlansy( "max", "up", &n, dA, &lda, dwork );
-        dnorm_fro = lapackf77_dlansy( "fro", "up", &n, dA, &lda, dwork );
-        
-        printf( "m %lld, n %lld, slansy norm one %12.8f,  inf %12.8f,  max %12.8f,  fro %12.8f\n",
-                (long long) m, (long long) n, snorm_one, snorm_inf, snorm_max, snorm_fro );
-        
-        printf( "m %lld, n %lld, dlansy norm one %12.8f,  inf %12.8f,  max %12.8f,  fro %12.8f\n",
-                (long long) m, (long long) n, dnorm_one, dnorm_inf, dnorm_max, dnorm_fro );
-        error = fabs(snorm_one - dnorm_one) / dnorm_one;
-        status |= ! (error < 1e-6);
-    }
-    if ( status ) {
-        printf( "**** failed ****\n" );
-    }
-    else {
-        printf( "ok\n" );
-    }
-    printf( "\n" );
-    
-    magma_free_cpu( sA );
-    magma_free_cpu( dA );
-    magma_free_cpu( swork );
-    magma_free_cpu( dwork );
-    
-    return 1.125;
-}
-
-
-// ------------------------------------------------------------
-int main( int argc, char** argv )
-{
-    magma_int_t m = 100;
-    magma_int_t n = m;
-    if ( argc > 1 ) {
-        n = atoi( argv[1] );
-    }
-    
-    float value;
-    
-    printf( "--------------------\n" );
-    printf( "sizeof(void*) %lu, sizeof(RETURN_FLOAT) %lu\n\n",
-            sizeof(void*), sizeof(RETURN_FLOAT) );
-    
-    // can repeat multiple times, but shows same results every time
-    for( magma_int_t i=0; i < 1; ++i ) {
-        value = test( m, n );
-        printf( "value %.4f\n\n", value );
-    }
-    
-    return 0;
-}
diff --git a/tools/build.sh b/tools/build.sh
index b88881020..08f47fcb4 100755
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -20,7 +20,7 @@ link=""
 tar=""
 pause=""
 
-usage="Usage: $0 [options] [acml macos mkl-gcc openblas ...]
+usage="Usage: $0 [options] [mkl-gcc openblas ...]
     -h  --help      help
     -j #            parallel make threads, default $j
         --no-clean  skip 'make clean' before build; only one configuration allowed
diff --git a/tools/checklist.csh b/tools/checklist.csh
index e638280fb..5b31e36fc 100755
--- a/tools/checklist.csh
+++ b/tools/checklist.csh
@@ -180,7 +180,7 @@ echo "============================================================ should be fix
 # fixed
 echo "========== use @date instead of a specific hard-coded date"
 egrep '(January|February|March|April|May|June|July|August|September|October|November|December) +[0-9]{4} *$' $FILES \
-    | egrep -v 'testing/(checkdiag|lin|matgen)|blas_fix'
+    | egrep -v 'testing/(checkdiag|lin|matgen)'
 echo
 
 echo "========== cuda, cublas functions                                          *** should be fixed ***"

From 8f9121c5cf87e5accad644723cf8985c907a6734 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Fri, 14 Feb 2025 13:38:48 +0100
Subject: [PATCH 07/27] remove ACML support, since AMD dropped it circa 2017.
 Also remove out-dated flock references.

---
 CMakeLists.txt               |  1 -
 Makefile                     |  2 +-
 ReleaseChecklist             |  5 ++---
 docs/documentation.txt       | 37 ++++++++++++------------------------
 interface_cuda/interface.cpp | 17 +++--------------
 make.check-acml              |  8 --------
 tools/checklist.csh          |  2 +-
 7 files changed, 19 insertions(+), 53 deletions(-)
 delete mode 100644 make.check-acml

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3e051965b..d55ea406b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -333,7 +333,6 @@ set(BLA_VENDOR "" CACHE STRING
 set_property(CACHE BLA_VENDOR PROPERTY STRINGS
     "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL"
     "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq"
-    "ACML" "ACML_MP" "ACML_GPU"
     "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp"
     "Generic")
 
diff --git a/Makefile b/Makefile
index df8238d96..cc0d52749 100644
--- a/Makefile
+++ b/Makefile
@@ -941,7 +941,7 @@ $(sparse_testers): %: %.$(o_ext)
 # filter out MAGMA-specific options for pkg-config
 #TODO: add hip specific ones
 INSTALL_FLAGS := $(filter-out \
-	-DMAGMA_NOAFFINITY -DMAGMA_SETAFFINITY -DMAGMA_WITH_ACML -DMAGMA_WITH_MKL -DUSE_FLOCK \
+	-DMAGMA_NOAFFINITY -DMAGMA_SETAFFINITY -DMAGMA_WITH_MKL \
 	-DMAGMA_CUDA_ARCH=% -DMAGMA_CUDA_ARCH_MIN=% \
 	-DMAGMA_HAVE_CUDA -DMAGMA_HAVE_HIP -DMAGMA_HAVE_clBLAS \
 	-fno-strict-aliasing -fPIC -O0 -O1 -O2 -O3 -pedantic -std=c99 -stdc++98 -stdc++14 \
diff --git a/ReleaseChecklist b/ReleaseChecklist
index c8f6d58f5..f29eb5f5a 100644
--- a/ReleaseChecklist
+++ b/ReleaseChecklist
@@ -44,13 +44,12 @@ Compile & run with -DNDEBUG (no asserts)
 
 Compile & run on:
 	This script will compile and save output from each make.inc file:
-	tools/checklist_builds.csh [ acml atlas mkl-gcc mkl-icc mkl-gcc-ilp64 mkl-icc-ilp64 ... ]
-	
+	tools/checklist_builds.csh [ atlas mkl-gcc mkl-icc mkl-gcc-ilp64 mkl-icc-ilp64 ... ]
+
 	Linux
 		icc, gcc
 		MKL
 		MKL ILP64
-		ACML 5 or 6
 		OpenBLAS
 	Windows using CMake & MKL
 
diff --git a/docs/documentation.txt b/docs/documentation.txt
index 68172dc78..a3618a904 100644
--- a/docs/documentation.txt
+++ b/docs/documentation.txt
@@ -98,21 +98,6 @@ For csh/tcsh, put in ~/.cshrc:
 MAGMA is tested with MKL 11.3.3 (2016), both LP64 and ILP64;
 other versions may work.
 
-#### AMD ACML
-
-The ACML `make.inc` file assumes `\$ACMLDIR` is set in your environment.
-For bash (sh), put in ~/.bashrc (with your system's path):
-
-    export ACMLDIR=/opt/acml-5.3.1
-
-For csh/tcsh, put in ~/.cshrc:
-
-    setenv ACMLDIR  /opt/acml-5.3.1
-
-MAGMA is tested with ACML 5.3.1; other versions may work.
-See comments in `make.inc.acml` regarding ACML 4;
-a couple testers fail to compile with ACML 4.
-
 #### ATLAS
 
 The ATLAS `make.inc` file assumes `\$ATLASDIR` and `\$LAPACKDIR` are set in your environment.
@@ -154,12 +139,19 @@ Set `-DADD_`, `-DUPCASE`, or `-DNOCHANGE`, respectively, in all FLAGS in your
 `make.inc` file to select the appropriate one. Use `nm` to examine your BLAS
 library:
 
-    acml-5.3.1/gfortran64_mp/lib> nm libacml_mp.a | grep -i 'T.*dgemm'
-    0000000000000000 T dgemm
-    00000000000004e0 T dgemm_
+    sh methane lib> nm libopenblas.so | grep -i dsyr2k
+    000000000017ee50 T cblas_dsyr2k
+    000000000017c8b0 T dsyr2k_           # Note this line
+    00000000001fa690 T dsyr2k_LN
+    00000000001fb2e0 T dsyr2k_LT
+    00000000001f8f70 T dsyr2k_UN
+    00000000001f9b70 T dsyr2k_UT
+    00000000001fcab0 T dsyr2k_kernel_L
+    00000000001fc750 T dsyr2k_kernel_U
 
-In this case, it shows that either `-DADD_ (dgemm_)` or `-DNOCHANGE (dgemm)`
-should work. The default in all make.inc files is `-DADD_`.
+In this case, it shows that `-DADD_ (dsyr2k_)` should work.
+The default in all example make.inc files is `-DADD_`,
+except for IBM ESSL, which uses `-DNOCHANGE`.
 
 
 Compile-time options
@@ -173,11 +165,6 @@ using the `-D` compiler flag, e.g., `-DMAGMA_WITH_MKL` in CFLAGS.
     If linked with MKL, allows MAGMA to get MKL's version and
     set MKL's number of threads.
 
-- `MAGMA_WITH_ACML`
-
-    If linked with ACML 5 or later, allows MAGMA to get ACML's version.
-    ACML's number of threads are set via OpenMP.
-
 - `MAGMA_NOAFFINITY`
 
     Disables thread affinity, available in glibc 2.6 and later.
diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp
index 19dd6dc4b..c3ae4e0eb 100644
--- a/interface_cuda/interface.cpp
+++ b/interface_cuda/interface.cpp
@@ -26,15 +26,11 @@
 #include <mkl_service.h>
 #endif
 
-#if defined(MAGMA_WITH_ACML)
-#include <acml.h>
-#endif
-
 #include <cuda_runtime.h>
 
 // defining MAGMA_LAPACK_H is a hack to NOT include magma_lapack.h
-// via magma_internal.h here, since it conflicts with acml.h and we don't
-// need lapack here, but we want acml.h for the acmlversion() function.
+// via magma_internal.h here, since it conflicts with some vendor's
+// headers (acml.h), and we don't need lapack here.
 #define MAGMA_LAPACK_H
 
 #include "magma_internal.h"
@@ -327,7 +323,7 @@ magma_print_environment()
 
     printf( "%% Compiled for CUDA architectures %s\n", MAGMA_CUDA_ARCH );
 
-    // CUDA, OpenCL, OpenMP, MKL, ACML versions all printed on same line
+    // CUDA, OpenCL, OpenMP, MKL versions all printed on same line
     int cuda_runtime=0, cuda_driver=0;
     cudaError_t err;
     err = cudaDriverGetVersion( &cuda_driver );
@@ -381,13 +377,6 @@ magma_print_environment()
             mkl_get_max_threads() );
 #endif
 
-#if defined(MAGMA_WITH_ACML)
-    // ACML 4 doesn't have acml_build parameter
-    int acml_major, acml_minor, acml_patch, acml_build;
-    acmlversion( &acml_major, &acml_minor, &acml_patch, &acml_build );
-    printf( "ACML %d.%d.%d.%d ", acml_major, acml_minor, acml_patch, acml_build );
-#endif
-
     printf( "\n" );
 
     // print devices
diff --git a/make.check-acml b/make.check-acml
deleted file mode 100644
index 095cdd024..000000000
--- a/make.check-acml
+++ /dev/null
@@ -1,8 +0,0 @@
-# check for ACML directories and give user hint how to set them
-ifeq ($(ACMLDIR),)
-$(error Set $$ACMLDIR, preferably in your environment, e.g., run "export ACMLDIR=/opt/acml" in ~/.bashrc, or "setenv ACMLDIR /opt/acml" in ~/.cshrc)
-endif
-
-ifeq ($(wildcard $(ACMLDIR)),)
-$(error $$ACMLDIR=$(ACMLDIR) does not exist. Please set $$ACMLDIR to where ACML is installed.)
-endif
diff --git a/tools/checklist.csh b/tools/checklist.csh
index 5b31e36fc..4c6778c2d 100755
--- a/tools/checklist.csh
+++ b/tools/checklist.csh
@@ -170,7 +170,7 @@ echo
 # needs lots of work; also lots of exceptions
 echo "========== int instead of magma_int_t (in headers)                         *** required fix ***"
 egrep '\bint\b' $HEADERS \
-    | egrep -v 'int argc|int flock|quark|magma_timer\.h|commonblas_z\.h|magma_templates\.h|magma_winthread\.h|pthread_barrier\.h|cblas\.h|typedef int +magma_|const char\* func, const char\* file, int line|int mm_'
+    | egrep -v 'int argc|quark|magma_timer\.h|commonblas_z\.h|magma_templates\.h|magma_winthread\.h|pthread_barrier\.h|cblas\.h|typedef int +magma_|const char\* func, const char\* file, int line|int mm_'
 echo
 
 echo

From 4c76e7c6bbca49268cfc683c649cdb720ea8e8b3 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Tue, 8 Jul 2025 02:48:09 -0400
Subject: [PATCH 08/27] make: remove extraneous check from make.inc files that
 is repeated in Makefile

---
 make.inc-examples/make.inc.hip-gcc-mkl      | 5 -----
 make.inc-examples/make.inc.hip-gcc-openblas | 5 -----
 make.inc-examples/make.inc.openblas         | 5 -----
 3 files changed, 15 deletions(-)

diff --git a/make.inc-examples/make.inc.hip-gcc-mkl b/make.inc-examples/make.inc.hip-gcc-mkl
index 24ee6bf6b..7a04a97bb 100644
--- a/make.inc-examples/make.inc.hip-gcc-mkl
+++ b/make.inc-examples/make.inc.hip-gcc-mkl
@@ -36,11 +36,6 @@ BACKEND      = hip
 ROCM_PATH ?= /opt/rocm
 CUDADIR   ?= /usr/local/cuda
 
-# require either hip or cuda
-ifeq (,$(findstring $(BACKEND),hip cuda))
-    $(error "'BACKEND' should be either 'cuda' or 'hip' (got $$BACKEND=$(BACKEND))")
-endif
-
 # --------------------
 # programs
 
diff --git a/make.inc-examples/make.inc.hip-gcc-openblas b/make.inc-examples/make.inc.hip-gcc-openblas
index 547169bb1..0bea560b2 100644
--- a/make.inc-examples/make.inc.hip-gcc-openblas
+++ b/make.inc-examples/make.inc.hip-gcc-openblas
@@ -37,11 +37,6 @@ OPENBLASDIR ?= /usr/local/openblas
 ROCM_PATH   ?= /opt/rocm
 CUDADIR     ?= /usr/local/cuda
 
-# require either hip or cuda
-ifeq (,$(findstring $(BACKEND),hip cuda))
-    $(error "'BACKEND' should be either 'cuda' or 'hip' (got $$BACKEND=$(BACKEND))")
-endif
-
 # --------------------
 # programs
 
diff --git a/make.inc-examples/make.inc.openblas b/make.inc-examples/make.inc.openblas
index 9e73c4e5e..81a48e994 100644
--- a/make.inc-examples/make.inc.openblas
+++ b/make.inc-examples/make.inc.openblas
@@ -20,11 +20,6 @@ OPENBLASDIR ?= /usr/local/openblas
 CUDADIR     ?= /usr/local/cuda
 ROCM_PATH   ?= /opt/rocm
 
-# require either hip or cuda
-ifeq (,$(findstring $(BACKEND),hip cuda))
-    $(error "'BACKEND' should be either 'cuda' or 'hip' (got $(BACKEND))")
-endif
-
 # --------------------
 # programs
 

From f8c541bacb2ef407b716d4c7e85d8f5d0db3cde5 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Tue, 8 Jul 2025 02:48:33 -0400
Subject: [PATCH 09/27] make: add AMD AOCL / BLIS & FLAME config

---
 docs/documentation.txt                      |  20 +++-
 make.check-aocl-blis                        |  17 +++
 make.inc-examples/make.inc.aocl-blis        | 120 ++++++++++++++++++++
 make.inc-examples/make.inc.hip-gcc-openblas |   2 +-
 make.inc-examples/make.inc.openblas         |   4 +-
 5 files changed, 155 insertions(+), 8 deletions(-)
 create mode 100644 make.check-aocl-blis
 create mode 100644 make.inc-examples/make.inc.aocl-blis

diff --git a/docs/documentation.txt b/docs/documentation.txt
index a3618a904..38a03ae43 100644
--- a/docs/documentation.txt
+++ b/docs/documentation.txt
@@ -82,7 +82,20 @@ For csh/tcsh, put in `~/.cshrc`:
 
     setenv CUDADIR /usr/local/cuda
 
-MAGMA is tested with CUDA >= 7.5. Some functionality requires a newer version.
+#### AMD Optimizing CPU Libraries (AOCL) / BLIS & FLAME
+
+AOCL has adopted the BLIS and libFLAME libraries. These may be installed
+in separate directories. Set `\$BLIS_DIR` and `\$FLAME_DIR` in your
+environment or make.inc file.
+For bash (sh), put in ~/.bashrc (with your system's paths):
+
+    export BLIS_DIR=/opt/blis
+    export FLAME_DIR=/opt/libflame
+
+For csh/tcsh, put in ~/.cshrc:
+
+    setenv BLIS_DIR  /opt/blis
+    setenv FLAME_DIR /opt/libflame
 
 #### Intel MKL
 
@@ -95,8 +108,7 @@ For csh/tcsh, put in ~/.cshrc:
 
     source /opt/intel/bin/compilervars.csh intel64
 
-MAGMA is tested with MKL 11.3.3 (2016), both LP64 and ILP64;
-other versions may work.
+MAGMA is tested with both LP64 and ILP64.
 
 #### ATLAS
 
@@ -123,8 +135,6 @@ For csh/tcsh, put in ~/.cshrc:
 
     setenv OPENBLASDIR /opt/openblas
 
-Some bugs exist with OpenBLAS 0.2.19; see BUGS.txt.
-
 
 Linking to BLAS
 --------------------------------------------------------------------------------
diff --git a/make.check-aocl-blis b/make.check-aocl-blis
new file mode 100644
index 000000000..cc2af06a2
--- /dev/null
+++ b/make.check-aocl-blis
@@ -0,0 +1,17 @@
+# check for BLIS & FLAME directories and give user hint how to set them
+
+ifeq ($(BLIS_DIR),)
+$(error Set $$BLIS_DIR, preferably in your environment, e.g., run "export BLIS_DIR=/opt/blis" in ~/.bashrc, or "setenv BLIS_DIR /opt/blis" in ~/.cshrc)
+endif
+
+ifeq ($(wildcard $(BLIS_DIR)),)
+$(error $$BLIS_DIR=$(BLIS_DIR) does not exist. Please set $$BLIS_DIR to where BLIS is installed.)
+endif
+
+ifeq ($(FLAME_DIR),)
+$(error Set $$FLAME_DIR, preferably in your environment, e.g., run "export FLAME_DIR=/opt/libflame" in ~/.bashrc, or "setenv FLAME_DIR /opt/libflame" in ~/.cshrc)
+endif
+
+ifeq ($(wildcard $(FLAME_DIR)),)
+$(error $$FLAME_DIR=$(FLAME_DIR) does not exist. Please set $$FLAME_DIR to where libFLAME is installed.)
+endif
diff --git a/make.inc-examples/make.inc.aocl-blis b/make.inc-examples/make.inc.aocl-blis
new file mode 100644
index 000000000..ec25c0f86
--- /dev/null
+++ b/make.inc-examples/make.inc.aocl-blis
@@ -0,0 +1,120 @@
+#//////////////////////////////////////////////////////////////////////////////
+#   -- MAGMA (version 2.x) --
+#      Univ. of Tennessee, Knoxville
+#      Univ. of California, Berkeley
+#      Univ. of Colorado, Denver
+#      @date
+#//////////////////////////////////////////////////////////////////////////////
+
+
+
+# --------------------
+# configuration
+
+# should MAGMA be built on CUDA (NVIDIA only) or ROCM (AMD or NVIDIA)
+# enter 'cuda' or 'hip' respectively
+BACKEND     ?= cuda
+
+# set these to their real paths
+BLIS_DIR    ?= /usr/local/blis
+FLAME_DIR   ?= /usr/local/libflame
+CUDADIR     ?= /usr/local/cuda
+ROCM_PATH   ?= /opt/rocm
+
+# --------------------
+# programs
+
+# set compilers
+CC          ?= gcc
+CXX         ?= g++
+FORT        ?= gfortran
+HIPCC       ?= hipcc
+NVCC        ?= nvcc
+DEVCC       ?= NONE
+
+# set from 'BACKEND'
+ifeq ($(BACKEND),cuda)
+    DEVCC = $(NVCC)
+else ifeq ($(BACKEND),hip)
+    DEVCC = $(HIPCC)
+endif
+
+# and utilities
+ARCH      = ar
+ARCHFLAGS = cr
+RANLIB    = ranlib
+
+
+# --------------------
+# flags/settings
+
+# set our GPU targets
+ifeq ($(BACKEND),cuda)
+    GPU_TARGET ?= Volta Turing Ampere
+else ifeq ($(BACKEND),hip)
+    GPU_TARGET ?= gfx900 gfx906 gfx908
+endif
+
+# Use -fPIC to make shared (.so) and static (.a) library;
+# can be commented out if making only static library.
+FPIC      = -fPIC
+
+# now, generate our flags
+CFLAGS      = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -fopenmp -std=c99
+CXXFLAGS    = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -fopenmp -std=c++11
+FFLAGS      = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -Wno-unused-dummy-argument
+F90FLAGS    = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -Wno-unused-dummy-argument -x f95-cpp-input
+LDFLAGS     =     $(FPIC)                       -fopenmp
+
+DEVCCFLAGS  = -O3         -DNDEBUG -DADD_
+
+# DEVCCFLAGS are populated later in `backend-specific`
+
+
+# --------------------
+# libraries
+
+# gcc with BLIS/FLAME
+LIB       += -lflame -lblis
+
+# --------------------
+# directories
+
+# define library directories preferably in your environment, or here.
+LIBDIR    += -L${FLAME_DIR}/lib -Wl,-rpath,${FLAME_DIR}/lib \
+             -L${BLIS_DIR}/lib -Wl,-rpath,${BLIS_DIR}/lib
+INC       += -I${FLAME_DIR}/include -I${BLIS_DIR}/include
+
+
+# --------------------
+# checks
+
+# check for BLIS & FLAME
+-include make.check-aocl-blis
+
+
+# --------------------
+# backend-specific
+
+# add appropriate cuda flags
+ifeq ($(BACKEND),cuda)
+    -include make.check-cuda
+
+    DEVCCFLAGS += -Xcompiler "$(FPIC)" -std=c++11
+
+    # link with cuda specific libraries
+    INC    += -I$(CUDADIR)/include
+    LIBDIR += -L$(CUDADIR)/lib64 -Wl,-rpath,${CUDADIR}/lib64
+    LIB    += -lcublas -lcusparse -lcudart -lcudadevrt
+endif
+
+# add appropriate ROCM flags
+ifeq ($(BACKEND),hip)
+    -include make.check-hip
+
+    DEVCCFLAGS += $(FPIC) -std=c++11
+
+    INC    += -I$(ROCM_PATH)/include
+    LIBDIR += -L$(ROCM_PATH)/lib -Wl,-rpath,${ROCM_PATH}/lib
+    LIB    += -lhipblas -lhipsparse
+endif
diff --git a/make.inc-examples/make.inc.hip-gcc-openblas b/make.inc-examples/make.inc.hip-gcc-openblas
index 0bea560b2..4e3fe3809 100644
--- a/make.inc-examples/make.inc.hip-gcc-openblas
+++ b/make.inc-examples/make.inc.hip-gcc-openblas
@@ -71,7 +71,7 @@ RANLIB      ?= ranlib
 ifeq ($(BACKEND),cuda)
     # See a full table: https://nouveau.freedesktop.org/wiki/CodeNames/
     # note: provided by freedesktop, which is reliable, but not from the vendor
-    GPU_TARGET = Volta Turing Ampere
+    GPU_TARGET ?= Volta Turing Ampere
 else ifeq ($(BACKEND),hip)
     # See a full table:
     #   https://llvm.org/docs/AMDGPUUsage.html#amdgpu-processor-table
diff --git a/make.inc-examples/make.inc.openblas b/make.inc-examples/make.inc.openblas
index 81a48e994..ec6472b41 100644
--- a/make.inc-examples/make.inc.openblas
+++ b/make.inc-examples/make.inc.openblas
@@ -49,9 +49,9 @@ RANLIB    = ranlib
 
 # set our GPU targets
 ifeq ($(BACKEND),cuda)
-    GPU_TARGET = Volta Turing Ampere
+    GPU_TARGET ?= Volta Turing Ampere
 else ifeq ($(BACKEND),hip)
-    GPU_TARGET = gfx900 gfx906 gfx908
+    GPU_TARGET ?= gfx900 gfx906 gfx908
 endif
 
 # Use -fPIC to make shared (.so) and static (.a) library;

From 49aaf1d6edd6ff8249bd2ab2b77514e976e06353 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Tue, 8 Jul 2025 00:08:51 -0400
Subject: [PATCH 10/27] Fix testing_zhetrf for complex [cz] case. Symmetrize
 with conj and make diagonal real.

---
 testing/testing_zhetrf.cpp | 48 +++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/testing/testing_zhetrf.cpp b/testing/testing_zhetrf.cpp
index d48b52470..ad8665092 100644
--- a/testing/testing_zhetrf.cpp
+++ b/testing/testing_zhetrf.cpp
@@ -329,11 +329,11 @@ double get_residual_aasen(
 }
 
 /******************************************************************************/
-// On input, LU and ipiv is LU factorization of A. On output, LU is overwritten.
-// Works for any m, n.
+// On input, LD and ipiv is LDL^T factorization of A. On output, LD is overwritten.
+// Works for any n.
 // Uses init_matrix() to re-generate original A as needed.
 // Returns error in factorization, |PA - LU| / (n |A|)
-// This allocates 3 more matrices to store A, L, and U.
+// This allocates 3 more matrices to store A, L, and D.
 double get_LDLt_error(
     magma_opts &opts,
     bool nopiv, magma_uplo_t uplo, magma_int_t N,
@@ -363,21 +363,27 @@ double get_LDLt_error(
 
     // symmetrize; the pivoting code below assumes a full matrix
     if (opts.uplo == MagmaLower) {
-        // copy L to U
+        // Copy conj(L) to U, and make diagonal real.
         for (j = 0; j < N; ++j) {
             for (i = 0; i < j; ++i) {
-                A(i,j) = A(j,i);
+                A(i, j) = MAGMA_Z_CONJ( A(j, i) );
             }
+            A(j, j) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(j, j) ), 0 );
         }
     }
     else {
-        // copy U to L
+        // Copy conj(U) to L, and make diagonal real.
         for (j = 0; j < N; ++j) {
             for (i = 0; i < j; ++i) {
-                A(j,i) = A(i,j);
+                A(j, i) = MAGMA_Z_CONJ( A(i, j) );
             }
+            A(j, j) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(j, j) ), 0 );
         }
     }
+    if (opts.verbose) {
+        printf( "Aerr = " );
+        magma_zprint( N, N, A, lda );
+    }
 
     if (uplo == MagmaUpper) {
         for (j=N-1; j >= 0; j--) {
@@ -557,6 +563,11 @@ double get_LDLt_error(
     }
     residual = lapackf77_zlange( "Fro", &N, &N, D, &N, work);
 
+    if (opts.verbose) {
+        printf( "Residual = " );
+        magma_zprint( N, N, D, N );
+    }
+
     magma_free_cpu( A );
     magma_free_cpu( L );
     magma_free_cpu( D );
@@ -651,21 +662,27 @@ double get_LTLt_error(
 
     // symmetrize; the pivoting code below assumes a full matrix
     if (opts.uplo == MagmaLower) {
-        // copy L to U
+        // Copy conj(L) to U, and make diagonal real.
         for (j = 0; j < N; ++j) {
             for (i = 0; i < j; ++i) {
-                A(i,j) = A(j,i);
+                A(i, j) = MAGMA_Z_CONJ( A(j, i) );
             }
+            A(j, j) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(j, j) ), 0 );
         }
     }
     else {
-        // copy U to L
+        // Copy conj(U) to L, and make diagonal real.
         for (j = 0; j < N; ++j) {
             for (i = 0; i < j; ++i) {
-                A(j,i) = A(i,j);
+                A(j, i) = MAGMA_Z_CONJ( A(i, j) );
             }
+            A(j, j) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(j, j) ), 0 );
         }
     }
+    if (opts.verbose) {
+        printf( "Aerr = " );
+        magma_zprint( N, N, A, lda );
+    }
 
     // apply symmetric pivoting
     for (j=0; j < N; j++) {
@@ -694,6 +711,11 @@ double get_LTLt_error(
     }
     residual = lapackf77_zlange( "Fro", &N, &N, T, &N, work);
 
+    if (opts.verbose) {
+        printf( "Residual = " );
+        magma_zprint( N, N, T, N );
+    }
+
     magma_free_cpu( A );
     magma_free_cpu( L );
     magma_free_cpu( T );
@@ -829,6 +851,10 @@ int main( int argc, char** argv)
                Performs operation using MAGMA
                =================================================================== */
             init_matrix( opts, N, N, h_A, lda );
+            if (opts.verbose) {
+                printf( "A = " );
+                magma_zprint( N, N, h_A, lda );
+            }
 
             //printf( "A0=" );
             //magma_zprlong( N, N, h_A, lda );

From afdf8d3741ee7caeb5b96730551a1881d2fe3687 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Mon, 7 Jul 2025 23:57:42 -0400
Subject: [PATCH 11/27] Cleanup testing_zhetrf. Make spacing more consistent,
 use std::swap.

---
 testing/testing_zhetrf.cpp | 82 ++++++++++++++------------------------
 1 file changed, 29 insertions(+), 53 deletions(-)

diff --git a/testing/testing_zhetrf.cpp b/testing/testing_zhetrf.cpp
index ad8665092..bf0a668eb 100644
--- a/testing/testing_zhetrf.cpp
+++ b/testing/testing_zhetrf.cpp
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <math.h>
+#include <algorithm>
 
 // includes, project
 #include "flops.h"
@@ -408,21 +409,15 @@ double get_LDLt_error(
                 if (piv != j) {
                     // apply row-pivoting to previous L
                     for (i=j+2; i < N; i++) {
-                        magmaDoubleComplex val = L(j,i);
-                        L(j,i) = L(piv,i);
-                        L(piv,i) = val;
+                        std::swap( L(j, i), L(piv, i) );
                     }
                     // apply row-pivoting to A
                     for (i=0; i < N; i++) {
-                        magmaDoubleComplex val = A(j,i);
-                        A(j,i) = A(piv,i);
-                        A(piv,i) = val;
+                        std::swap( A(j, i), A(piv, i) );
                     }
                     // apply col-pivoting to A
                     for (i=0; i < N; i++) {
-                        magmaDoubleComplex val = A(i,j);
-                        A(i,j) = A(i,piv);
-                        A(i,piv) = val;
+                        std::swap( A(i, j), A(i, piv) );
                     }
                 }
             }
@@ -438,21 +433,15 @@ double get_LDLt_error(
                 if (piv != j) {
                     // apply row-pivoting to previous L
                     for (i=j+1; i < N; i++) {
-                        magmaDoubleComplex val = L(j,i);
-                        L(j,i) = L(piv,i);
-                        L(piv,i) = val;
+                        std::swap( L(j, i), L(piv, i) );
                     }
                     // apply row-pivoting to A
                     for (i=0; i < N; i++) {
-                        magmaDoubleComplex val = A(j,i);
-                        A(j,i) = A(piv,i);
-                        A(piv,i) = val;
+                        std::swap( A(j, i), A(piv, i) );
                     }
                     // apply col-pivoting to A
                     for (i=0; i < N; i++) {
-                        magmaDoubleComplex val = A(i,j);
-                        A(i,j) = A(i,piv);
-                        A(i,piv) = val;
+                        std::swap( A(i, j), A(i,piv) );
                     }
                 }
             }
@@ -497,21 +486,15 @@ double get_LDLt_error(
                 if (piv != j) {
                     // apply row-pivoting to previous L
                     for (i=0; i < j-1; i++) {
-                        magmaDoubleComplex val = L(j,i);
-                        L(j,i) = L(piv,i);
-                        L(piv,i) = val;
+                        std::swap( L(j, i), L(piv, i) );
                     }
                     // apply row-pivoting to A
                     for (i=0; i < N; i++) {
-                        magmaDoubleComplex val = A(j,i);
-                        A(j,i) = A(piv,i);
-                        A(piv,i) = val;
+                        std::swap( A(j, i), A(piv, i) );
                     }
                     // apply col-pivoting to A
                     for (i=0; i < N; i++) {
-                        magmaDoubleComplex val = A(i,j);
-                        A(i,j) = A(i,piv);
-                        A(i,piv) = val;
+                        std::swap( A(i, j), A(i, piv) );
                     }
                 }
             }
@@ -527,21 +510,15 @@ double get_LDLt_error(
                 if (piv != j) {
                     // apply row-pivoting to previous L
                     for (i=0; i < j; i++) {
-                        magmaDoubleComplex val = L(j,i);
-                        L(j,i) = L(piv,i);
-                        L(piv,i) = val;
+                        std::swap( L(j, i), L(piv, i) );
                     }
                     // apply row-pivoting to A
                     for (i=0; i < N; i++) {
-                        magmaDoubleComplex val = A(j,i);
-                        A(j,i) = A(piv,i);
-                        A(piv,i) = val;
+                        std::swap( A(j, i), A(piv, i) );
                     }
                     // apply col-pivoting to A
                     for (i=0; i < N; i++) {
-                        magmaDoubleComplex val = A(i,j);
-                        A(i,j) = A(i,piv);
-                        A(i,piv) = val;
+                        std::swap( A(i, j), A(i, piv) );
                     }
                 }
             }
@@ -690,15 +667,11 @@ double get_LTLt_error(
         if (piv != j) {
             // apply row-pivoting to A
             for (i=0; i < N; i++) {
-                magmaDoubleComplex val = A(j,i);
-                A(j,i) = A(piv,i);
-                A(piv,i) = val;
+                std::swap( A(j, i), A(piv, i) );
             }
             // apply col-pivoting to A
             for (i=0; i < N; i++) {
-                magmaDoubleComplex val = A(i,j);
-                A(i,j) = A(i,piv);
-                A(i,piv) = val;
+                std::swap( A(i, j), A(i, piv) );
             }
         }
     }
@@ -728,7 +701,7 @@ double get_LTLt_error(
 /* ////////////////////////////////////////////////////////////////////////////
    -- Testing zhetrf
 */
-int main( int argc, char** argv)
+int main( int argc, char** argv )
 {
     TESTING_CHECK( magma_init() );
     magma_print_environment();
@@ -752,8 +725,8 @@ int main( int argc, char** argv)
 
     // TODO: this doesn't work. Options need to be added to parse_opts()
     //for (int i = 1; i < argc; ++i) {
-    //    if ( strcmp("--cpu-panel", argv[i]) == 0) cpu_panel = 1;
-    //    if ( strcmp("--gpu-panel", argv[i]) == 0) cpu_panel = 0;
+    //    if (strcmp("--cpu-panel", argv[i]) == 0) cpu_panel = 1;
+    //    if (strcmp("--gpu-panel", argv[i]) == 0) cpu_panel = 0;
     //}
 
     printf( "%% --version 1 = Bunch-Kauffman (CPU)\n"
@@ -796,7 +769,7 @@ int main( int argc, char** argv)
 
     double tol = opts.tolerance * lapackf77_dlamch("E");
 
-    if ( opts.check == 2 ) {
+    if (opts.check == 2) {
         printf("%%   M     N   CPU Gflop/s (sec)   GPU Gflop/s (sec)   |Ax-b|/(N*|A|*|x|)\n");
     }
     else {
@@ -816,7 +789,7 @@ int main( int argc, char** argv)
             /* =====================================================================
                Performs operation using LAPACK
                =================================================================== */
-            if ( opts.lapack ) {
+            if (opts.lapack) {
                 lwork = -1;
                 lapackf77_zhetrf( lapack_uplo_const(opts.uplo), &N, h_A, &lda, ipiv, &temp, &lwork, &info );
                 lwork = (magma_int_t)MAGMA_Z_REAL( temp );
@@ -891,6 +864,7 @@ int main( int argc, char** argv)
                 magmaDoubleComplex_ptr d_A;
                 TESTING_CHECK( magma_zmalloc( &d_A, N*ldda ));
                 magma_zsetmatrix(N, N, h_A, lda, d_A, ldda, opts.queue );
+
                 gpu_time = magma_wtime();
                 magma_zhetrf_nopiv_gpu( opts.uplo, N, d_A, ldda, &info);
                 gpu_time = magma_wtime() - gpu_time;
@@ -928,6 +902,7 @@ int main( int argc, char** argv)
                 }
                 printf("inertia: positive / negative / zero = %d / %d / %d\n",
                        inert[0], inert[1], inert[2]);
+
                 magma_free( d_A );
             }
             else if (gpu) {
@@ -936,12 +911,13 @@ int main( int argc, char** argv)
                 magmaDoubleComplex_ptr d_A;
                 TESTING_CHECK( magma_zmalloc( &d_A, N*ldda ));
                 magma_zsetmatrix(N, N, h_A, lda, d_A, ldda, opts.queue );
+
                 gpu_time = magma_wtime();
                 magma_zhetrf_gpu( opts.uplo, N, d_A, ldda, ipiv, &info);
                 gpu_time = magma_wtime() - gpu_time;
 
                 magma_zgetmatrix(N, N, d_A, ldda, h_A, lda, opts.queue );
-                if ( opts.check == 2 && info == 0) {
+                if (opts.check == 2 && info == 0) {
                     error = get_residual_gpu( opts, (nopiv | nopiv_gpu), opts.uplo, N,
                                               h_A, lda, d_A, ldda, ipiv, solve_time );
                     magma_zgetmatrix(N, N, d_A, ldda, h_A, lda, opts.queue );
@@ -980,7 +956,7 @@ int main( int argc, char** argv)
             /* =====================================================================
                Check the factorization
                =================================================================== */
-            if ( opts.lapack ) {
+            if (opts.lapack) {
                 printf("%5lld %5lld   %7.2f (%7.2f)   %7.2f (%7.2f)",
                        (long long) N, (long long) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
             }
@@ -988,11 +964,11 @@ int main( int argc, char** argv)
                 printf("%5lld %5lld     ---   (  ---  )   %7.2f (%7.2f)",
                        (long long) N, (long long) N, gpu_perf, gpu_time );
             }
-            if ( opts.check == 2 && info == 0) {
+            if (opts.check == 2 && info == 0) {
                 if (aasen) {
                     error = get_residual_aasen( opts, (nopiv | nopiv_gpu), opts.uplo, N, h_A, lda, ipiv );
                 }
-                else if (!gpu) {
+                else if (! gpu) {
                     error = get_residual( opts, (nopiv | nopiv_gpu), opts.uplo, N, h_A, lda, ipiv );
                 }
                 // gpu case calls get_residual_gpu before to initialize error and timing.
@@ -1011,7 +987,7 @@ int main( int argc, char** argv)
                 printf("\n");
                 status += ! (error < tol);
             }
-            else if ( opts.check && info == 0 ) {
+            else if (opts.check && info == 0) {
                 if (aasen) {
                     error = get_LTLt_error( opts, (nopiv | nopiv_gpu), opts.uplo, N, h_A, lda, ipiv );
                 }
@@ -1029,7 +1005,7 @@ int main( int argc, char** argv)
             magma_free_pinned( h_A  );
             fflush( stdout );
         }
-        if ( opts.niter > 1 ) {
+        if (opts.niter > 1) {
             printf( "\n" );
         }
     }

From 471406c257d097d4fe776230e80b63d903e1e4a1 Mon Sep 17 00:00:00 2001
From: weilinscenccs <135019969+weilinscenccs@users.noreply.github.com>
Date: Tue, 24 Jun 2025 16:35:55 +0200
Subject: [PATCH 12/27] Update magma2.F90

bind C name should be magma_sync_wtime
---
 fortran/magma2.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fortran/magma2.F90 b/fortran/magma2.F90
index e004c8e21..5bb18dbed 100644
--- a/fortran/magma2.F90
+++ b/fortran/magma2.F90
@@ -82,7 +82,7 @@ real(c_double) function magma_wtime() &
     end function
 
     real(c_double) function magma_sync_wtime( queue ) &
-    bind(C, name="magma_wtime")
+    bind(C, name="magma_sync_wtime")
         use iso_c_binding
         type(c_ptr), value :: queue
     end function

From ebd725118f12d583c266eecf9f97e70833941bab Mon Sep 17 00:00:00 2001
From: cyy <cyyever@outlook.com>
Date: Sat, 17 May 2025 07:51:45 +0800
Subject: [PATCH 13/27] Improve C/C++ standard setting in CMake

---
 CMakeLists.txt | 28 ++++++----------------------
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d55ea406b..40f5dc73f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,9 +6,9 @@ cmake_minimum_required( VERSION 3.18 )
 option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON )
 
 if (USE_FORTRAN)
-    project( MAGMA C CXX Fortran )
+    project( MAGMA LANGUAGES C CXX Fortran )
 else()
-    project( MAGMA C CXX )
+    project( MAGMA LANGUAGES C CXX )
 endif()
 
 FIND_PROGRAM(PROGRAM_CCACHE ccache)
@@ -70,26 +70,10 @@ endif()
 # ----------------------------------------
 # use C++14 and C99
 # see http://stackoverflow.com/questions/10851247/how-to-activate-c-11-in-cmake
-include(CheckCXXCompilerFlag)
-include(CheckCCompilerFlag)
-CHECK_CXX_COMPILER_FLAG("-std=c++14" COMPILER_SUPPORTS_CXX14)
-CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X)
-CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC)
-if (COMPILER_SUPPORTS_CXX14)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
-elseif(COMPILER_SUPPORTS_CXX0X)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
-else()
-    message( WARNING "The compiler ${CMAKE_CXX_COMPILER} doesn't support the -std=c++14 flag. Some code may not compile.")
-endif()
-
-CHECK_C_COMPILER_FLAG("-std=c99" COMPILER_SUPPORTS_C99)
-if (COMPILER_SUPPORTS_C99)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
-else()
-    message( WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.")
-endif()
-
+set( CMAKE_C_STANDARD 99 )
+set( CMAKE_C_STANDARD_REQUIRED OFF )
+set( CMAKE_CXX_STANDARD 14 )
+set( CMAKE_CXX_STANDARD_REQUIRED OFF )
 
 # ----------------------------------------
 # check Fortran name mangling

From 27c325404cd8844e7af3bcc7a893097bad5589cf Mon Sep 17 00:00:00 2001
From: cyy <cyyever@outlook.com>
Date: Thu, 29 May 2025 17:18:56 +0800
Subject: [PATCH 14/27] Fix fpic

Signed-off-by: cyy <cyyever@outlook.com>
---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 40f5dc73f..4f436243f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,11 +69,12 @@ endif()
 
 # ----------------------------------------
 # use C++14 and C99
-# see http://stackoverflow.com/questions/10851247/how-to-activate-c-11-in-cmake
 set( CMAKE_C_STANDARD 99 )
 set( CMAKE_C_STANDARD_REQUIRED OFF )
 set( CMAKE_CXX_STANDARD 14 )
 set( CMAKE_CXX_STANDARD_REQUIRED OFF )
+include(CheckCXXCompilerFlag)
+CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC)
 
 # ----------------------------------------
 # check Fortran name mangling

From 675053b340f8859b7999cd9dbbccb2bacad3a73c Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@utk.edu>
Date: Thu, 17 Jul 2025 15:04:01 +0000
Subject: [PATCH 15/27] cmake: require c99 and c++14; prohibit decay to older
 standards and compiler extensions (gnu, etc.)

---
 CMakeLists.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f436243f..f6d9feb86 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,9 +70,11 @@ endif()
 # ----------------------------------------
 # use C++14 and C99
 set( CMAKE_C_STANDARD 99 )
-set( CMAKE_C_STANDARD_REQUIRED OFF )
+set( CMAKE_C_STANDARD_REQUIRED on )
+set( CMAKE_C_EXTENSIONS off )
 set( CMAKE_CXX_STANDARD 14 )
-set( CMAKE_CXX_STANDARD_REQUIRED OFF )
+set( CMAKE_CXX_STANDARD_REQUIRED on )  # prohibit "decay" to older standards (std=c++11, etc.)
+set( CMAKE_CXX_EXTENSIONS off )        # prohibit std=gnu++17, etc.
 include(CheckCXXCompilerFlag)
 CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC)
 

From c26c19bc78bcac31c3645f00074a8233dc7dad73 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@utk.edu>
Date: Thu, 17 Jul 2025 15:32:53 +0000
Subject: [PATCH 16/27] fortran: use c_sizeof from f2008. Fixes #55.

---
 fortran/magma2_common.F90 | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/fortran/magma2_common.F90 b/fortran/magma2_common.F90
index ac18317b8..09c285370 100644
--- a/fortran/magma2_common.F90
+++ b/fortran/magma2_common.F90
@@ -12,15 +12,13 @@ module magma2_common
 integer(c_int),            parameter :: idummy = 0
 type(c_ptr),               parameter :: ptr_dummy = c_null_ptr
 
-!! Intel ifort chokes on c_sizeof here, so use extension sizeof
-!! see https://software.intel.com/en-us/forums/intel-visual-fortran-compiler-for-windows/topic/495001
 integer(c_size_t), parameter :: &
-    sizeof_real      = sizeof(sdummy), &
-    sizeof_double    = sizeof(ddummy), &
-    sizeof_complex   = sizeof(cdummy), &
-    sizeof_complex16 = sizeof(zdummy), &
-    sizeof_int       = sizeof(idummy), &
-    sizeof_ptr       = sizeof(ptr_dummy)
+    sizeof_real      = c_sizeof(sdummy), &
+    sizeof_double    = c_sizeof(ddummy), &
+    sizeof_complex   = c_sizeof(cdummy), &
+    sizeof_complex16 = c_sizeof(zdummy), &
+    sizeof_int       = c_sizeof(idummy), &
+    sizeof_ptr       = c_sizeof(ptr_dummy)
 
 
 !! =============================================================================

From 6914cea1e75a59930e695bba3030914bd180d1e6 Mon Sep 17 00:00:00 2001
From: Ahmad Abdelfattah <ahmad@icl.utk.edu>
Date: Wed, 23 Jul 2025 15:51:05 -0400
Subject: [PATCH 17/27] add support to Blackwell GPUs (might want to add more
 sm_xx)

---
 Makefile | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index cc0d52749..713a28a42 100644
--- a/Makefile
+++ b/Makefile
@@ -154,6 +154,10 @@ ifeq ($(BACKEND),cuda)
 		CUDA_ARCH_ += sm_90
 		CUDA_ARCH_ += sm_90a
 	endif
+	ifneq ($(findstring Blackwell, $(GPU_TARGET)),)
+		CUDA_ARCH_ += sm_100
+	endif
+
 
 	# Remember to add to CMakeLists.txt too!
 
@@ -167,7 +171,7 @@ ifeq ($(BACKEND),cuda)
 	# See also $(info compile for ...) in Makefile
 
 
-    CUDA_ARCH_UNKNOWN_ = $(filter-out sm_% Kepler Maxwell Pascal Volta Turing Ampere Ada Hopper, $(CUDA_ARCH_))
+    CUDA_ARCH_UNKNOWN_ = $(filter-out sm_% Kepler Maxwell Pascal Volta Turing Ampere Ada Hopper Blackwell, $(CUDA_ARCH_))
     ifneq ($(CUDA_ARCH_UNKNOWN_),)
         $(error ERROR: unknown `$(CUDA_ARCH_UNKNOWN_)` in GPU_TARGET)
     endif
@@ -186,7 +190,7 @@ ifeq ($(BACKEND),cuda)
 
     # Check for empty
     ifeq ($(NV_SM),)
-        $(error ERROR: unknown `GPU_TARGET=$(GPU_TARGET)`. Set cuda_arch to one or more of Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, or valid sm_XX from nvcc -h)
+        $(error ERROR: unknown `GPU_TARGET=$(GPU_TARGET)`. Set cuda_arch to one or more of Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, Blackwell, or valid sm_XX from nvcc -h)
     else
         # Get last option (last 2 words) of nv_compute.
         nwords := $(words $(NV_COMP))
@@ -203,7 +207,7 @@ ifeq ($(BACKEND),cuda)
     CUDA_ARCH := $(SMS)
     CUDA_ARCH_MIN := $(word 1, $(SMS))0
     ifeq ($(CUDA_ARCH_MIN),)
-        $(error GPU_TARGET, currently $(GPU_TARGET), must contain one or more of Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, or valid sm_[0-9][0-9]. Please edit your make.inc file)
+        $(error GPU_TARGET, currently $(GPU_TARGET), must contain one or more of Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, Blackwell, or valid sm_[0-9][0-9]. Please edit your make.inc file)
     endif
 else ifeq ($(BACKEND),hip)
 

From 004a8b2fc7ded1b636340ac75937b1c93b0bc052 Mon Sep 17 00:00:00 2001
From: Ahmad Abdelfattah <ahmad@icl.utk.edu>
Date: Wed, 23 Jul 2025 15:54:21 -0400
Subject: [PATCH 18/27] add blackwell to make.inc files

---
 make.inc-examples/make.inc.mkl-gcc             | 1 +
 make.inc-examples/make.inc.mkl-gcc-debug       | 1 +
 make.inc-examples/make.inc.mkl-gcc-ilp64       | 1 +
 make.inc-examples/make.inc.mkl-gcc-ilp64-debug | 1 +
 make.inc-examples/make.inc.mkl-icx             | 1 +
 make.inc-examples/make.inc.mkl-icx-debug       | 1 +
 make.inc-examples/make.inc.mkl-icx-ilp64       | 1 +
 make.inc-examples/make.inc.mkl-icx-ilp64-debug | 1 +
 make.inc-examples/make.inc.openblas-debug      | 1 +
 make.inc-examples/make.inc.power9-essl         | 1 +
 10 files changed, 10 insertions(+)

diff --git a/make.inc-examples/make.inc.mkl-gcc b/make.inc-examples/make.inc.mkl-gcc
index 71cffeb65..641789475 100644
--- a/make.inc-examples/make.inc.mkl-gcc
+++ b/make.inc-examples/make.inc.mkl-gcc
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.mkl-gcc-debug b/make.inc-examples/make.inc.mkl-gcc-debug
index 785c1e7c7..5dead18fb 100644
--- a/make.inc-examples/make.inc.mkl-gcc-debug
+++ b/make.inc-examples/make.inc.mkl-gcc-debug
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.mkl-gcc-ilp64 b/make.inc-examples/make.inc.mkl-gcc-ilp64
index 81ff2b1ac..992b9878f 100644
--- a/make.inc-examples/make.inc.mkl-gcc-ilp64
+++ b/make.inc-examples/make.inc.mkl-gcc-ilp64
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.mkl-gcc-ilp64-debug b/make.inc-examples/make.inc.mkl-gcc-ilp64-debug
index df9a12ae3..b093e8af3 100644
--- a/make.inc-examples/make.inc.mkl-gcc-ilp64-debug
+++ b/make.inc-examples/make.inc.mkl-gcc-ilp64-debug
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.mkl-icx b/make.inc-examples/make.inc.mkl-icx
index 86d59801b..f423354fb 100644
--- a/make.inc-examples/make.inc.mkl-icx
+++ b/make.inc-examples/make.inc.mkl-icx
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.mkl-icx-debug b/make.inc-examples/make.inc.mkl-icx-debug
index 94e81c6ba..f42014f0f 100644
--- a/make.inc-examples/make.inc.mkl-icx-debug
+++ b/make.inc-examples/make.inc.mkl-icx-debug
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.mkl-icx-ilp64 b/make.inc-examples/make.inc.mkl-icx-ilp64
index 93b8b2c00..d038b93af 100644
--- a/make.inc-examples/make.inc.mkl-icx-ilp64
+++ b/make.inc-examples/make.inc.mkl-icx-ilp64
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.mkl-icx-ilp64-debug b/make.inc-examples/make.inc.mkl-icx-ilp64-debug
index e2a693a6f..e02b6a7d6 100644
--- a/make.inc-examples/make.inc.mkl-icx-ilp64-debug
+++ b/make.inc-examples/make.inc.mkl-icx-ilp64-debug
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.openblas-debug b/make.inc-examples/make.inc.openblas-debug
index bcf97e19b..e6661ae2a 100644
--- a/make.inc-examples/make.inc.openblas-debug
+++ b/make.inc-examples/make.inc.openblas-debug
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
diff --git a/make.inc-examples/make.inc.power9-essl b/make.inc-examples/make.inc.power9-essl
index cafdb2ab9..1c9410ece 100644
--- a/make.inc-examples/make.inc.power9-essl
+++ b/make.inc-examples/make.inc.power9-essl
@@ -15,6 +15,7 @@
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
 #     Hopper               - NVIDIA compute capability 9.x cards
+#     Blackwell            - NVIDIA compute capability 10.x and 12.x cards
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus

From b9f0a0f3344ccc1aff0cd57a6ce152f75950a7af Mon Sep 17 00:00:00 2001
From: Ahmad Abdelfattah <ahmad@icl.utk.edu>
Date: Wed, 23 Jul 2025 16:03:46 -0400
Subject: [PATCH 19/27] remove very old CUDA archs

---
 make.inc-examples/make.inc.mkl-gcc             | 4 +---
 make.inc-examples/make.inc.mkl-gcc-debug       | 4 +---
 make.inc-examples/make.inc.mkl-gcc-ilp64       | 4 +---
 make.inc-examples/make.inc.mkl-gcc-ilp64-debug | 4 +---
 make.inc-examples/make.inc.mkl-icx             | 4 +---
 make.inc-examples/make.inc.mkl-icx-debug       | 4 +---
 make.inc-examples/make.inc.mkl-icx-ilp64       | 4 +---
 make.inc-examples/make.inc.mkl-icx-ilp64-debug | 4 +---
 make.inc-examples/make.inc.openblas-debug      | 4 +---
 make.inc-examples/make.inc.power9-essl         | 4 +---
 10 files changed, 10 insertions(+), 30 deletions(-)

diff --git a/make.inc-examples/make.inc.mkl-gcc b/make.inc-examples/make.inc.mkl-gcc
index 641789475..159ea0c65 100644
--- a/make.inc-examples/make.inc.mkl-gcc
+++ b/make.inc-examples/make.inc.mkl-gcc
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.mkl-gcc-debug b/make.inc-examples/make.inc.mkl-gcc-debug
index 5dead18fb..12cbd9a49 100644
--- a/make.inc-examples/make.inc.mkl-gcc-debug
+++ b/make.inc-examples/make.inc.mkl-gcc-debug
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.mkl-gcc-ilp64 b/make.inc-examples/make.inc.mkl-gcc-ilp64
index 992b9878f..cd52fb7c2 100644
--- a/make.inc-examples/make.inc.mkl-gcc-ilp64
+++ b/make.inc-examples/make.inc.mkl-gcc-ilp64
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.mkl-gcc-ilp64-debug b/make.inc-examples/make.inc.mkl-gcc-ilp64-debug
index b093e8af3..e6947d425 100644
--- a/make.inc-examples/make.inc.mkl-gcc-ilp64-debug
+++ b/make.inc-examples/make.inc.mkl-gcc-ilp64-debug
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.mkl-icx b/make.inc-examples/make.inc.mkl-icx
index f423354fb..c9efc42f4 100644
--- a/make.inc-examples/make.inc.mkl-icx
+++ b/make.inc-examples/make.inc.mkl-icx
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.mkl-icx-debug b/make.inc-examples/make.inc.mkl-icx-debug
index f42014f0f..9d5dd6fa4 100644
--- a/make.inc-examples/make.inc.mkl-icx-debug
+++ b/make.inc-examples/make.inc.mkl-icx-debug
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.mkl-icx-ilp64 b/make.inc-examples/make.inc.mkl-icx-ilp64
index d038b93af..68765ea58 100644
--- a/make.inc-examples/make.inc.mkl-icx-ilp64
+++ b/make.inc-examples/make.inc.mkl-icx-ilp64
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.mkl-icx-ilp64-debug b/make.inc-examples/make.inc.mkl-icx-ilp64-debug
index e02b6a7d6..5e174205a 100644
--- a/make.inc-examples/make.inc.mkl-icx-ilp64-debug
+++ b/make.inc-examples/make.inc.mkl-icx-ilp64-debug
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.openblas-debug b/make.inc-examples/make.inc.openblas-debug
index e6661ae2a..f4215ab53 100644
--- a/make.inc-examples/make.inc.openblas-debug
+++ b/make.inc-examples/make.inc.openblas-debug
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 
diff --git a/make.inc-examples/make.inc.power9-essl b/make.inc-examples/make.inc.power9-essl
index 1c9410ece..254d7135f 100644
--- a/make.inc-examples/make.inc.power9-essl
+++ b/make.inc-examples/make.inc.power9-essl
@@ -8,9 +8,6 @@
 
 # GPU_TARGET contains one or more of the architectures below
 # to specify for which GPUs you want to compile MAGMA:
-#     Fermi                - NVIDIA compute capability 2.x cards
-#     Kepler               - NVIDIA compute capability 3.x cards
-#     Maxwell              - NVIDIA compute capability 5.x cards
 #     Pascal               - NVIDIA compute capability 6.x cards
 #     Volta/Turing         - NVIDIA compute capability 7.x cards
 #     Ampere/Ada-Lovelace  - NVIDIA compute capability 8.x cards
@@ -19,6 +16,7 @@
 # The default is "Volta Turing Ampere".
 # Note that depending on the CUDA version, some compute capabilities may not be supported
 # See http://developer.nvidia.com/cuda-gpus
+# Older architectures are supported if the proper version of CUDA is used
 #
 GPU_TARGET ?= Volta Turing Ampere
 

From 6ae9acd9919cc6b4dd45ebddc6c97b14906ddeb2 Mon Sep 17 00:00:00 2001
From: Ahmad Abdelfattah <ahmad@icl.utk.edu>
Date: Wed, 23 Jul 2025 16:24:52 -0400
Subject: [PATCH 20/27] also add sm 12.0 when Blackwell is selected

---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index 713a28a42..bbecfee83 100644
--- a/Makefile
+++ b/Makefile
@@ -156,6 +156,7 @@ ifeq ($(BACKEND),cuda)
 	endif
 	ifneq ($(findstring Blackwell, $(GPU_TARGET)),)
 		CUDA_ARCH_ += sm_100
+		CUDA_ARCH_ += sm_120
 	endif
 
 

From ac4262a82ea6a462c747759a623a8be05d9e0d59 Mon Sep 17 00:00:00 2001
From: Ahmad Abdelfattah <ahmad@icl.utk.edu>
Date: Thu, 24 Jul 2025 10:28:18 -0400
Subject: [PATCH 21/27] update release notes

---
 ReleaseNotes | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ReleaseNotes b/ReleaseNotes
index a3cb5f0ec..136001a36 100644
--- a/ReleaseNotes
+++ b/ReleaseNotes
@@ -26,6 +26,8 @@ Most routines have all four precisions:
 single (s), double (d), single-complex (c), double-complex (z).
 
 2.10.0 - XXX XX, XXXX
+    * Support added for NVIDIA Blackwell GPUs (sm_100 and sm_120)
+      - Requires CUDA-12.8 or higher
     * New functionality: Variable-size batch non-pivoting LU factorization
       - Contributed by Wajih Boukaram, Yang Liu, and Sherry Li at LBNL
       - magma_<T>getrf_nopiv_vbatched performs a non-pivoting LU factorization on a

From 0955da3feb16a38084b3284680e144a11af8dc58 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Fri, 25 Jul 2025 11:20:35 -0400
Subject: [PATCH 22/27] cmake: add Blackwell

---
 CMakeLists.txt | 4 ++++
 Makefile       | 4 +---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f6d9feb86..155e6f73c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -166,6 +166,10 @@ if (MAGMA_ENABLE_CUDA)
                 set( GPU_TARGET "${GPU_TARGET} sm_90 sm_90a" )
             endif()
 
+            if (GPU_TARGET MATCHES Blackwell)
+                set( GPU_TARGET "${GPU_TARGET} sm_100 sm_120" )
+            endif()
+
             # Find all sm_XY and sm_XYZ, then strip off sm_.
             string( REGEX MATCHALL "sm_[0-9][0-9a-z]+" sms "${GPU_TARGET}" )
             string( REPLACE "sm_" "" __cuda_architectures "${sms}" )
diff --git a/Makefile b/Makefile
index bbecfee83..b8fa66a63 100644
--- a/Makefile
+++ b/Makefile
@@ -158,9 +158,7 @@ ifeq ($(BACKEND),cuda)
 		CUDA_ARCH_ += sm_100
 		CUDA_ARCH_ += sm_120
 	endif
-
-
-	# Remember to add to CMakeLists.txt too!
+	# Remember to add new architectures to CMakeLists.txt too!
 
 	# Next, add compile options for specific smXX
 	# sm_xx is binary, compute_xx is PTX for forward compatability

From 21d102f90096ecba5ab5d1656a6463634d14c592 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Sat, 2 Aug 2025 16:51:14 -0400
Subject: [PATCH 23/27] archive files that are not in Makefile.src. Note
 magma_zmlumerge.cpp is still in sparse/control (for now); rm copy in
 sparse/src.

---
 .../magmablas}/zswapdblk_batched.cu           |  0
 .../sparse}/blas/clag2z_sparse.cu             |  0
 .../sparse}/blas/magma_clag2z.cpp             |  0
 .../sparse}/blas/magma_zlag2c.cpp             |  0
 .../sparse}/blas/magma_zthrsselect.cu         |  0
 .../sparse}/blas/zbcsrblockinfo.cu            |  0
 {sparse => archive/sparse}/blas/zbcsrcpy.cu   |  0
 .../sparse}/blas/zbcsrlugemm.cu               |  0
 .../sparse}/blas/zbcsrlupivloc.cu             |  0
 {sparse => archive/sparse}/blas/zbcsrswp.cu   |  0
 {sparse => archive/sparse}/blas/zbcsrtrsv.cu  |  0
 .../sparse}/blas/zgeblockstruct.cu            |  0
 {sparse => archive/sparse}/blas/zgeisai.cu    |  0
 {sparse => archive/sparse}/blas/zgeisai_16.cu |  0
 {sparse => archive/sparse}/blas/zgeisai_32.cu |  0
 {sparse => archive/sparse}/blas/zgeisai_8.cu  |  0
 {sparse => archive/sparse}/blas/zilut.cpp     |  0
 .../sparse}/blas/zlag2c_sparse.cu             |  0
 .../sparse}/control/magma_zdummy.cpp          |  0
 .../sparse}/src/magma_zmlumerge.cpp           |  0
 .../sparse}/src/magma_zwrapper.cpp            |  0
 {sparse => archive/sparse}/src/zdummy.cpp     |  0
 {sparse => archive/sparse}/src/zgeisai.cpp    |  0
 {sparse => archive/sparse}/src/ziterict.cpp   |  0
 {sparse => archive/sparse}/src/zsyisai.cpp    |  0
 .../sparse}/testing/testing_zdiagdom.cpp      |  0
 .../sparse}/testing/testing_zgemv_cpu_gpu.cpp |  0
 .../sparse}/testing/testing_zjaccard.cpp      |  0
 .../sparse}/testing/testing_zoperation.cpp    |  0
 .../sparse}/testing/testing_zparilu.cpp       |  0
 .../testing/testing_zparilu_weight.cpp        |  0
 .../testing/testing_zsolver_energy.cpp        |  0
 .../sparse}/testing/testing_zthresselect.cpp  |  0
 .../testing/testing_zusemagma_example.cpp     |  0
 docs/Doxyfile-fast                            | 26 -------------------
 include/magma_zbatched.h                      |  7 -----
 sparse/control/Makefile.src                   |  5 ----
 sparse/testing/Makefile.src                   |  4 ---
 tools/MakeMagmaRelease.pl                     | 10 +++----
 39 files changed, 3 insertions(+), 49 deletions(-)
 rename {magmablas => archive/magmablas}/zswapdblk_batched.cu (100%)
 rename {sparse => archive/sparse}/blas/clag2z_sparse.cu (100%)
 rename {sparse => archive/sparse}/blas/magma_clag2z.cpp (100%)
 rename {sparse => archive/sparse}/blas/magma_zlag2c.cpp (100%)
 rename {sparse => archive/sparse}/blas/magma_zthrsselect.cu (100%)
 rename {sparse => archive/sparse}/blas/zbcsrblockinfo.cu (100%)
 rename {sparse => archive/sparse}/blas/zbcsrcpy.cu (100%)
 rename {sparse => archive/sparse}/blas/zbcsrlugemm.cu (100%)
 rename {sparse => archive/sparse}/blas/zbcsrlupivloc.cu (100%)
 rename {sparse => archive/sparse}/blas/zbcsrswp.cu (100%)
 rename {sparse => archive/sparse}/blas/zbcsrtrsv.cu (100%)
 rename {sparse => archive/sparse}/blas/zgeblockstruct.cu (100%)
 rename {sparse => archive/sparse}/blas/zgeisai.cu (100%)
 rename {sparse => archive/sparse}/blas/zgeisai_16.cu (100%)
 rename {sparse => archive/sparse}/blas/zgeisai_32.cu (100%)
 rename {sparse => archive/sparse}/blas/zgeisai_8.cu (100%)
 rename {sparse => archive/sparse}/blas/zilut.cpp (100%)
 rename {sparse => archive/sparse}/blas/zlag2c_sparse.cu (100%)
 rename {sparse => archive/sparse}/control/magma_zdummy.cpp (100%)
 rename {sparse => archive/sparse}/src/magma_zmlumerge.cpp (100%)
 rename {sparse => archive/sparse}/src/magma_zwrapper.cpp (100%)
 rename {sparse => archive/sparse}/src/zdummy.cpp (100%)
 rename {sparse => archive/sparse}/src/zgeisai.cpp (100%)
 rename {sparse => archive/sparse}/src/ziterict.cpp (100%)
 rename {sparse => archive/sparse}/src/zsyisai.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zdiagdom.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zgemv_cpu_gpu.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zjaccard.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zoperation.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zparilu.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zparilu_weight.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zsolver_energy.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zthresselect.cpp (100%)
 rename {sparse => archive/sparse}/testing/testing_zusemagma_example.cpp (100%)

diff --git a/magmablas/zswapdblk_batched.cu b/archive/magmablas/zswapdblk_batched.cu
similarity index 100%
rename from magmablas/zswapdblk_batched.cu
rename to archive/magmablas/zswapdblk_batched.cu
diff --git a/sparse/blas/clag2z_sparse.cu b/archive/sparse/blas/clag2z_sparse.cu
similarity index 100%
rename from sparse/blas/clag2z_sparse.cu
rename to archive/sparse/blas/clag2z_sparse.cu
diff --git a/sparse/blas/magma_clag2z.cpp b/archive/sparse/blas/magma_clag2z.cpp
similarity index 100%
rename from sparse/blas/magma_clag2z.cpp
rename to archive/sparse/blas/magma_clag2z.cpp
diff --git a/sparse/blas/magma_zlag2c.cpp b/archive/sparse/blas/magma_zlag2c.cpp
similarity index 100%
rename from sparse/blas/magma_zlag2c.cpp
rename to archive/sparse/blas/magma_zlag2c.cpp
diff --git a/sparse/blas/magma_zthrsselect.cu b/archive/sparse/blas/magma_zthrsselect.cu
similarity index 100%
rename from sparse/blas/magma_zthrsselect.cu
rename to archive/sparse/blas/magma_zthrsselect.cu
diff --git a/sparse/blas/zbcsrblockinfo.cu b/archive/sparse/blas/zbcsrblockinfo.cu
similarity index 100%
rename from sparse/blas/zbcsrblockinfo.cu
rename to archive/sparse/blas/zbcsrblockinfo.cu
diff --git a/sparse/blas/zbcsrcpy.cu b/archive/sparse/blas/zbcsrcpy.cu
similarity index 100%
rename from sparse/blas/zbcsrcpy.cu
rename to archive/sparse/blas/zbcsrcpy.cu
diff --git a/sparse/blas/zbcsrlugemm.cu b/archive/sparse/blas/zbcsrlugemm.cu
similarity index 100%
rename from sparse/blas/zbcsrlugemm.cu
rename to archive/sparse/blas/zbcsrlugemm.cu
diff --git a/sparse/blas/zbcsrlupivloc.cu b/archive/sparse/blas/zbcsrlupivloc.cu
similarity index 100%
rename from sparse/blas/zbcsrlupivloc.cu
rename to archive/sparse/blas/zbcsrlupivloc.cu
diff --git a/sparse/blas/zbcsrswp.cu b/archive/sparse/blas/zbcsrswp.cu
similarity index 100%
rename from sparse/blas/zbcsrswp.cu
rename to archive/sparse/blas/zbcsrswp.cu
diff --git a/sparse/blas/zbcsrtrsv.cu b/archive/sparse/blas/zbcsrtrsv.cu
similarity index 100%
rename from sparse/blas/zbcsrtrsv.cu
rename to archive/sparse/blas/zbcsrtrsv.cu
diff --git a/sparse/blas/zgeblockstruct.cu b/archive/sparse/blas/zgeblockstruct.cu
similarity index 100%
rename from sparse/blas/zgeblockstruct.cu
rename to archive/sparse/blas/zgeblockstruct.cu
diff --git a/sparse/blas/zgeisai.cu b/archive/sparse/blas/zgeisai.cu
similarity index 100%
rename from sparse/blas/zgeisai.cu
rename to archive/sparse/blas/zgeisai.cu
diff --git a/sparse/blas/zgeisai_16.cu b/archive/sparse/blas/zgeisai_16.cu
similarity index 100%
rename from sparse/blas/zgeisai_16.cu
rename to archive/sparse/blas/zgeisai_16.cu
diff --git a/sparse/blas/zgeisai_32.cu b/archive/sparse/blas/zgeisai_32.cu
similarity index 100%
rename from sparse/blas/zgeisai_32.cu
rename to archive/sparse/blas/zgeisai_32.cu
diff --git a/sparse/blas/zgeisai_8.cu b/archive/sparse/blas/zgeisai_8.cu
similarity index 100%
rename from sparse/blas/zgeisai_8.cu
rename to archive/sparse/blas/zgeisai_8.cu
diff --git a/sparse/blas/zilut.cpp b/archive/sparse/blas/zilut.cpp
similarity index 100%
rename from sparse/blas/zilut.cpp
rename to archive/sparse/blas/zilut.cpp
diff --git a/sparse/blas/zlag2c_sparse.cu b/archive/sparse/blas/zlag2c_sparse.cu
similarity index 100%
rename from sparse/blas/zlag2c_sparse.cu
rename to archive/sparse/blas/zlag2c_sparse.cu
diff --git a/sparse/control/magma_zdummy.cpp b/archive/sparse/control/magma_zdummy.cpp
similarity index 100%
rename from sparse/control/magma_zdummy.cpp
rename to archive/sparse/control/magma_zdummy.cpp
diff --git a/sparse/src/magma_zmlumerge.cpp b/archive/sparse/src/magma_zmlumerge.cpp
similarity index 100%
rename from sparse/src/magma_zmlumerge.cpp
rename to archive/sparse/src/magma_zmlumerge.cpp
diff --git a/sparse/src/magma_zwrapper.cpp b/archive/sparse/src/magma_zwrapper.cpp
similarity index 100%
rename from sparse/src/magma_zwrapper.cpp
rename to archive/sparse/src/magma_zwrapper.cpp
diff --git a/sparse/src/zdummy.cpp b/archive/sparse/src/zdummy.cpp
similarity index 100%
rename from sparse/src/zdummy.cpp
rename to archive/sparse/src/zdummy.cpp
diff --git a/sparse/src/zgeisai.cpp b/archive/sparse/src/zgeisai.cpp
similarity index 100%
rename from sparse/src/zgeisai.cpp
rename to archive/sparse/src/zgeisai.cpp
diff --git a/sparse/src/ziterict.cpp b/archive/sparse/src/ziterict.cpp
similarity index 100%
rename from sparse/src/ziterict.cpp
rename to archive/sparse/src/ziterict.cpp
diff --git a/sparse/src/zsyisai.cpp b/archive/sparse/src/zsyisai.cpp
similarity index 100%
rename from sparse/src/zsyisai.cpp
rename to archive/sparse/src/zsyisai.cpp
diff --git a/sparse/testing/testing_zdiagdom.cpp b/archive/sparse/testing/testing_zdiagdom.cpp
similarity index 100%
rename from sparse/testing/testing_zdiagdom.cpp
rename to archive/sparse/testing/testing_zdiagdom.cpp
diff --git a/sparse/testing/testing_zgemv_cpu_gpu.cpp b/archive/sparse/testing/testing_zgemv_cpu_gpu.cpp
similarity index 100%
rename from sparse/testing/testing_zgemv_cpu_gpu.cpp
rename to archive/sparse/testing/testing_zgemv_cpu_gpu.cpp
diff --git a/sparse/testing/testing_zjaccard.cpp b/archive/sparse/testing/testing_zjaccard.cpp
similarity index 100%
rename from sparse/testing/testing_zjaccard.cpp
rename to archive/sparse/testing/testing_zjaccard.cpp
diff --git a/sparse/testing/testing_zoperation.cpp b/archive/sparse/testing/testing_zoperation.cpp
similarity index 100%
rename from sparse/testing/testing_zoperation.cpp
rename to archive/sparse/testing/testing_zoperation.cpp
diff --git a/sparse/testing/testing_zparilu.cpp b/archive/sparse/testing/testing_zparilu.cpp
similarity index 100%
rename from sparse/testing/testing_zparilu.cpp
rename to archive/sparse/testing/testing_zparilu.cpp
diff --git a/sparse/testing/testing_zparilu_weight.cpp b/archive/sparse/testing/testing_zparilu_weight.cpp
similarity index 100%
rename from sparse/testing/testing_zparilu_weight.cpp
rename to archive/sparse/testing/testing_zparilu_weight.cpp
diff --git a/sparse/testing/testing_zsolver_energy.cpp b/archive/sparse/testing/testing_zsolver_energy.cpp
similarity index 100%
rename from sparse/testing/testing_zsolver_energy.cpp
rename to archive/sparse/testing/testing_zsolver_energy.cpp
diff --git a/sparse/testing/testing_zthresselect.cpp b/archive/sparse/testing/testing_zthresselect.cpp
similarity index 100%
rename from sparse/testing/testing_zthresselect.cpp
rename to archive/sparse/testing/testing_zthresselect.cpp
diff --git a/sparse/testing/testing_zusemagma_example.cpp b/archive/sparse/testing/testing_zusemagma_example.cpp
similarity index 100%
rename from sparse/testing/testing_zusemagma_example.cpp
rename to archive/sparse/testing/testing_zusemagma_example.cpp
diff --git a/docs/Doxyfile-fast b/docs/Doxyfile-fast
index 6f9d0869e..2d5411161 100644
--- a/docs/Doxyfile-fast
+++ b/docs/Doxyfile-fast
@@ -1078,7 +1078,6 @@ INPUT                  = \
 	../magmablas/zswap.cu                                       \
 	../magmablas/zswapblk.cu                                    \
 	../magmablas/zswapdblk.cu                                   \
-	../magmablas/zswapdblk_batched.cu                           \
 	../magmablas/zsymmetrize.cu                                 \
 	../magmablas/zsymmetrize_tiles.cu                           \
 	../magmablas/zsymv.cu                                       \
@@ -1116,32 +1115,21 @@ INPUT                  = \
 	../sparse/blas/atomicopsfloat.h                             \
 	../sparse/blas/atomicopsmagmaDoubleComplex.h                \
 	../sparse/blas/atomicopsmagmaFloatComplex.h                 \
-	../sparse/blas/clag2z_sparse.cu                             \
-	../sparse/blas/magma_clag2z.cpp                             \
 	../sparse/blas/magma_z_blaswrapper.cpp                      \
 	../sparse/blas/magma_zcuspaxpy.cpp                          \
 	../sparse/blas/magma_zcuspmm.cpp                            \
 	../sparse/blas/magma_zdiagcheck.cu                          \
 	../sparse/blas/magma_zget_rowptr.cu                         \
-	../sparse/blas/magma_zlag2c.cpp                             \
 	../sparse/blas/magma_zmconjugate.cu                         \
 	../sparse/blas/magma_zmcsrcompressor_gpu.cu                 \
 	../sparse/blas/magma_zpreselect.cu                          \
 	../sparse/blas/magma_zthrsrm.cu                             \
-	../sparse/blas/magma_zthrsselect.cu                         \
 	../sparse/blas/zbajac_csr.cu                                \
 	../sparse/blas/zbajac_csr_overlap.cu                        \
-	../sparse/blas/zbcsrblockinfo.cu                            \
-	../sparse/blas/zbcsrcpy.cu                                  \
-	../sparse/blas/zbcsrlugemm.cu                               \
-	../sparse/blas/zbcsrlupivloc.cu                             \
-	../sparse/blas/zbcsrswp.cu                                  \
-	../sparse/blas/zbcsrtrsv.cu                                 \
 	../sparse/blas/zcgecsrmv_mixed_prec.cu                      \
 	../sparse/blas/zcompact.cu                                  \
 	../sparse/blas/zge3pt.cu                                    \
 	../sparse/blas/zgeaxpy.cu                                   \
-	../sparse/blas/zgeblockstruct.cu                            \
 	../sparse/blas/zgecscsyncfreetrsm.cu                        \
 	../sparse/blas/zgecsr5mv.cu                                 \
 	../sparse/blas/zgecsrmv.cu                                  \
@@ -1150,10 +1138,6 @@ INPUT                  = \
 	../sparse/blas/zgeellmv.cu                                  \
 	../sparse/blas/zgeellrtmv.cu                                \
 	../sparse/blas/zgeelltmv.cu                                 \
-	../sparse/blas/zgeisai.cu                                   \
-	../sparse/blas/zgeisai_16.cu                                \
-	../sparse/blas/zgeisai_32.cu                                \
-	../sparse/blas/zgeisai_8.cu                                 \
 	../sparse/blas/zgeisai_batched32.cu                         \
 	../sparse/blas/zgeisai_maxblock.cu                          \
 	../sparse/blas/zgeisai_trsv.cu                              \
@@ -1161,10 +1145,8 @@ INPUT                  = \
 	../sparse/blas/zgesellcmmv.cu                               \
 	../sparse/blas/zgesellcmv.cu                                \
 	../sparse/blas/zilu.cpp                                     \
-	../sparse/blas/zilut.cpp                                    \
 	../sparse/blas/zjaccard_weights.cu                          \
 	../sparse/blas/zjacobisetup.cu                              \
-	../sparse/blas/zlag2c_sparse.cu                             \
 	../sparse/blas/zlobpcg_maxpy.cu                             \
 	../sparse/blas/zlobpcg_residuals.cu                         \
 	../sparse/blas/zlobpcg_shift.cu                             \
@@ -1192,7 +1174,6 @@ INPUT                  = \
 	../sparse/control/error.cpp                                 \
 	../sparse/control/magma_zcsrsplit.cpp                       \
 	../sparse/control/magma_zdomainoverlap.cpp                  \
-	../sparse/control/magma_zdummy.cpp                          \
 	../sparse/control/magma_zfree.cpp                           \
 	../sparse/control/magma_zmatrixchar.cpp                     \
 	../sparse/control/magma_zmconvert.cpp                       \
@@ -1205,7 +1186,6 @@ INPUT                  = \
 	../sparse/control/magma_zmgenerator.cpp                     \
 	../sparse/control/magma_zmilustruct.cpp                     \
 	../sparse/control/magma_zmio.cpp                            \
-	../sparse/control/magma_zmlumerge.cpp                       \
 	../sparse/control/magma_zmscale.cpp                         \
 	../sparse/control/magma_zmshrink.cpp                        \
 	../sparse/control/magma_zmslice.cpp                         \
@@ -1238,9 +1218,7 @@ INPUT                  = \
 	../sparse/src/magma_z_solver_wrapper.cpp                    \
 	../sparse/src/magma_zcustomprecond.cpp                      \
 	../sparse/src/magma_zcustomspmv.cpp                         \
-	../sparse/src/magma_zmlumerge.cpp                           \
 	../sparse/src/magma_zqr_wrapper.cpp                         \
-	../sparse/src/magma_zwrapper.cpp                            \
 	../sparse/src/zbaiter.cpp                                   \
 	../sparse/src/zbaiter_overlap.cpp                           \
 	../sparse/src/zbicg.cpp                                     \
@@ -1258,17 +1236,14 @@ INPUT                  = \
 	../sparse/src/zcgs_merge.cpp                                \
 	../sparse/src/zcustomic.cpp                                 \
 	../sparse/src/zcustomilu.cpp                                \
-	../sparse/src/zdummy.cpp                                    \
 	../sparse/src/zfgmres.cpp                                   \
 	../sparse/src/zftjacobi.cpp                                 \
-	../sparse/src/zgeisai.cpp                                   \
 	../sparse/src/zgeisai_apply.cpp                             \
 	../sparse/src/zgeisai_lower.cpp                             \
 	../sparse/src/zgeisai_upper.cpp                             \
 	../sparse/src/zidr.cpp                                      \
 	../sparse/src/zidr_merge.cpp                                \
 	../sparse/src/zidr_strms.cpp                                \
-	../sparse/src/ziterict.cpp                                  \
 	../sparse/src/ziterref.cpp                                  \
 	../sparse/src/zjacobi.cpp                                   \
 	../sparse/src/zjacobidomainoverlap.cpp                      \
@@ -1301,7 +1276,6 @@ INPUT                  = \
 	../sparse/src/zqmr_merge.cpp                                \
 	../sparse/src/zresidual.cpp                                 \
 	../sparse/src/zresidualvec.cpp                              \
-	../sparse/src/zsyisai.cpp                                   \
 	../sparse/src/ztfqmr.cpp                                    \
 	../sparse/src/ztfqmr_merge.cpp                              \
 	../sparse/src/ztfqmr_unrolled.cpp                           \
diff --git a/include/magma_zbatched.h b/include/magma_zbatched.h
index 5c4681620..97d1063a0 100644
--- a/include/magma_zbatched.h
+++ b/include/magma_zbatched.h
@@ -66,13 +66,6 @@ magma_int_t magma_get_zgetri_batched_ntcol(magma_int_t m, magma_int_t n);
 magma_int_t magma_get_ztrsm_batched_stop_nb(magma_side_t side, magma_int_t m, magma_int_t n);
 void magma_get_zgbtrf_batched_params(magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t *nb, magma_int_t *threads);
 
-void
-magmablas_zswapdblk_batched(
-    magma_int_t n, magma_int_t nb,
-    magmaDoubleComplex **dA, magma_int_t ldda, magma_int_t inca,
-    magmaDoubleComplex **dB, magma_int_t lddb, magma_int_t incb,
-    magma_int_t batchCount, magma_queue_t queue );
-
   /*
    *  LAPACK batched routines
    */
diff --git a/sparse/control/Makefile.src b/sparse/control/Makefile.src
index 369243c40..9cfe78871 100644
--- a/sparse/control/Makefile.src
+++ b/sparse/control/Makefile.src
@@ -58,11 +58,6 @@ libsparse_src += \
 	$(cdir)/magma_zparict_tools.cpp       \
 
 
-
-# dummy to compensate for routines not included in release
-libsparse_src += \
-#	$(cdir)/magma_zdummy.cpp                    \
-
 # ----------------------------------------------------------------------
 # pop first directory
 cdir      := $(firstword $(dir_stack))
diff --git a/sparse/testing/Makefile.src b/sparse/testing/Makefile.src
index f089ce7ef..66962a6b3 100644
--- a/sparse/testing/Makefile.src
+++ b/sparse/testing/Makefile.src
@@ -60,10 +60,6 @@ sparse_testing_src += \
 #	$(cdir)/testing_z_mpk_4.cpp           \
 #	$(cdir)/testing_z_mpk_5.cpp           \
 
-# Big Data Analytics
-#sparse_testing_src += \
-	$(cdir)/testing_zjaccard.cpp              \
-
 # debugging
 sparse_testing_src += \
 	$(cdir)/testing_zsptrsv.cpp           \
diff --git a/tools/MakeMagmaRelease.pl b/tools/MakeMagmaRelease.pl
index a45a0464f..a53d2ee90 100755
--- a/tools/MakeMagmaRelease.pl
+++ b/tools/MakeMagmaRelease.pl
@@ -35,11 +35,7 @@
     sparse/python
 
     sparse/testing/testing_zpardiso.cpp
-    sparse/testing/testing_zparilu_weight.cpp
     sparse/testing/testing_zsolver_allufmc.cpp
-    sparse/testing/testing_zsolver_energy.cpp
-
-    sparse/blas/zilut.cpp
 
     testing/*.txt
     testing/fortran2.cpp
@@ -128,7 +124,7 @@ sub MakeRelease
     }
 
     myCmd("mkdir $RELEASE_PATH");
-    
+
     # Save current directory
     my $dir = `pwd`;
     chomp $dir;
@@ -220,9 +216,9 @@ sub MakeRelease
 
     # Generate cuda files
     myCmd("echo -e 'GPU_TARGET = Volta\nFORT = true' > make.inc");
-    
+
     # Compile the documentation
-    print "Compile the documentation\n";                                                                                                     
+    print "Compile the documentation\n";
     myCmd("make docs");
 
     myCmd("make -j generate");

From b89e846c2f2a95539c104a4d0c0ae937aafa9821 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Mon, 21 Jul 2025 23:57:07 -0400
Subject: [PATCH 24/27] make: remove out-dated hg commands

---
 Makefile | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/Makefile b/Makefile
index b8fa66a63..14bb784de 100644
--- a/Makefile
+++ b/Makefile
@@ -977,33 +977,6 @@ pkgconfig:
 	    > $(DESTDIR)$(prefix)/lib$(LIB_SUFFIX)/pkgconfig/magma.pc
 
 
-# ------------------------------------------------------------------------------
-# files.txt is nearly all (active) files in SVN, excluding directories. Useful for rsync, etc.
-# files-doxygen.txt is all (active) source files in SVN, used by Doxyfile-fast
-
-# excludes non-active directories like obsolete.
-# excludes directories by matching *.* files (\w\.\w) and some exceptions like Makefile.
-files.txt: force
-	hg st -m -a -c \
-		| perl -pe 's/^. +//' | sort \
-		| egrep -v '^\.$$|obsolete|deprecated|contrib\b|^exp' \
-		| egrep '\w\.\w|Makefile|docs|run' \
-		> files.txt
-	egrep -v '(\.html|\.css|\.f|\.in|\.m|\.mtx|\.pl|\.png|\.sh|\.txt)$$|checkdiag|COPYRIGHT|docs|example|make\.|Makefile|quark|README|Release|results|testing_|testing/lin|testing/matgen|tools' files.txt \
-		| perl -pe 'chomp; $$_ = sprintf("\t../%-57s\\\n", $$_);' \
-		> files-doxygen.txt
-
-# files.txt per sub-directory
-subdir_files = $(addsuffix /files.txt,$(subdirs) $(sparse_subdirs))
-
-$(subdir_files): force
-	cd $(dir $@) && hg st -m -a -c -X '*/*' . \
-		| perl -pe 's/^. +//' | sort \
-		| egrep -v '^\.$$|obsolete|deprecated|contrib\b|^exp' \
-		| egrep '\w\.\w|Makefile|docs|run' \
-		> files.txt
-
-
 # ------------------------------------------------------------------------------
 echo:
 	@echo "====="

From 0176416df31837680d9cbdd42e55b8fc7721ea1a Mon Sep 17 00:00:00 2001
From: Ahmad Abdelfattah <ahmad@icl.utk.edu>
Date: Fri, 22 Aug 2025 11:56:30 -0400
Subject: [PATCH 25/27] fix compilation issue with cuda 13

---
 interface_cuda/interface.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp
index c3ae4e0eb..9c8e6c67f 100644
--- a/interface_cuda/interface.cpp
+++ b/interface_cuda/interface.cpp
@@ -391,10 +391,18 @@ magma_print_environment()
         check_error( err );
 
         #ifdef MAGMA_HAVE_CUDA
+
+        int clock_khz;
+        #if CUDA_VERSION >= 13000
+        cudaDeviceGetAttribute (&clock_khz, cudaDevAttrClockRate, dev);
+        #else
+        clock_khz = prop.clockRate;
+        #endif
+
         printf( "%% device %d: %s, %.1f MHz clock, %.1f MiB memory, capability %d.%d\n",
                 dev,
                 prop.name,
-                prop.clockRate / 1000.,
+                clock_khz / 1000.,
                 prop.totalGlobalMem / (1024.*1024.),
                 prop.major,
                 prop.minor );

From d42d1b552023f4222615d1810199618503bc7516 Mon Sep 17 00:00:00 2001
From: Ahmad Abdelfattah <ahmad@icl.utk.edu>
Date: Fri, 22 Aug 2025 11:57:16 -0400
Subject: [PATCH 26/27] remove gbtf2 kernels that use cooperative groups

---
 control/magma_zf77.cpp         |  28 --
 include/magma_z.h              |  28 --
 magmablas/zgbtf2_kernels.cu    | 592 ---------------------------------
 src/zgbtrf_gpu.cpp             |  21 --
 testing/testing_zgbtrf_gpu.cpp |   6 +-
 5 files changed, 1 insertion(+), 674 deletions(-)

diff --git a/control/magma_zf77.cpp b/control/magma_zf77.cpp
index 07bf53999..0e5b5ca54 100644
--- a/control/magma_zf77.cpp
+++ b/control/magma_zf77.cpp
@@ -208,34 +208,6 @@ void magmaf_zgbsv_native(
         info );
 }
 
-#define magmaf_zgbtf2_native_v2 FORTRAN_NAME( magmaf_zgbtf2_native_v2, MAGMAF_ZGBTF2_NATIVE_V2 )
-void magmaf_zgbtf2_native_v2(
-    magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
-    magmaDoubleComplex* dA, magma_int_t *ldda,
-    magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t *queue )
-{
-    magma_zgbtf2_native_v2(
-        *m, *n, *kl, *ku,
-        dA, *ldda,
-        ipiv,
-        info, *queue );
-}
-
-#define magmaf_zgbtf2_native FORTRAN_NAME( magmaf_zgbtf2_native, MAGMAF_ZGBTF2_NATIVE )
-void magmaf_zgbtf2_native(
-    magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
-    magmaDoubleComplex* dA, magma_int_t *ldda,
-    magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t *queue )
-{
-    magma_zgbtf2_native(
-        *m, *n, *kl, *ku,
-        dA, *ldda,
-        ipiv,
-        info, *queue );
-}
-
 #define magmaf_zgbtrf_native FORTRAN_NAME( magmaf_zgbtrf_native, MAGMAF_ZGBTRF_NATIVE )
 void magmaf_zgbtrf_native(
     magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
diff --git a/include/magma_z.h b/include/magma_z.h
index d8d0091c9..d9923b11c 100644
--- a/include/magma_z.h
+++ b/include/magma_z.h
@@ -108,20 +108,6 @@ magma_zgbsv_native(
         magmaDoubleComplex* dB, magma_int_t lddb,
         magma_int_t *info);
 
-magma_int_t
-magma_zgbtf2_native_v2(
-    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
-    magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t queue);
-
-magma_int_t
-magma_zgbtf2_native_v2_work(
-    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
-    magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv,
-    magma_int_t* info,
-    void* device_work, magma_int_t* lwork,
-    magma_queue_t queue);
-
 void
 magma_zgbsv_native_work(
         magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs,
@@ -130,20 +116,6 @@ magma_zgbsv_native_work(
         magma_int_t *info, void* device_work, magma_int_t* lwork,
         magma_queue_t queue);
 
-magma_int_t
-magma_zgbtf2_native(
-    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
-    magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t queue);
-
-magma_int_t
-magma_zgbtf2_native_work(
-    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
-    magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv,
-    magma_int_t* info,
-    void* device_work, magma_int_t* lwork,
-    magma_queue_t queue);
-
 magma_int_t
 magma_zgbtrf_native(
     magma_int_t m, magma_int_t n,
diff --git a/magmablas/zgbtf2_kernels.cu b/magmablas/zgbtf2_kernels.cu
index 6f7f1fa3a..d1b2faef3 100644
--- a/magmablas/zgbtf2_kernels.cu
+++ b/magmablas/zgbtf2_kernels.cu
@@ -11,19 +11,6 @@
 */
 
 #include "magma_internal.h"
-#if   defined(MAGMA_HAVE_CUDA)
-#if CUDA_VERSION >= 12060
-#undef max
-#undef min
-#endif
-#include <cooperative_groups.h>
-namespace cg = cooperative_groups;
-#elif defined(MAGMA_HAVE_HIP)
-// the hip_cooperative_groups.h file conflicts with magma's definition of min
-#undef min
-#include <hip/hip_cooperative_groups.h>
-namespace cg = cooperative_groups;
-#endif
 
 #include "batched_kernel_param.h"
 #include "magma_templates.h"
@@ -189,582 +176,3 @@ magma_zgbtf2_scal_ger_batched(
     }
     return 0;
 }
-
-/******************************************************************************/
-#if CUDA_VERSION >= 11000
-__global__
-void zgbtf2_native_kernel(
-    int m, int n, int nb, int kl, int ku,
-    magmaDoubleComplex *dA, int ldda, magma_int_t *ipiv,
-    int* ju, int gbstep, magma_int_t *dinfo)
-{
-#define dA(i,j) dA[(j)*ldda + (i)]
-    extern __shared__ magmaDoubleComplex zdata[];
-    cg::grid_group grid = cg::this_grid();
-    const int tx     = threadIdx.x;
-    const int ntx    = blockDim.x;
-    const int bx     = blockIdx.x;
-    const int jc     = bx + gbstep;
-    const int kv     = kl + ku;
-    const int mband  = kv + 1 + kl;
-
-    int linfo    = (gbstep == 0) ?  0 : *dinfo;
-    int local_ju = (gbstep == 0) ? -1 : *ju;
-    int jp = 0;
-    double rx_abs_max = 0;
-    magmaDoubleComplex tmp = MAGMA_Z_ZERO, reg = MAGMA_Z_ZERO;
-
-    // setup shared memory
-    magmaDoubleComplex* sA = zdata;
-    double*             sX = (double*)(sA + mband);
-    int*                sI = (int*)(sX + kl+1);
-
-    // init sA to zero & sX to [0, 1, 2, ...]
-    for(int i = tx; i < mband; i+=ntx)
-        sA[i] = MAGMA_Z_ZERO;
-    for(int i = tx; i < (kl+1); i+=ntx)
-        sX[i] = i;
-    __syncthreads();
-
-
-    // determine column start/end
-    int col_start = 0, col_end = 0;
-    if( jc <= local_ju ) {
-        // columns affected by previous factorization steps
-        col_start  = 0;
-        col_end    = mband-1;
-    }
-    else {
-        // new columns
-        col_start  = kl + max(ku-jc,0);
-        col_end    = kl + ku + min(kl, n-1-jc);
-    }
-
-    // read columns
-    for(int i = col_start+tx; i <= col_end; i+=ntx) {
-        sA[i] = dA(i, jc);
-    }
-    __syncthreads();
-
-    // main loop
-    for(int j = 0; j < nb; j++) {
-        int gbj = j + gbstep;
-        int km  = 1 + min( kl, m-gbj ); // diagonal and subdiagonal(s)
-        // find pivot
-        if(bx == j) {
-            if(km >= 128) {
-                for(int i = tx; i < km; i+=ntx) {
-                    sX[i] = fabs( MAGMA_Z_REAL(sA[kv+i]) ) + fabs( MAGMA_Z_IMAG(sA[kv+i]) );
-                    sI[i] = i;
-                }
-                __syncthreads();
-
-                magma_getidmax_n(km, tx, sX, sI);
-                jp         = sI[0];
-                rx_abs_max = sX[0];
-            }
-            else{
-                for(int i = tx; i < km; i+=ntx) {
-                    sX[i] = fabs( MAGMA_Z_REAL(sA[kv+i]) ) + fabs( MAGMA_Z_IMAG(sA[kv+i]) );
-                }
-                __syncthreads();
-
-                rx_abs_max = sX[0];
-                jp = 0;
-                for(int i = 1; i < km; i++) {
-                    if( sX[i] > rx_abs_max ) {
-                        rx_abs_max = sX[i];
-                        jp         = i;
-                    }
-                }
-            }
-
-            linfo    = ( rx_abs_max == MAGMA_D_ZERO && linfo == 0) ? (gbj+1) : linfo;
-            local_ju = max(local_ju, min(gbj+ku+jp, n-1));
-
-            if(tx == 0) {
-                ipiv[gbj] = jp + gbj + 1;  // +1 for fortran indexing
-                *dinfo    = (magma_int_t)linfo;
-                *ju       = local_ju;
-            }
-        }
-        grid.sync();
-
-        // read information written by j-th block
-        if(bx >= j) {
-            jp       = ipiv[gbj] - gbj - 1;
-            linfo    = (int)(*dinfo);
-            local_ju = *ju;
-        }
-        //local_ju = max(local_ju, min(gbstep+j+ku+jp, n-1));
-        //swap_length = local_ju - (j+gbstep) + 1;
-        __syncthreads();
-
-        // swap
-        if(jc >= (j+gbstep) && jc <= local_ju && tx == 0) {
-            if(jp != 0) {
-                int j1 = (kv +  0) - (bx-j);
-                int j2 = (kv + jp) - (bx-j);
-                tmp    = sA[j1];
-                sA[j1] = sA[j2];
-                sA[j2] = tmp;
-            }
-        }
-        __syncthreads();
-
-        // scal & write to global memory
-        if(bx == j) {
-            reg = ( rx_abs_max == MAGMA_D_ZERO ) ? MAGMA_Z_ONE : MAGMA_Z_DIV(MAGMA_Z_ONE, sA[kv] );
-            for(int i = tx; i < (km-1); i+=ntx) {
-                sA[kv+1+i] *= reg;
-            }
-            __syncthreads();
-
-            for(int i = tx; i < mband; i+=ntx) {
-                dA(i,jc) = sA[i];
-            }
-        }
-        grid.sync();
-
-        // ger
-        if(jc > gbj && jc <= local_ju) {
-            int j1 = (kv + 0) - (bx-j);
-            for(int i = tx; i < km-1; i+=ntx) {
-                sA[j1+1+i] -= sA[j1] * dA(kv+1+i,gbj);
-            }
-            __syncthreads();
-        }
-    }
-
-    // write columns [nb : ju]
-    if(jc >= gbstep && jc <= local_ju) {
-        for(int i = tx; i < mband; i+=ntx) {
-            dA(i,jc) = sA[i];
-        }
-    }
-#undef dA
-}
-
-/******************************************************************************/
-extern "C"
-magma_int_t
-magma_zgbtf2_native_work(
-    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
-    magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv,
-    magma_int_t* info,
-    void* device_work, magma_int_t* lwork,
-    magma_queue_t queue)
-{
-    magma_int_t kv    = kl + ku;
-    magma_int_t mband = kv + 1 + kl;
-
-    *info  = 0;
-    if( m < 0 )
-        *info = -1;
-    else if ( n < 0 )
-        *info = -2;
-    else if ( kl < 0 )
-        *info = -3;
-    else if ( ku < 0 )
-        *info = -4;
-    else if ( ldda < mband )
-        *info = -6;
-
-    // calculate workspace required
-    magma_int_t lwork_required = 0;
-    lwork_required += 1 * sizeof(magma_int_t); // ju
-    lwork_required += 1 * sizeof(magma_int_t); // dinfo
-
-    if(*lwork < 0) {
-       // query assumed
-       *lwork = lwork_required;
-       return *info;
-    }
-
-    if(*lwork < lwork_required) {
-        *info = -11;
-    }
-
-    if (*info != 0) {
-        magma_xerbla( __func__, -(*info) );
-        return *info;
-    }
-
-    magma_int_t sm_count = magma_getdevice_multiprocessor_count();
-    magma_int_t nb       = max((magma_int_t)8, sm_count - (kv + 1));
-    magma_int_t nthreads = magma_roundup(kv+1,32);
-
-    // device pointers
-    magma_int_t *ju    = (magma_int_t*)device_work;
-    magma_int_t *dinfo = ju + 1;
-
-    magma_int_t shmem = 0;
-    shmem += mband  * sizeof(magmaDoubleComplex);
-    shmem += (kl+1) * sizeof(double);
-    shmem += (kl+1) * sizeof(int);
-
-    dim3 threads(nthreads, 1, 1);
-    for(magma_int_t gbstep = 0; gbstep < n; gbstep += nb) {
-        magma_int_t ib      = min(nb, n-gbstep);
-        magma_int_t nblocks = min(ib+kv+1, n-gbstep);
-        dim3 grid(nblocks, 1, 1);
-        void *kernel_args[] = {&m, &n, &ib, &kl, &ku, &dA, &ldda, &ipiv, &ju, &gbstep, &dinfo};
-        cudaError_t e = cudaLaunchCooperativeKernel((void*)zgbtf2_native_kernel, grid, threads, kernel_args, shmem, queue->cuda_stream());
-    }
-
-    magma_igetvector_async( 1, dinfo, 1, info, 1, queue );
-
-    return *info;
-}
-#endif
-
-/******************************************************************************/
-extern "C"
-magma_int_t
-magma_zgbtf2_native(
-    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
-    magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t queue)
-{
-#if CUDA_VERSION < 11000
-    fprintf( stderr, "%s is not supported for CUDA < 11.0\n", __func__);
-    *info = MAGMA_ERR_NOT_SUPPORTED;
-#else
-    magma_int_t kv    = kl + ku;
-    magma_int_t mband = kv + 1 + kl;
-
-    *info  = 0;
-    if( m < 0 )
-        *info = -1;
-    else if ( n < 0 )
-        *info = -2;
-    else if ( kl < 0 )
-        *info = -3;
-    else if ( ku < 0 )
-        *info = -4;
-    else if ( ldda < mband )
-        *info = -6;
-
-    if (*info != 0) {
-        magma_xerbla( __func__, -(*info) );
-        return *info;
-    }
-
-    // query workspace
-    magma_int_t lwork[1] = {-1};
-    magma_zgbtf2_native_work(m, n, kl, ku, NULL, ldda, NULL, info, NULL, lwork, queue);
-
-    void* device_work = NULL;
-    magma_malloc(&device_work, lwork[0]);
-
-    magma_zgbtf2_native_work(m, n, kl, ku, dA, ldda, ipiv, info, device_work, lwork, queue);
-
-    magma_free(device_work);
-#endif
-    return *info;
-}
-
-/******************************************************************************/
-// kernel for gbtf2 using cooperative groups and 1D cyclic dist. of columns
-// among thread-blocks
-#if CUDA_VERSION >= 11000
-__global__
-void zgbtf2_native_kernel_v2(
-    int m, int n, int nb, int NB, int kl, int ku,
-    magmaDoubleComplex *dA, int ldda, magma_int_t *ipiv,
-    int* ju, int gbstep, magma_int_t *dinfo)
-{
-#define dA(i, j) dA[(j)*ldda + (i)]
-#define sA(i, j) sA[(j)*slda + (i)]
-    extern __shared__ magmaDoubleComplex zdata[];
-    cg::grid_group grid = cg::this_grid();
-    const int nb1    = nb+1;
-    const int tx     = threadIdx.x;
-    const int ntx    = blockDim.x;
-    const int bx     = blockIdx.x;
-    const int nbx    = gridDim.x;
-    const int kv     = kl + ku;
-    const int mband  = kv + 1 + kl;
-    const int slda   = mband;
-
-    int linfo    = (gbstep == 0) ?  0 : *dinfo;
-    int local_ju = (gbstep == 0) ? -1 : *ju;
-    int jp = 0;
-    double rx_abs_max = 0;
-    magmaDoubleComplex tmp = MAGMA_Z_ZERO, reg = MAGMA_Z_ZERO;
-
-    // setup shared memory
-    magmaDoubleComplex* sA = zdata;
-    double*             sX = (double*)( sA + slda * nb1 );
-    int*                sI = (int*)(sX + kl+1);
-
-    // init sA to zero & sX to [0, 1, 2, ...]
-    for(int i = tx; i < slda*nb1; i+=ntx)
-        sA[i] = MAGMA_Z_ZERO;
-    __syncthreads();
-
-    // read columns -- nb1 cols/TB
-    const int total_columns    = min(n-gbstep, nbx * nb1);
-    const int total_factorize  = min(total_columns, NB); //min(nbx * nb, total_columns);
-    const int my_total_columns = (total_columns / nbx) + ((bx < (total_columns % nbx)) ? 1 : 0);
-    //const int my_last_column   = (my_total_columns-1) * nbx + bx;
-
-    int col_start = 0, col_end = 0, lj = 0, glj = 0;
-    for(int j = bx; j < total_columns; j += nbx) {
-        // determine column start/end
-        int jc = j + gbstep;
-        col_start = (jc <= local_ju) ? 0       : kl + max(ku-jc,0);
-        col_end   = (jc <= local_ju) ? mband-1 : kl + ku + min(kl, n-1-jc);
-
-        // read columns
-        for(int i = col_start+tx; i <= col_end; i+=ntx) {
-            sA(i,lj) = dA(i, jc);
-        }
-        lj++;
-    }
-    __syncthreads();
-
-    for(int j = 0; j < total_factorize; j++) {
-        int gbj     = j + gbstep;
-        int km      = 1 + min( kl, m-gbj );
-        int pivoter = j%nbx;
-
-        // find pivot
-        if( bx ==  pivoter) {
-            lj = j / nbx;
-            if(km >= 128) {
-                for(int i = tx; i < km; i+=ntx) {
-                    sX[i] = fabs( MAGMA_Z_REAL(sA(kv+i,lj)) ) + fabs( MAGMA_Z_IMAG(sA(kv+i,lj)) );
-                    sI[i] = i;
-                }
-                __syncthreads();
-
-                magma_getidmax_n(km, tx, sX, sI);
-                jp         = sI[0];
-                rx_abs_max = sX[0];
-            }
-            else{
-                for(int i = tx; i < km; i+=ntx) {
-                    sX[i] = fabs( MAGMA_Z_REAL(sA(kv+i,lj)) ) + fabs( MAGMA_Z_IMAG(sA(kv+i,lj)) );
-                }
-                __syncthreads();
-
-                rx_abs_max = sX[0];
-                jp = 0;
-                for(int i = 1; i < km; i++) {
-                    if( sX[i] > rx_abs_max ) {
-                        rx_abs_max = sX[i];
-                        jp         = i;
-                    }
-                }
-            }
-            linfo    = ( rx_abs_max == MAGMA_D_ZERO && linfo == 0) ? (gbj+1) : linfo;
-            local_ju = max(local_ju, min(gbj+ku+jp, n-1));
-            __syncthreads();
-
-            // swap current col. only, and write pivot info
-            if(tx == 0) {
-                ipiv[gbj] = jp + gbj + 1;  // +1 for fortran indexing
-                *dinfo    = (magma_int_t)linfo;
-                *ju       = local_ju;
-
-                if(jp != 0) {
-                    int j1 = (kv +  0);
-                    int j2 = (kv + jp);
-                    tmp    = sA(j1,lj);
-                    sA(j1,lj) = sA(j2,lj);
-                    sA(j2,lj) = tmp;
-                }
-            }
-            __syncthreads();
-
-            // scal current column and write it to global mem.
-            reg = ( rx_abs_max == MAGMA_D_ZERO ) ? MAGMA_Z_ONE : MAGMA_Z_DIV(MAGMA_Z_ONE, sA(kv,lj) );
-            for(int i = tx; i < (km-1); i+=ntx) {
-                sA(kv+1+i,lj) *= reg;
-            }
-            __syncthreads();
-
-            for(int i = tx; i < mband; i+=ntx) {
-                dA(i,gbj) = sA(i,lj);
-            }
-        }
-        grid.sync();
-
-        // other blocks read the information
-        if( !(bx == pivoter) ) {
-            jp       = ipiv[gbj] - gbj - 1;
-            linfo    = (int)(*dinfo);
-            local_ju = *ju;
-        }
-        __syncthreads();
-
-        // determine your next update column
-        //lj  = (bx <= pivoter) ? (gbj / nb1) + 1 : (gbj / nb1);
-        lj  = (bx <= pivoter) ? (j / nbx) + 1 : (j / nbx);
-        lj  = min(lj, my_total_columns-1);
-        glj = min(gbstep + (lj * nbx) + bx, n-1);
-
-        // swap
-        if(tx == 0 && glj > gbj && glj <= local_ju) {
-            if(jp != 0) {
-                int j1 = (kv +  0) - (glj-gbj);
-                int j2 = (kv + jp) - (glj-gbj);
-                tmp       = sA(j1,lj);
-                sA(j1,lj) = sA(j2,lj);
-                sA(j2,lj) = tmp;
-            }
-        }
-        __syncthreads();
-
-        // ger
-        if(glj > gbj && glj <= local_ju) {
-            int j1 = (kv + 0) - (glj-gbj);
-            for(int i = tx; i < km-1; i+=ntx) {
-                sA(j1+1+i,lj) -= sA(j1,lj) * dA(kv+1+i,gbj);
-            }
-            __syncthreads();
-        }
-    } // end of main loop
-
-
-    int jj = gbstep + (nb * nbx + bx);
-
-    if( jj < n ) {
-        for(int i = tx; i < mband; i+=ntx) {
-            dA(i,jj) = sA(i,nb);
-        }
-    }
-#undef dA
-#undef sA
-}
-
-/******************************************************************************/
-extern "C"
-magma_int_t
-magma_zgbtf2_native_v2_work(
-    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
-    magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv,
-    magma_int_t* info,
-    void* device_work, magma_int_t* lwork,
-    magma_queue_t queue)
-{
-    magma_int_t kv    = kl + ku;
-    magma_int_t mband = kv + 1 + kl;
-
-    *info  = 0;
-    if( m < 0 )
-        *info = -1;
-    else if ( n < 0 )
-        *info = -2;
-    else if ( kl < 0 )
-        *info = -3;
-    else if ( ku < 0 )
-        *info = -4;
-    else if ( ldda < mband )
-        *info = -6;
-
-    // calculate workspace required
-    magma_int_t lwork_required = 0;
-    lwork_required += 1 * sizeof(magma_int_t); // ju
-    lwork_required += 1 * sizeof(magma_int_t); // dinfo
-
-    if(*lwork < 0) {
-       // query assumed
-       *lwork = lwork_required;
-       return *info;
-    }
-
-    if(*lwork < lwork_required) {
-        *info = -11;
-    }
-
-    if (*info != 0) {
-        magma_xerbla( __func__, -(*info) );
-        return *info;
-    }
-    magma_int_t ione= 1;
-    magma_int_t nb  = 64;
-    magma_int_t nb1 = nb+1;
-    magma_int_t NB  = nb * (kv + 1);
-
-    magma_int_t nthreads = magma_roundup(kv+1,32);
-    magma_int_t slda     = mband;
-
-    // device pointers
-    magma_int_t *ju    = (magma_int_t*)device_work;
-    magma_int_t *dinfo = ju + 1;
-
-    magma_int_t shmem = 0;
-    shmem += slda  * nb1 * sizeof(magmaDoubleComplex);
-    shmem += (kl+1) * sizeof(double);
-    shmem += (kl+1) * sizeof(int);
-
-    dim3 threads(nthreads, 1, 1);
-
-    for(magma_int_t gbstep = 0; gbstep < n; gbstep += NB) {
-        magma_int_t ib      = min(NB, n-gbstep);
-        magma_int_t nblocks = min(ib, kv+1);
-        magma_int_t nb      = max(ione, ib / nblocks);
-        dim3 grid(nblocks, 1, 1);
-
-        void *kernel_args[] = {&m, &n, &nb, &NB, &kl, &ku, &dA, &ldda, &ipiv, &ju, &gbstep, &dinfo};
-        cudaError_t e = cudaLaunchCooperativeKernel((void*)zgbtf2_native_kernel_v2, grid, threads, kernel_args, shmem, queue->cuda_stream());
-        if(e != cudaSuccess) {
-            printf("ERROR: %s \n", cudaGetErrorString(e));
-            *info = -100;
-            return *info;
-        }
-    }
-
-    magma_igetvector_async( 1, dinfo, 1, info, 1, queue );
-
-    return *info;
-}
-#endif
-
-/******************************************************************************/
-extern "C"
-magma_int_t
-magma_zgbtf2_native_v2(
-    magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku,
-    magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t queue)
-{
-#if CUDA_VERSION < 11000
-    fprintf( stderr, "%s is not supported for CUDA < 11.0\n", __func__);
-    *info = MAGMA_ERR_NOT_SUPPORTED;
-#else
-    magma_int_t kv    = kl + ku;
-    magma_int_t mband = kv + 1 + kl;
-
-    *info  = 0;
-    if( m < 0 )
-        *info = -1;
-    else if ( n < 0 )
-        *info = -2;
-    else if ( kl < 0 )
-        *info = -3;
-    else if ( ku < 0 )
-        *info = -4;
-    else if ( ldda < mband )
-        *info = -6;
-
-    if (*info != 0) {
-        magma_xerbla( __func__, -(*info) );
-        return *info;
-    }
-
-    // query workspace
-    magma_int_t lwork[1] = {-1};
-    magma_zgbtf2_native_v2_work(m, n, kl, ku, NULL, ldda, NULL, info, NULL, lwork, queue);
-
-    void* device_work = NULL;
-    magma_malloc(&device_work, lwork[0]);
-
-    magma_zgbtf2_native_v2_work(m, n, kl, ku, dA, ldda, ipiv, info, device_work, lwork, queue);
-
-    magma_free(device_work);
-#endif
-    return *info;
-}
diff --git a/src/zgbtrf_gpu.cpp b/src/zgbtrf_gpu.cpp
index 5cf3a554c..a4357b0ab 100644
--- a/src/zgbtrf_gpu.cpp
+++ b/src/zgbtrf_gpu.cpp
@@ -13,8 +13,6 @@
 #include "magma_internal.h"
 #include "batched_kernel_param.h"
 
-#define MAGMA_ZGBTRF_NATIVE_DISABLE_COOP_KERNEL
-
 extern "C" void
 magma_zgbtrf_native_work(
         magma_int_t m, magma_int_t n,
@@ -50,22 +48,9 @@ magma_zgbtrf_native_work(
         NULL, lddab, lddab*n, NULL, min(m,n),
         NULL, NULL, gbtrf_batch_lwork, 1, queue);
 
-    #ifndef MAGMA_ZGBTRF_NATIVE_DISABLE_COOP_KERNEL
-    // [2] workspace of native gbtrf with cooperative groups
-    magma_int_t gbtrf_cogroups_lwork[1] = {-1};
-    magma_zgbtf2_native_v2_work(
-        m, n, kl, ku,
-        NULL, lddab, NULL, info,
-        NULL, gbtrf_cogroups_lwork, queue);
-    #endif
-
     // [3] we need a "device_info" on device memory
     magma_int_t gbtrf_native_lwork[1] = {0};
-    #ifndef MAGMA_ZGBTRF_NATIVE_DISABLE_COOP_KERNEL
-    gbtrf_native_lwork[0] = gbtrf_batch_lwork[0] + gbtrf_cogroups_lwork[0] + sizeof(magma_int_t);
-    #else
     gbtrf_native_lwork[0] = gbtrf_batch_lwork[0] + sizeof(magma_int_t);
-    #endif
 
     if(*lwork < 0) {
         // workspace query assumed
@@ -79,12 +64,6 @@ magma_zgbtrf_native_work(
         return;
     }
 
-    #ifndef MAGMA_ZGBTRF_NATIVE_DISABLE_COOP_KERNEL
-    // try cooperative groups kernel first
-    magma_zgbtf2_native_v2_work(m, n, kl, ku, dAB, lddab, dipiv, info, device_work, gbtrf_cogroups_lwork, queue);
-    if(*info != -100) return; // cooperative group kernel finished successfully
-    #endif
-
     magma_int_t* device_info = (magma_int_t*)((uint8_t*)device_work + gbtrf_batch_lwork[0]);
     magma_zgbtrf_batched_strided_work(
         m, n, kl, ku,
diff --git a/testing/testing_zgbtrf_gpu.cpp b/testing/testing_zgbtrf_gpu.cpp
index 2ddd0164f..10faf71c8 100644
--- a/testing/testing_zgbtrf_gpu.cpp
+++ b/testing/testing_zgbtrf_gpu.cpp
@@ -283,11 +283,7 @@ int main( int argc, char** argv)
                     device_work, lwork, opts.queue);
                 magma_time = magma_sync_wtime( opts.queue ) - magma_time;
             }
-            else if(opts.version == 3) {
-                magma_time = magma_wtime();
-                magma_zgbtf2_native_v2(M, N, KL, KU, dA, lddab, dipiv_magma, &info, opts.queue);
-                magma_time = magma_wtime() - magma_time;
-            }
+
             magma_perf = gflops / magma_time;
             magma_zgetmatrix( Mband, Nband, dA, lddab, h_Amagma, ldab, opts.queue );
 

From d29cb95ae8fb5fe1a5636b35ab8ce843248ffb0b Mon Sep 17 00:00:00 2001
From: Ahmad Abdelfattah <ahmad@icl.utk.edu>
Date: Fri, 22 Aug 2025 12:26:13 -0400
Subject: [PATCH 27/27] remove unwanted fortran wrappers

---
 control/magma_cf77.cpp | 28 ----------------------------
 control/magma_df77.cpp | 28 ----------------------------
 control/magma_sf77.cpp | 28 ----------------------------
 3 files changed, 84 deletions(-)

diff --git a/control/magma_cf77.cpp b/control/magma_cf77.cpp
index 84690cc1b..1bc8f10e6 100644
--- a/control/magma_cf77.cpp
+++ b/control/magma_cf77.cpp
@@ -208,34 +208,6 @@ void magmaf_cgbsv_native(
         info );
 }
 
-#define magmaf_cgbtf2_native_v2 FORTRAN_NAME( magmaf_cgbtf2_native_v2, MAGMAF_CGBTF2_NATIVE_V2 )
-void magmaf_cgbtf2_native_v2(
-    magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
-    magmaFloatComplex* dA, magma_int_t *ldda,
-    magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t *queue )
-{
-    magma_cgbtf2_native_v2(
-        *m, *n, *kl, *ku,
-        dA, *ldda,
-        ipiv,
-        info, *queue );
-}
-
-#define magmaf_cgbtf2_native FORTRAN_NAME( magmaf_cgbtf2_native, MAGMAF_CGBTF2_NATIVE )
-void magmaf_cgbtf2_native(
-    magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
-    magmaFloatComplex* dA, magma_int_t *ldda,
-    magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t *queue )
-{
-    magma_cgbtf2_native(
-        *m, *n, *kl, *ku,
-        dA, *ldda,
-        ipiv,
-        info, *queue );
-}
-
 #define magmaf_cgbtrf_native FORTRAN_NAME( magmaf_cgbtrf_native, MAGMAF_CGBTRF_NATIVE )
 void magmaf_cgbtrf_native(
     magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
diff --git a/control/magma_df77.cpp b/control/magma_df77.cpp
index 8c4704eec..265a98fe9 100644
--- a/control/magma_df77.cpp
+++ b/control/magma_df77.cpp
@@ -280,34 +280,6 @@ void magmaf_dgbsv_native(
         info );
 }
 
-#define magmaf_dgbtf2_native_v2 FORTRAN_NAME( magmaf_dgbtf2_native_v2, MAGMAF_DGBTF2_NATIVE_V2 )
-void magmaf_dgbtf2_native_v2(
-    magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
-    double* dA, magma_int_t *ldda,
-    magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t *queue )
-{
-    magma_dgbtf2_native_v2(
-        *m, *n, *kl, *ku,
-        dA, *ldda,
-        ipiv,
-        info, *queue );
-}
-
-#define magmaf_dgbtf2_native FORTRAN_NAME( magmaf_dgbtf2_native, MAGMAF_DGBTF2_NATIVE )
-void magmaf_dgbtf2_native(
-    magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
-    double* dA, magma_int_t *ldda,
-    magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t *queue )
-{
-    magma_dgbtf2_native(
-        *m, *n, *kl, *ku,
-        dA, *ldda,
-        ipiv,
-        info, *queue );
-}
-
 #define magmaf_dgbtrf_native FORTRAN_NAME( magmaf_dgbtrf_native, MAGMAF_DGBTRF_NATIVE )
 void magmaf_dgbtrf_native(
     magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
diff --git a/control/magma_sf77.cpp b/control/magma_sf77.cpp
index a23a1b1a3..3602baa46 100644
--- a/control/magma_sf77.cpp
+++ b/control/magma_sf77.cpp
@@ -280,34 +280,6 @@ void magmaf_sgbsv_native(
         info );
 }
 
-#define magmaf_sgbtf2_native_v2 FORTRAN_NAME( magmaf_sgbtf2_native_v2, MAGMAF_SGBTF2_NATIVE_V2 )
-void magmaf_sgbtf2_native_v2(
-    magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
-    float* dA, magma_int_t *ldda,
-    magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t *queue )
-{
-    magma_sgbtf2_native_v2(
-        *m, *n, *kl, *ku,
-        dA, *ldda,
-        ipiv,
-        info, *queue );
-}
-
-#define magmaf_sgbtf2_native FORTRAN_NAME( magmaf_sgbtf2_native, MAGMAF_SGBTF2_NATIVE )
-void magmaf_sgbtf2_native(
-    magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,
-    float* dA, magma_int_t *ldda,
-    magma_int_t* ipiv,
-    magma_int_t* info, magma_queue_t *queue )
-{
-    magma_sgbtf2_native(
-        *m, *n, *kl, *ku,
-        dA, *ldda,
-        ipiv,
-        info, *queue );
-}
-
 #define magmaf_sgbtrf_native FORTRAN_NAME( magmaf_sgbtrf_native, MAGMAF_SGBTRF_NATIVE )
 void magmaf_sgbtrf_native(
     magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,