From b9a811aacd9de96e1749dd36c707aff499dec970 Mon Sep 17 00:00:00 2001 From: Zarko Asen Date: Sun, 31 Aug 2025 12:30:37 +0200 Subject: [PATCH 01/27] Port of gesv_rbt_async() from the Bitbucket repo, unit tests need to be updated. --- CMakeLists.txt | 879 +++++++++++++++++++------------------- include/magma_auxiliary.h | 32 ++ include/magma_z.h | 65 +++ include/magmablas_z.h | 16 + interface_cuda/alloc.cpp | 105 ++++- magmablas/ztrsm.cu | 326 ++++++++++++++ magmablas/ztrtri_diag.cu | 102 +++++ src/zgerbt_gpu.cpp | 64 +++ src/zgerfs_nopiv_gpu.cpp | 190 ++++++++ src/zgesv_nopiv_gpu.cpp | 35 ++ src/zgesv_rbt.cpp | 198 +++++++++ src/zgetrf_nopiv_gpu.cpp | 150 +++++++ src/zgetrs_nopiv_gpu.cpp | 60 +++ 13 files changed, 1784 insertions(+), 438 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc61af74d..ea852439f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,23 @@ -cmake_minimum_required( VERSION 3.18 ) +cmake_minimum_required(VERSION 3.18) # ---------------------------------------- # to disable Fortran, set this to "off" # see also -DADD_ below -option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON ) +option(USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON) if (USE_FORTRAN) - project( MAGMA C CXX Fortran ) -else() - project( MAGMA C CXX ) -endif() - + project(MAGMA C CXX Fortran) +else () + project(MAGMA C CXX) +endif () + +FIND_PROGRAM(PROGRAM_CCACHE ccache) +IF (PROGRAM_CCACHE) + SET(CMAKE_CXX_COMPILER_LAUNCHER ${PROGRAM_CCACHE}) + SET(CMAKE_C_COMPILER_LAUNCHER ${PROGRAM_CCACHE}) + SET(CMAKE_CUDA_COMPILER_LAUNCHER ${PROGRAM_CCACHE}) + SET(CMAKE_OPENCL_COMPILER_LAUNCHER ${PROGRAM_CCACHE}) +ENDIF () # ---------------------------------------- # to show compile commands, set this here or use 'make VERBOSE=1' @@ -18,17 +25,17 @@ endif() # ---------------------------------------- # MAGMA requires one backend to be enabled -option(MAGMA_ENABLE_CUDA "Enable the CUDA backend" OFF) -option(MAGMA_ENABLE_HIP "Enable the HIP backend" OFF) +option(MAGMA_ENABLE_CUDA "Enable the CUDA backend" OFF) +option(MAGMA_ENABLE_HIP "Enable the HIP backend" OFF) # check if one backend has been enabled if (NOT MAGMA_ENABLE_CUDA AND - NOT MAGMA_ENABLE_HIP - ) - message(STATUS "MAGMA requires one enabled backend!") - message(STATUS "Building CUDA backend") - set( MAGMA_ENABLE_CUDA ON ) -endif() + NOT MAGMA_ENABLE_HIP +) + message(STATUS "MAGMA requires one enabled backend!") + message(STATUS "Building CUDA backend") + set(MAGMA_ENABLE_CUDA ON) +endif () # ---------------------------------------- # don't regenerate files during make. @@ -40,25 +47,25 @@ set(CMAKE_SUPPRESS_REGENERATION on) # ---------------------------------------- # force an out-of-source build, to not overwrite the existing Makefiles # (out-of-source is cleaner, too) -string( COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" MAGMA_COMPILE_INPLACE ) +string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" MAGMA_COMPILE_INPLACE) if (MAGMA_COMPILE_INPLACE) - message( FATAL_ERROR "Compiling MAGMA with CMake requires an out-of-source build. To proceed: + message(FATAL_ERROR "Compiling MAGMA with CMake requires an out-of-source build. To proceed: rm -rf CMakeCache.txt CMakeFiles/ # delete files in ${CMAKE_SOURCE_DIR} mkdir build cd build cmake .. - make" ) -endif() + make") +endif () # ---------------------------------------- # prefer shared libraries -option( BUILD_SHARED_LIBS "If on, build shared libraries, otherwise build static libraries" ON ) +option(BUILD_SHARED_LIBS "If on, build shared libraries, otherwise build static libraries" ON) # prefer /usr/local/magma, instead of /usr/local. if (UNIX AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(CMAKE_INSTALL_PREFIX "/usr/local/magma" CACHE PATH "..." FORCE) -endif() +endif () # ---------------------------------------- # use C++14 and C99 @@ -70,92 +77,96 @@ CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC) if (COMPILER_SUPPORTS_CXX14) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") -elseif(COMPILER_SUPPORTS_CXX0X) +elseif (COMPILER_SUPPORTS_CXX0X) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") -else() +else () message( WARNING "The compiler ${CMAKE_CXX_COMPILER} doesn't support the -std=c++14 flag. Some code may not compile.") -endif() +endif () CHECK_C_COMPILER_FLAG("-std=c99" COMPILER_SUPPORTS_C99) if (COMPILER_SUPPORTS_C99) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") -else() - message( WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.") -endif() +else () + message(WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.") +endif () # ---------------------------------------- # check Fortran name mangling if (USE_FORTRAN) - include( FortranCInterface ) - FortranCInterface_HEADER( ${CMAKE_SOURCE_DIR}/include/magma_mangling_cmake.h MACRO_NAMESPACE MAGMA_ ) -else() + include(FortranCInterface) + FortranCInterface_HEADER(${CMAKE_SOURCE_DIR}/include/magma_mangling_cmake.h MACRO_NAMESPACE MAGMA_) +else () # set one of -DADD_, -DUPCASE, or -DNOCHANGE. See README. - message( STATUS "Building without Fortran compiler" ) - set( FORTRAN_CONVENTION "-DADD_" CACHE STRING "Fortran calling convention, one of -DADD_, -DNOCHANGE, -DUPCASE" ) - set_property( CACHE FORTRAN_CONVENTION PROPERTY STRINGS -DADD_ -DNOCHANGE -DUPCASE ) - message( STATUS " Using ${FORTRAN_CONVENTION} for Fortran calling convention" ) - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FORTRAN_CONVENTION}" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FORTRAN_CONVENTION}" ) + message(STATUS "Building without Fortran compiler") + set(FORTRAN_CONVENTION "-DADD_" CACHE STRING "Fortran calling convention, one of -DADD_, -DNOCHANGE, -DUPCASE") + set_property(CACHE FORTRAN_CONVENTION PROPERTY STRINGS -DADD_ -DNOCHANGE -DUPCASE) + message(STATUS " Using ${FORTRAN_CONVENTION} for Fortran calling convention") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FORTRAN_CONVENTION}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FORTRAN_CONVENTION}") # see also NVCC_FLAGS below -endif() +endif () # ---------------------------------------- # locate OpenMP -find_package( OpenMP ) +find_package(OpenMP) if (OPENMP_FOUND) - message( STATUS "Found OpenMP" ) - message( STATUS " OpenMP_C_FLAGS ${OpenMP_C_FLAGS}" ) - message( STATUS " OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" ) - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) -endif() + message(STATUS "Found OpenMP") + message(STATUS " OpenMP_C_FLAGS ${OpenMP_C_FLAGS}") + message(STATUS " OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif () # ---------------------------------------- # locate CUDA libraries if (MAGMA_ENABLE_CUDA) - enable_language( CUDA ) + if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + set(CMAKE_CUDA_COMPILER_WORKS ON) # Fix for intel compiler and CUDA + endif () + enable_language(CUDA) - set( CUDA_NAMES - "one or more of " + set(CUDA_NAMES + "one or more of " "Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, " - "or valid sm_XY or sm_XYZ" ) - set( GPU_TARGET "" CACHE STRING - "CUDA architectures to compile for, overrides CMAKE_CUDA_ARCHITECTURES; ${CUDA_NAMES}" ) - find_package( CUDAToolkit ) + "or valid sm_XY or sm_XYZ") + set(GPU_TARGET "" CACHE STRING + "CUDA architectures to compile for, overrides CMAKE_CUDA_ARCHITECTURES; ${CUDA_NAMES}") + find_package(CUDAToolkit) if (CUDAToolkit_FOUND) - message( STATUS "Found CUDA ${CUDA_VERSION}" ) - message( STATUS " CUDA_CUDART_LIBRARY: CUDA::cudart" ) + message(STATUS "Found CUDA ${CUDA_VERSION}") + message(STATUS " CUDA_CUDART_LIBRARY: CUDA::cudart") #message( STATUS " CUDA_CUBLAS_LIBRARIES: CUDA::cublas" ) - include_directories( ${CUDAToolkit_INCLUDE_DIRS} ) + include_directories(${CUDAToolkit_INCLUDE_DIRS}) + link_directories(${CUDAToolkit_LIBRARY_DIR}) if (GPU_TARGET) # Map names to architectures. if (GPU_TARGET MATCHES Fermi) - set( GPU_TARGET "${GPU_TARGET} sm_20" ) - endif() + set(GPU_TARGET "${GPU_TARGET} sm_20") + endif () if (GPU_TARGET MATCHES Kepler) - set( GPU_TARGET "${GPU_TARGET} sm_30 sm_35 sm_37" ) - endif() + set(GPU_TARGET "${GPU_TARGET} sm_30 sm_35 sm_37") + endif () if (GPU_TARGET MATCHES Maxwell) set( GPU_TARGET "${GPU_TARGET} sm_50 sm_52 sm_53" ) - endif() + endif () if (GPU_TARGET MATCHES Pascal) set( GPU_TARGET "${GPU_TARGET} sm_60 sm_61 sm_62" ) - endif() + endif () if (GPU_TARGET MATCHES Volta) set( GPU_TARGET "${GPU_TARGET} sm_70 sm_72" ) - endif() + endif () if (GPU_TARGET MATCHES Turing) - set( GPU_TARGET "${GPU_TARGET} sm_75" ) - endif() + set(GPU_TARGET "${GPU_TARGET} sm_75") + endif () if (GPU_TARGET MATCHES Ampere) set( GPU_TARGET "${GPU_TARGET} sm_80 sm_86 sm_87" ) @@ -163,223 +174,223 @@ if (MAGMA_ENABLE_CUDA) if (GPU_TARGET MATCHES Ada) set( GPU_TARGET "${GPU_TARGET} sm_89" ) - endif() + endif () if (GPU_TARGET MATCHES Hopper) set( GPU_TARGET "${GPU_TARGET} sm_90 sm_90a" ) - endif() + endif () # Find all sm_XY and sm_XYZ, then strip off sm_. string( REGEX MATCHALL "sm_[0-9][0-9a-z]+" sms "${GPU_TARGET}" ) - string( REPLACE "sm_" "" __cuda_architectures "${sms}" ) + string(REPLACE "sm_" "" __cuda_architectures "${sms}") if (NOT __cuda_architectures) - message( FATAL_ERROR - "GPU_TARGET must contain ${CUDA_NAMES}. " - "Was: ${GPU_TARGET}" ) - endif() + message(FATAL_ERROR + "GPU_TARGET must contain ${CUDA_NAMES}. " + "Was: ${GPU_TARGET}") + endif () - set( CMAKE_CUDA_ARCHITECTURES "${__cuda_architectures}" ) - endif() + set(CMAKE_CUDA_ARCHITECTURES "${__cuda_architectures}") + endif () - message( STATUS " Compile for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" ) - set( MAGMA_CUDA_ARCH "${CMAKE_CUDA_ARCHITECTURES}" ) + message(STATUS " Compile for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") + set(MAGMA_CUDA_ARCH "${CMAKE_CUDA_ARCHITECTURES}") # Find minimum arch in CMAKE_CUDA_ARCHITECTURES, if they're all numeric. - set( min_arch 9999 ) - foreach( arch ${CMAKE_CUDA_ARCHITECTURES} ) + set(min_arch 9999) + foreach (arch ${CMAKE_CUDA_ARCHITECTURES}) if (arch MATCHES "^([0-9]+)") # 80-real, 80-virtual, etc. okay if (CMAKE_MATCH_1 LESS min_arch) - set( min_arch "${CMAKE_MATCH_1}" ) - endif() - else() - set( min_arch 0 ) # arch like "native", min unknown + set(min_arch "${CMAKE_MATCH_1}") + endif () + else () + set(min_arch 0) # arch like "native", min unknown break() - endif() - endforeach() + endif () + endforeach () # Append zero, so it is comparable to '__CUDA_ARCH__' - set( MAGMA_CUDA_ARCH_MIN "${min_arch}0" ) + set(MAGMA_CUDA_ARCH_MIN "${min_arch}0") - add_library( magma_nvcc_flags INTERFACE ) + add_library(magma_nvcc_flags INTERFACE) if (COMPILER_SUPPORTS_FPIC) target_compile_options(magma_nvcc_flags - INTERFACE - $<$:--compiler-options;-fPIC,${FORTRAN_CONVENTION}> + INTERFACE + $<$:--compiler-options;-fPIC,${FORTRAN_CONVENTION}> ) - else() + else () # No Position Independent Code on Windows. # Compiler will complain if you add that flag. target_compile_options(magma_nvcc_flags - INTERFACE - $<$:--compiler-options;${FORTRAN_CONVENTION}> + INTERFACE + $<$:--compiler-options;${FORTRAN_CONVENTION}> ) - endif() + endif () - set( MAGMA_HAVE_CUDA "1" ) + set(MAGMA_HAVE_CUDA "1") - message( STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MAGMA_CUDA_ARCH_MIN}" ) - else() - message( STATUS "Could not find CUDA" ) - endif() -endif() + message(STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MAGMA_CUDA_ARCH_MIN}") + else () + message(STATUS "Could not find CUDA") + endif () +endif () # ---------------------------------------- # locate HIP/ROCm libraries if (MAGMA_ENABLE_HIP) - set( GPU_TARGET "gfx900" CACHE STRING "HIP architectures to compile for" ) - list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/lib/cmake/hip) - find_package( HIP ) - if (HIP_FOUND) - message( STATUS "Found HIP ${HIP_VERSION}" ) - message( STATUS " HIP_INCLUDE_DIRS: ${HIP_INCLUDE_DIRS}" ) - message( STATUS "GPU_TARGET: ${GPU_TARGET}" ) - - include_directories( ${HIP_INCLUDE_DIRS} ) - - set(HIP_SEPARABLE_COMPILATION ON) - - if (GPU_TARGET MATCHES kaveri) - set( GPU_TARGET ${GPU_TARGET} gfx700 ) - endif() - - if (GPU_TARGET MATCHES hawaii) - set( GPU_TARGET ${GPU_TARGET} gfx701 ) - endif() - - if (GPU_TARGET MATCHES kabini) - set( GPU_TARGET ${GPU_TARGET} gfx703 ) - endif() - - if (GPU_TARGET MATCHES mullins) - set( GPU_TARGET ${GPU_TARGET} gfx703 ) - endif() - - if (GPU_TARGET MATCHES bonaire) - set( GPU_TARGET ${GPU_TARGET} gfx704 ) - endif() - - if (GPU_TARGET MATCHES carrizo) - set( GPU_TARGET ${GPU_TARGET} gfx801 ) - endif() - - if (GPU_TARGET MATCHES iceland) - set( GPU_TARGET ${GPU_TARGET} gfx802 ) - endif() - - if (GPU_TARGET MATCHES tonga) - set( GPU_TARGET ${GPU_TARGET} gfx802 ) - endif() - - if (GPU_TARGET MATCHES fiji) - set( GPU_TARGET ${GPU_TARGET} gfx803 ) - endif() - - if (GPU_TARGET MATCHES polaris10) - set( GPU_TARGET ${GPU_TARGET} gfx803 ) - endif() - - if (GPU_TARGET MATCHES tongapro) - set( GPU_TARGET ${GPU_TARGET} gfx805 ) - endif() - - if (GPU_TARGET MATCHES stoney) - set( GPU_TARGET ${GPU_TARGET} gfx810 ) - endif() - - set( DEVCCFLAGS "" ) - set(VALID_GFXS "700;701;702;703;704;705;801;802;803;805;810;900;902;904;906;908;909;90c;1010;1011;1012;1030;1031;1032;1033") - foreach( GFX ${VALID_GFXS} ) - if ( GPU_TARGET MATCHES gfx${GFX} ) - set( DEVCCFLAGS ${DEVCCFLAGS} --offload-arch=gfx${GFX} ) - endif() - endforeach() - - set( DEVCCFLAGS ${DEVCCFLAGS} -fPIC ${FORTRAN_CONVENTION} ) - set(MAGMA_HAVE_HIP "1") - message( STATUS "Define -DMAGMA_HAVE_HIP" ) - - set_property(TARGET hip::device APPEND PROPERTY COMPATIBLE_INTERFACE_BOOL INTERFACE_HIP_DEVICE_COMPILE) - set_property(TARGET hip::device PROPERTY INTERFACE_HIP_DEVICE_COMPILE ON) - set(GPU_ARCH_FLAGS ${DEVCCFLAGS}) - - #add_compile_options(${GPU_ARCH_FLAGS}) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__" ) - option(ROCM_CORE "Location of the rocm-core package") - execute_process(COMMAND "${CMAKE_SOURCE_DIR}/tools/get-rocm-version.sh" "${ROCM_CORE}" OUTPUT_VARIABLE ROCM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "ROCM_VERSION=${ROCM_VERSION}") - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DROCM_VERSION=${ROCM_VERSION}" ) - else() - message( STATUS "Could not find HIP" ) - endif() -endif() + set(GPU_TARGET "gfx900" CACHE STRING "HIP architectures to compile for") + list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/lib/cmake/hip) + find_package(HIP) + if (HIP_FOUND) + message(STATUS "Found HIP ${HIP_VERSION}") + message(STATUS " HIP_INCLUDE_DIRS: ${HIP_INCLUDE_DIRS}") + message(STATUS "GPU_TARGET: ${GPU_TARGET}") + + include_directories(${HIP_INCLUDE_DIRS}) + + set(HIP_SEPARABLE_COMPILATION ON) + + if (GPU_TARGET MATCHES kaveri) + set(GPU_TARGET ${GPU_TARGET} gfx700) + endif () + + if (GPU_TARGET MATCHES hawaii) + set(GPU_TARGET ${GPU_TARGET} gfx701) + endif () + + if (GPU_TARGET MATCHES kabini) + set(GPU_TARGET ${GPU_TARGET} gfx703) + endif () + + if (GPU_TARGET MATCHES mullins) + set(GPU_TARGET ${GPU_TARGET} gfx703) + endif () + + if (GPU_TARGET MATCHES bonaire) + set(GPU_TARGET ${GPU_TARGET} gfx704) + endif () + + if (GPU_TARGET MATCHES carrizo) + set(GPU_TARGET ${GPU_TARGET} gfx801) + endif () + + if (GPU_TARGET MATCHES iceland) + set(GPU_TARGET ${GPU_TARGET} gfx802) + endif () + + if (GPU_TARGET MATCHES tonga) + set(GPU_TARGET ${GPU_TARGET} gfx802) + endif () + + if (GPU_TARGET MATCHES fiji) + set(GPU_TARGET ${GPU_TARGET} gfx803) + endif () + + if (GPU_TARGET MATCHES polaris10) + set(GPU_TARGET ${GPU_TARGET} gfx803) + endif () + + if (GPU_TARGET MATCHES tongapro) + set(GPU_TARGET ${GPU_TARGET} gfx805) + endif () + + if (GPU_TARGET MATCHES stoney) + set(GPU_TARGET ${GPU_TARGET} gfx810) + endif () + + set(DEVCCFLAGS "") + set(VALID_GFXS "700;701;702;703;704;705;801;802;803;805;810;900;902;904;906;908;909;90c;1010;1011;1012;1030;1031;1032;1033") + foreach (GFX ${VALID_GFXS}) + if (GPU_TARGET MATCHES gfx${GFX}) + set(DEVCCFLAGS ${DEVCCFLAGS} --offload-arch=gfx${GFX}) + endif () + endforeach () + + set(DEVCCFLAGS ${DEVCCFLAGS} -fPIC ${FORTRAN_CONVENTION}) + set(MAGMA_HAVE_HIP "1") + message(STATUS "Define -DMAGMA_HAVE_HIP") + + set_property(TARGET hip::device APPEND PROPERTY COMPATIBLE_INTERFACE_BOOL INTERFACE_HIP_DEVICE_COMPILE) + set_property(TARGET hip::device PROPERTY INTERFACE_HIP_DEVICE_COMPILE ON) + set(GPU_ARCH_FLAGS ${DEVCCFLAGS}) + + #add_compile_options(${GPU_ARCH_FLAGS}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__") + option(ROCM_CORE "Location of the rocm-core package") + execute_process(COMMAND "${CMAKE_SOURCE_DIR}/tools/get-rocm-version.sh" "${ROCM_CORE}" OUTPUT_VARIABLE ROCM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "ROCM_VERSION=${ROCM_VERSION}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DROCM_VERSION=${ROCM_VERSION}") + else () + message(STATUS "Could not find HIP") + endif () +endif () # ---------------------------------------- # locate LAPACK libraries set(BLA_VENDOR "" CACHE STRING - "Use specified BLAS library. See https://cmake.org/cmake/help/latest/module/FindBLAS.html") + "Use specified BLAS library. See https://cmake.org/cmake/help/latest/module/FindBLAS.html") # List from CMake 3.17, minus some obsolete ones: # PhiPACK, Compaq CXML, DEC Alpha DXML, SunPerf, SGI SCSL, SGIMATH, # Intel, NAS (Apple veclib). # FLAME is BLIS. set_property(CACHE BLA_VENDOR PROPERTY STRINGS - "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL" - "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq" - "ACML" "ACML_MP" "ACML_GPU" - "Apple" - "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp" - "Generic") - -set( LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" ) + "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL" + "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq" + "ACML" "ACML_MP" "ACML_GPU" + "Apple" + "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp" + "Generic") + +set(LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search") if (LAPACK_LIBRARIES STREQUAL "") - message( STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake." ) - find_package( LAPACK ) + message(STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake.") + find_package(LAPACK) # force showing updated LAPACK_LIBRARIES in ccmake / cmake-gui. - set( LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE ) -else() - message( STATUS "User set LAPACK_LIBRARIES. To change, edit LAPACK_LIBRARIES using ccmake (set to empty to enable search)." ) + set(LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE) +else () + message(STATUS "User set LAPACK_LIBRARIES. To change, edit LAPACK_LIBRARIES using ccmake (set to empty to enable search).") # Check either -lname syntax or file existence - foreach( LIB ${LAPACK_LIBRARIES} ) + foreach (LIB ${LAPACK_LIBRARIES}) if (NOT LIB MATCHES "^-l[a-zA-Z0-9_-]+$") - if (NOT EXISTS ${LIB}) - message( WARNING "\n Warning: file ${LIB} does not exist.\n" ) - endif() - endif() - endforeach() -endif() + if (NOT EXISTS ${LIB}) + message(WARNING "\n Warning: file ${LIB} does not exist.\n") + endif () + endif () + endforeach () +endif () # If using MKL, add it to includes and define MAGMA_WITH_MKL # Initially, this gets MKLROOT from environment, but then the user can edit it. if (LAPACK_LIBRARIES MATCHES mkl_core) - set( MKLROOT $ENV{MKLROOT} CACHE STRING "MKL installation directory" ) + set(MKLROOT $ENV{MKLROOT} CACHE STRING "MKL installation directory") if (MKLROOT STREQUAL "") - message( WARNING "LAPACK_LIBRARIES has MKL, but MKLROOT not set; can't add include directory." ) - else() - message( STATUS "MKLROOT set to ${MKLROOT}. To change, edit MKLROOT using ccmake." ) + message(WARNING "LAPACK_LIBRARIES has MKL, but MKLROOT not set; can't add include directory.") + else () + message(STATUS "MKLROOT set to ${MKLROOT}. To change, edit MKLROOT using ccmake.") if (NOT EXISTS ${MKLROOT}) - message( FATAL_ERROR "MKLROOT ${MKLROOT} directory does not exist." ) - endif() - include_directories( ${MKLROOT}/include ) - add_definitions( -DMAGMA_WITH_MKL ) - message( STATUS "Define -DMAGMA_WITH_MKL" ) - endif() -endif() + message(FATAL_ERROR "MKLROOT ${MKLROOT} directory does not exist.") + endif () + include_directories(${MKLROOT}/include) + add_definitions(-DMAGMA_WITH_MKL) + message(STATUS "Define -DMAGMA_WITH_MKL") + endif () +endif () # ---------------------------------------- # save magma.lib, magma_sparse.lib, etc. in lib/ -set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY lib ) -set( CMAKE_LIBRARY_OUTPUT_DIRECTORY lib ) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib) # ---------------------------------------- # list of sources if (MAGMA_ENABLE_CUDA) - include( ${CMAKE_SOURCE_DIR}/CMake.src.cuda ) -else() - include( ${CMAKE_SOURCE_DIR}/CMake.src.hip ) -endif() + include(${CMAKE_SOURCE_DIR}/CMake.src.cuda) +else () + include(${CMAKE_SOURCE_DIR}/CMake.src.hip) +endif () # ---------------------------------------- # common flags @@ -390,20 +401,20 @@ if (WIN32) # -Wall is way too verbose; use -W4 # -MP enables parallel builds # -std=c99 is not implemented, so skip that - string( REGEX REPLACE " */W3" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" ) - string( REGEX REPLACE " */W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" ) - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY" ) -else() + string(REGEX REPLACE " */W3" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REGEX REPLACE " */W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY") +else () # Primarily for gcc / nvcc: # Ignore unused static functions in headers. - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unused-function" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function" ) -endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unused-function") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function") +endif () if (CMAKE_HOST_APPLE) # Use rpaths, which is on by default in CMake 3. - set( CMAKE_MACOSX_RPATH 1 ) + set(CMAKE_MACOSX_RPATH 1) # 64-bit veclib (Accelerate) has issues; substitute correct functions from LAPACK. # (The issue is single precision functions that return doubles; @@ -411,48 +422,48 @@ if (CMAKE_HOST_APPLE) # but this is not feasible in Fortran.) if (LAPACK_LIBRARIES MATCHES "Accelerate") if (USE_FORTRAN) - message( STATUS "MacOS X: adding blas_fix library" ) - add_library( blas_fix ${libblas_fix_src} ) - target_link_libraries( blas_fix - ${LAPACK_LIBRARIES} + message(STATUS "MacOS X: adding blas_fix library") + add_library(blas_fix ${libblas_fix_src}) + target_link_libraries(blas_fix + ${LAPACK_LIBRARIES} ) - set( blas_fix blas_fix ) - set( blas_fix_lib -lblas_fix ) - else() - message( WARNING "\n Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n" ) - endif() - endif() + set(blas_fix blas_fix) + set(blas_fix_lib -lblas_fix) + else () + message(WARNING "\n Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n") + endif () + endif () - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY" ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY") # previously, just compile as 32-bit, but CUDA 6.5 no longer has 32-bit FAT libraries ## set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32" ) ## set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) ## set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -m32" ) ## set( CUDA_64_BIT_DEVICE_CODE OFF ) -endif() +endif () -include_directories( "${CMAKE_BINARY_DIR}/include" ) +include_directories("${CMAKE_BINARY_DIR}/include") -include_directories( include ) -include_directories( control ) +include_directories(include) +include_directories(control) if (MAGMA_ENABLE_CUDA) - include_directories( magmablas ) # e.g., shuffle.cuh -else() - include_directories( magmablas_hip ) # e.g., shuffle.cuh -endif() + include_directories(magmablas) # e.g., shuffle.cuh +else () + include_directories(magmablas_hip) # e.g., shuffle.cuh +endif () # Need to check sizeof(void*) after setting flags above; # CMAKE_SIZEOF_VOID_P can be wrong. -include( CheckTypeSize ) -CHECK_TYPE_SIZE( void* SIZEOF_VOID_PTR ) +include(CheckTypeSize) +CHECK_TYPE_SIZE(void* SIZEOF_VOID_PTR) if (USE_FORTRAN) - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Dmagma_devptr_t=\"integer\(kind=${SIZEOF_VOID_PTR}\)\"" ) -endif() + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Dmagma_devptr_t=\"integer\(kind=${SIZEOF_VOID_PTR}\)\"") +endif () # Configure config -configure_file(${CMAKE_SOURCE_DIR}/include/magma_config.h.in ${CMAKE_BINARY_DIR}/include/magma_config.h) +configure_file(${CMAKE_SOURCE_DIR}/include/magma_config.h.in ${CMAKE_BINARY_DIR}/include/magma_config.h) # ---------------------------------------- # compile MAGMA library @@ -462,24 +473,24 @@ if (WIN32) # understand that .F90 files should be pre-processed. # separate Fortran and C/C++/CUDA files - foreach( filename ${libmagma_all} ) + foreach (filename ${libmagma_all}) if (filename MATCHES "\\.(f)$") # |f90|F90 - list( APPEND libmagma_all_f ${filename} ) + list(APPEND libmagma_all_f ${filename}) elseif (filename MATCHES "\\.(c|cu|cpp)$") - list( APPEND libmagma_all_cpp ${filename} ) - endif() - endforeach() + list(APPEND libmagma_all_cpp ${filename}) + endif () + endforeach () #message( "libmagma_all_cpp ${libmagma_all_cpp}" ) #message( "libmagma_all_f ${libmagma_all_f}" ) # on Windows, Fortran files aren't compiled if listed here... - add_library( magma ${libmagma_all_cpp} ) - target_link_libraries( magma - ${LAPACK_LIBRARIES} - CUDA::cudart - CUDA::cublas - CUDA::cusparse - magma_nvcc_flags + add_library(magma ${libmagma_all_cpp}) + target_link_libraries(magma + ${LAPACK_LIBRARIES} + CUDA::cudart + CUDA::cublas + CUDA::cusparse + magma_nvcc_flags ) # no Fortran files at the moment (how to test libmagma_all_f is not empty?), @@ -494,51 +505,51 @@ if (WIN32) ## CUDA::cusparse ## ) ## make list of Fortran .mod files to install, as below -else() +else () # Unix doesn't seem to have a problem with mixing C, CUDA, and Fortran files if (MAGMA_ENABLE_CUDA) - #message(FATAL_ERROR "${libmagma_all}") - add_library( magma ${libmagma_all} ) - target_link_libraries( magma - ${blas_fix} - ${LAPACK_LIBRARIES} - CUDA::cudart - CUDA::cublas - CUDA::cusparse - magma_nvcc_flags + #message(FATAL_ERROR "${libmagma_all}") + add_library(magma ${libmagma_all}) + target_link_libraries(magma + ${blas_fix} + ${LAPACK_LIBRARIES} + CUDA::cudart + CUDA::cublas + CUDA::cusparse + magma_nvcc_flags ) - else() - find_package( hipBLAS ) - if (hipBLAS_FOUND) - message( STATUS "Found rocBLAS ${rocBLAS_VERSION}" ) - endif() - find_package( hipSPARSE ) - if (hipSPARSE_FOUND) - message( STATUS "Found rocSPARSE ${rocSPARSE_VERSION}" ) - endif() - add_library( magma ${libmagma_all} ) - target_link_libraries( magma - hip::host - ${blas_fix} - ${LAPACK_LIBRARIES} + else () + find_package(hipBLAS) + if (hipBLAS_FOUND) + message(STATUS "Found rocBLAS ${rocBLAS_VERSION}") + endif () + find_package(hipSPARSE) + if (hipSPARSE_FOUND) + message(STATUS "Found rocSPARSE ${rocSPARSE_VERSION}") + endif () + add_library(magma ${libmagma_all}) + target_link_libraries(magma + hip::host + ${blas_fix} + ${LAPACK_LIBRARIES} hip::device - roc::hipblas - roc::hipsparse + roc::hipblas + roc::hipsparse ) - endif() + endif () if (USE_FORTRAN) # make list of Fortran .mod files to install - foreach( filename ${libmagma_all} ) + foreach (filename ${libmagma_all}) if (filename MATCHES "\\.(f90|F90)$") # mod files seem to wind up in root build directory - get_filename_component( fmod ${filename} NAME_WE ) - list( APPEND modules "${CMAKE_BINARY_DIR}/${fmod}.mod" ) - endif() - endforeach() - endif() -endif() -add_custom_target( lib DEPENDS magma ) + get_filename_component(fmod ${filename} NAME_WE) + list(APPEND modules "${CMAKE_BINARY_DIR}/${fmod}.mod") + endif () + endforeach () + endif () +endif () +add_custom_target(lib DEPENDS magma) # ---------------------------------------- @@ -546,35 +557,35 @@ add_custom_target( lib DEPENDS magma ) # If use fortran, compile only Fortran files, not magma_[sdcz]_no_fortran.cpp # else, compile only C++ files, not Fortran files if (USE_FORTRAN) - foreach( filename ${liblapacktest_all} ) + foreach (filename ${liblapacktest_all}) if (filename MATCHES "\\.(f|f90|F90)$") - list( APPEND liblapacktest_all_f ${filename} ) - endif() - endforeach() - add_library( lapacktest ${liblapacktest_all_f} ) -else() + list(APPEND liblapacktest_all_f ${filename}) + endif () + endforeach () + add_library(lapacktest ${liblapacktest_all_f}) +else () # alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp - foreach( filename ${liblapacktest_all} ) + foreach (filename ${liblapacktest_all}) if (filename MATCHES "\\.(c|cu|cpp)$") - list( APPEND liblapacktest_all_cpp ${filename} ) - endif() - endforeach() - add_library( lapacktest ${liblapacktest_all_cpp} ) -endif() -target_link_libraries( lapacktest - ${blas_fix} - ${LAPACK_LIBRARIES} + list(APPEND liblapacktest_all_cpp ${filename}) + endif () + endforeach () + add_library(lapacktest ${liblapacktest_all_cpp}) +endif () +target_link_libraries(lapacktest + ${blas_fix} + ${LAPACK_LIBRARIES} ) # ---------------------------------------- # compile tester library -add_library( tester ${libtest_all} ) -target_link_libraries( tester - magma - lapacktest - ${blas_fix} - ${LAPACK_LIBRARIES} +add_library(tester ${libtest_all}) +target_link_libraries(tester + magma + lapacktest + ${blas_fix} + ${LAPACK_LIBRARIES} ) @@ -583,38 +594,38 @@ target_link_libraries( tester # sparse doesn't have Fortran at the moment, so no need for above shenanigans if (MAGMA_ENABLE_CUDA) - include_directories( sparse/include ) - include_directories( sparse/control ) -else() - include_directories( sparse_hip/include ) - include_directories( sparse_hip/control ) -endif() -include_directories( testing ) + include_directories(sparse/include) + include_directories(sparse/control) +else () + include_directories(sparse_hip/include) + include_directories(sparse_hip/control) +endif () +include_directories(testing) if (MAGMA_ENABLE_CUDA) - add_library( magma_sparse ${libsparse_all} ) + add_library(magma_sparse ${libsparse_all}) set_property(TARGET magma_sparse PROPERTY CUDA_STANDARD 14) - target_link_libraries( magma_sparse - magma - ${blas_fix} - ${LAPACK_LIBRARIES} - CUDA::cudart - CUDA::cublas - CUDA::cusparse - magma_nvcc_flags + target_link_libraries(magma_sparse + magma + ${blas_fix} + ${LAPACK_LIBRARIES} + CUDA::cudart + CUDA::cublas + CUDA::cusparse + magma_nvcc_flags ) -else() - add_library( magma_sparse ${libsparse_all} ) - target_link_libraries( magma_sparse - magma - ${blas_fix} - ${LAPACK_LIBRARIES} - hip::device - roc::hipblas - roc::hipsparse +else () + add_library(magma_sparse ${libsparse_all}) + target_link_libraries(magma_sparse + magma + ${blas_fix} + ${LAPACK_LIBRARIES} + hip::device + roc::hipblas + roc::hipsparse ) -endif() -add_custom_target( sparse-lib DEPENDS magma_sparse ) +endif () +add_custom_target(sparse-lib DEPENDS magma_sparse) # ---------------------------------------- @@ -622,119 +633,119 @@ add_custom_target( sparse-lib DEPENDS magma_sparse ) # save testers to testing/ # save tester lib files to testing_lib/ to avoid cluttering lib/ -set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing ) -set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib ) -set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib ) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY testing) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib) # skip Fortran testers, which require an extra file from CUDA -foreach( filename ${testing_all} ) +foreach (filename ${testing_all}) if (filename MATCHES "\\.(c|cu|cpp)$") - list( APPEND testing_all_cpp ${filename} ) - endif() -endforeach() -foreach( TEST ${testing_all_cpp} ) - string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} ) - string( REGEX REPLACE "testing/" "" EXE ${EXE} ) + list(APPEND testing_all_cpp ${filename}) + endif () +endforeach () +foreach (TEST ${testing_all_cpp}) + string(REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST}) + string(REGEX REPLACE "testing/" "" EXE ${EXE}) #message( "${TEST} --> ${EXE}" ) - add_executable( ${EXE} ${TEST} ) - target_link_libraries( ${EXE} tester lapacktest magma ) - list( APPEND testing ${EXE} ) -endforeach() -add_custom_target( testing DEPENDS ${testing} ) + add_executable(${EXE} ${TEST}) + target_link_libraries(${EXE} tester lapacktest magma) + list(APPEND testing ${EXE}) +endforeach () +add_custom_target(testing DEPENDS ${testing}) # ---------------------------------------- # compile each sparse tester if (MAGMA_ENABLE_CUDA) - set(SPARSE_TEST_DIR "sparse/testing") -else() - set(SPARSE_TEST_DIR "sparse_hip/testing") -endif() + set(SPARSE_TEST_DIR "sparse/testing") +else () + set(SPARSE_TEST_DIR "sparse_hip/testing") +endif () -set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" ) -cmake_policy( SET CMP0037 OLD) -foreach( TEST ${sparse_testing_all} ) - string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} ) - string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} ) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}") +cmake_policy(SET CMP0037 OLD) +foreach (TEST ${sparse_testing_all}) + string(REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST}) + string(REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE}) #message( "${TEST} --> ${EXE}" ) - add_executable( ${EXE} ${TEST} ) - target_link_libraries( ${EXE} magma_sparse magma ) - list( APPEND sparse-testing ${EXE} ) -endforeach() -add_custom_target( sparse-testing DEPENDS ${sparse-testing} ) + add_executable(${EXE} ${TEST}) + target_link_libraries(${EXE} magma_sparse magma) + list(APPEND sparse-testing ${EXE}) +endforeach () +add_custom_target(sparse-testing DEPENDS ${sparse-testing}) # ---------------------------------------- # what to install -install( TARGETS magma magma_sparse ${blas_fix} - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib ) +install(TARGETS magma magma_sparse ${blas_fix} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) if (MAGMA_ENABLE_CUDA) - file( GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) -else() - file( GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) -endif() + file(GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h") +else () + file(GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h") +endif () if (USE_FORTRAN) - install( FILES ${headers} ${modules} - DESTINATION include ) -else() - install( FILES ${headers} DESTINATION include ) -endif() + install(FILES ${headers} ${modules} + DESTINATION include) +else () + install(FILES ${headers} DESTINATION include) +endif () # ---------------------------------------- # pkg-config get_target_property(MAGMA_INCLUDE magma INCLUDE_DIRECTORIES) -foreach(dir ${MAGMA_INCLUDE}) +foreach (dir ${MAGMA_INCLUDE}) string(APPEND INCLUDE_COMPILER_STRING "-I${dir} ") -endforeach() -set( MAGMA_INCLUDE "${INCLUDE_COMPILER_STRING}" ) -set( pkgconfig lib/pkgconfig/magma.pc ) -message( STATUS "pkgconfig ${pkgconfig}" ) -set( INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" ) -set( CFLAGS "${CMAKE_C_FLAGS}" ) -set( CXXFLAGS "${CMAKE_CXX_FLAGS}" ) +endforeach () +set(MAGMA_INCLUDE "${INCLUDE_COMPILER_STRING}") +set(pkgconfig lib/pkgconfig/magma.pc) +message(STATUS "pkgconfig ${pkgconfig}") +set(INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") +set(CFLAGS "${CMAKE_C_FLAGS}") +set(CXXFLAGS "${CMAKE_CXX_FLAGS}") # CMake finds the Accelerate directory; we want -framework Accelerate for linking. -string( REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}" ) +string(REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}") if (MAGMA_ENABLE_CUDA) - string( REPLACE ";" " " LIBS - "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse") -else() - string( REPLACE ";" " " LIBS - "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}" ) -# "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" ) -endif() -set( MAGMA_REQUIRED "" ) -configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY ) -install( FILES "${CMAKE_BINARY_DIR}/${pkgconfig}" - DESTINATION lib/pkgconfig ) + string(REPLACE ";" " " LIBS + "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse") +else () + string(REPLACE ";" " " LIBS + "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}") + # "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" ) +endif () +set(MAGMA_REQUIRED "") +configure_file("${pkgconfig}.in" "${pkgconfig}" @ONLY) +install(FILES "${CMAKE_BINARY_DIR}/${pkgconfig}" + DESTINATION lib/pkgconfig) # ---------------------------------------- -get_directory_property( compile_definitions COMPILE_DEFINITIONS ) +get_directory_property(compile_definitions COMPILE_DEFINITIONS) -message( STATUS "Flags" ) -message( STATUS " CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}" ) -message( STATUS " CFLAGS: ${CMAKE_C_FLAGS}" ) -message( STATUS " CXXFLAGS: ${CMAKE_CXX_FLAGS}" ) +message(STATUS "Flags") +message(STATUS " CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") +message(STATUS " CFLAGS: ${CMAKE_C_FLAGS}") +message(STATUS " CXXFLAGS: ${CMAKE_CXX_FLAGS}") if (MAGMA_ENABLE_CUDA) - message( STATUS " NVCCFLAGS: ${CMAKE_CUDA_FLAGS}" ) -else() - message( STATUS " DEVCCFLAGS: ${DEVCCFLAGS}" ) -endif() -message( STATUS " FFLAGS: ${CMAKE_Fortran_FLAGS}" ) -message( STATUS " LIBS: ${LIBS}" ) -message( STATUS " blas_fix: ${blas_fix} (MacOS Accelerate only)" ) -message( STATUS " LAPACK_LIBRARIES: ${LAPACK_LIBRARIES}" ) -message( STATUS " INCLUDE_DIRECTORIES: ${MAGMA_INCLUDE}" ) + message(STATUS " NVCCFLAGS: ${CMAKE_CUDA_FLAGS}") +else () + message(STATUS " DEVCCFLAGS: ${DEVCCFLAGS}") +endif () +message(STATUS " FFLAGS: ${CMAKE_Fortran_FLAGS}") +message(STATUS " LIBS: ${LIBS}") +message(STATUS " blas_fix: ${blas_fix} (MacOS Accelerate only)") +message(STATUS " LAPACK_LIBRARIES: ${LAPACK_LIBRARIES}") +message(STATUS " INCLUDE_DIRECTORIES: ${MAGMA_INCLUDE}") if (MAGMA_ENABLE_CUDA) - message( STATUS " CUDA_CUDART_LIBRARY: CUDA::cudart" ) - message( STATUS " CUDA_CUBLAS_LIBRARIES: CUDA::cublas" ) - message( STATUS " CUDA_cusparse_LIBRARY: CUDA::cusparse" ) -else() - message( STATUS " HIP_LIBRARY: hip::device" ) - message( STATUS " HIP_BLAS_LIBRARIES: roc::hipblas" ) - message( STATUS " HIP_sparse_LIBRARY: roc::hipsparse" ) -endif() -message( STATUS " Fortran modules: ${modules}" ) + message(STATUS " CUDA_CUDART_LIBRARY: CUDA::cudart") + message(STATUS " CUDA_CUBLAS_LIBRARIES: CUDA::cublas") + message(STATUS " CUDA_cusparse_LIBRARY: CUDA::cusparse") +else () + message(STATUS " HIP_LIBRARY: hip::device") + message(STATUS " HIP_BLAS_LIBRARIES: roc::hipblas") + message(STATUS " HIP_sparse_LIBRARY: roc::hipsparse") +endif () +message(STATUS " Fortran modules: ${modules}") diff --git a/include/magma_auxiliary.h b/include/magma_auxiliary.h index c4f06d55d..2c7fd075c 100644 --- a/include/magma_auxiliary.h +++ b/include/magma_auxiliary.h @@ -81,6 +81,9 @@ magma_int_t magma_get_smlsize_divideconquer(); magma_int_t magma_malloc( magma_ptr *ptr_ptr, size_t bytes ); +magma_int_t +magma_malloc_async( magma_ptr* ptrPtr, size_t size, magma_queue_t queue); + magma_int_t magma_malloc_cpu( void **ptr_ptr, size_t bytes ); @@ -93,6 +96,9 @@ magma_free_cpu( void *ptr ); #define magma_free( ptr ) \ magma_free_internal( ptr, __func__, __FILE__, __LINE__ ) +#define magma_free_async( ptr, queue ) \ + magma_free_internal_async( ptr, __func__, __FILE__, __LINE__, queue ) + #define magma_free_pinned( ptr ) \ magma_free_pinned_internal( ptr, __func__, __FILE__, __LINE__ ) @@ -101,6 +107,11 @@ magma_free_internal( magma_ptr ptr, const char* func, const char* file, int line ); +magma_int_t +magma_free_internal_async( + magma_ptr ptr, + const char* func, const char* file, int line, magma_queue_t queue ); + magma_int_t magma_free_pinned_internal( void *ptr, @@ -128,24 +139,45 @@ magma_memset_async(void * ptr, int value, size_t count, magma_queue_t queue); /// Type-safe version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes. static inline magma_int_t magma_imalloc( magmaInt_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_int_t) ); } +/// Type-safe version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes. +static inline magma_int_t magma_imalloc_async( magmaInt_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_int_t), queue ); } + /// Type-safe version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes. static inline magma_int_t magma_index_malloc( magmaIndex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_index_t) ); } +/// Type-safe version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes. +static inline magma_int_t magma_index_malloc_async( magmaIndex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_index_t), queue ); } + /// Type-safe version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes. static inline magma_int_t magma_uindex_malloc( magmaUIndex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_uindex_t) ); } +/// Type-safe version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes. +static inline magma_int_t magma_uindex_malloc_async( magmaUIndex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_uindex_t), queue); } + /// Type-safe version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes. static inline magma_int_t magma_smalloc( magmaFloat_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(float) ); } +/// Type-safe version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes. +static inline magma_int_t magma_smalloc_async( magmaFloat_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(float), queue); } + /// Type-safe version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes. static inline magma_int_t magma_dmalloc( magmaDouble_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(double) ); } +/// Type-safe version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes. +static inline magma_int_t magma_dmalloc_async( magmaDouble_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(double), queue); } + /// Type-safe version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes. static inline magma_int_t magma_cmalloc( magmaFloatComplex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magmaFloatComplex) ); } +/// Type-safe version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes. +static inline magma_int_t magma_cmalloc_async( magmaFloatComplex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magmaFloatComplex), queue ); } + /// Type-safe version of magma_malloc(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes. static inline magma_int_t magma_zmalloc( magmaDoubleComplex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magmaDoubleComplex) ); } +/// Type-safe version of magma_malloc_async(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes. +static inline magma_int_t magma_zmalloc_async( magmaDoubleComplex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magmaDoubleComplex), queue ); } + /// @} diff --git a/include/magma_z.h b/include/magma_z.h index f66ba76c1..d8d0091c9 100644 --- a/include/magma_z.h +++ b/include/magma_z.h @@ -505,6 +505,16 @@ magma_zgerbt_gpu( magmaDoubleComplex *U, magmaDoubleComplex *V, magma_int_t *info); +// CUDA MAGMA only +magma_int_t +magma_zgerbt_gpu_async( + const magma_bool_t gen, const magma_int_t n, const magma_int_t nrhs, + magmaDoubleComplex_ptr const dA, magma_int_t const ldda, + magmaDoubleComplex_ptr const dB, magma_int_t const lddb, + magmaDoubleComplex_ptr const dU, magmaDoubleComplex_ptr const dV, + magma_int_t *info, + magma_queue_t queue); + // CUDA MAGMA only magma_int_t magma_zgerfs_nopiv_gpu( @@ -516,6 +526,20 @@ magma_zgerfs_nopiv_gpu( magma_int_t *iter, magma_int_t *info); +// CUDA MAGMA only +magma_int_t +magma_zgerfs_nopiv_gpu_async( + magma_trans_t trans, magma_int_t n, magma_int_t nrhs, + magmaDoubleComplex_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magmaDoubleComplex_ptr dX, magma_int_t lddx, + magmaDoubleComplex_ptr dworkd, magmaDoubleComplex_ptr dAF, + magma_int_t *iter, + magma_int_t *info, + magma_int_t iter_max, + double bwdmax, + magma_queue_t queue); + magma_int_t magma_zgesdd( magma_vec_t jobz, magma_int_t m, magma_int_t n, @@ -553,6 +577,13 @@ magma_zgesv_nopiv_gpu( magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_int_t *info); +magma_int_t +magma_zgesv_nopiv_gpu_async( + magma_int_t n, magma_int_t nrhs, + magmaDoubleComplex_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magma_int_t *info, magma_queue_t queue ); + // CUDA MAGMA only magma_int_t magma_zgesv_rbt( @@ -561,6 +592,26 @@ magma_zgesv_rbt( magmaDoubleComplex *B, magma_int_t ldb, magma_int_t *info); +// CUDA MAGMA only +magma_int_t +magma_zgesv_rbt_async( + const magma_bool_t refine, const magma_int_t n, const magma_int_t nrhs, + const magmaDoubleComplex *const dA, const magma_int_t lda, + magmaDoubleComplex *const dB, const magma_int_t ldb, + magma_int_t *info, + const magma_int_t iter_max, const double bwdmax, + magma_queue_t queue ); + +// CUDA MAGMA only +magma_int_t +magma_zgesv_rbt_refine_async( + const magma_int_t n, const magma_int_t nrhs, + const magmaDoubleComplex *const dA_, const magma_int_t lda, + magmaDoubleComplex *const dB_, const magma_int_t ldb, + magma_int_t *info, + const magma_int_t iter_max, const double bwdmax, + magma_queue_t queue); + magma_int_t magma_zgesvd( magma_vec_t jobu, magma_vec_t jobvt, magma_int_t m, magma_int_t n, @@ -704,6 +755,13 @@ magma_zgetrf_nopiv_gpu( magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *info); +magma_int_t +magma_zgetrf_nopiv_gpu_async( + magma_int_t m, magma_int_t n, + magmaDoubleComplex_ptr dA, magma_int_t ldda, + magma_int_t *info, + magma_queue_t queue); + magma_int_t magma_zgetri_gpu( magma_int_t n, @@ -749,6 +807,13 @@ magma_zgetrs_nopiv_gpu( magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_int_t *info); +magma_int_t +magma_zgetrs_nopiv_gpu_async( + magma_trans_t trans, magma_int_t n, magma_int_t nrhs, + magmaDoubleComplex_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magma_int_t *info, magma_queue_t queue); + // ------------------------------------------------------------ zhe routines magma_int_t magma_zheevd( diff --git a/include/magmablas_z.h b/include/magmablas_z.h index 8a10d4622..d8e0bf029 100644 --- a/include/magmablas_z.h +++ b/include/magmablas_z.h @@ -496,6 +496,13 @@ magmablas_ztrtri_diag( magmaDoubleComplex_ptr d_dinvA, magma_queue_t queue ); +void +magmablas_ztrtri_diag_async( + magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, + magmaDoubleComplex_const_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr d_dinvA, + magma_queue_t queue ); + /* * to cleanup (alphabetical order) */ @@ -757,6 +764,15 @@ magmablas_ztrsm( magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_queue_t queue ); +void +magmablas_ztrsm_async( + magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, + magma_int_t m, magma_int_t n, + magmaDoubleComplex alpha, + magmaDoubleComplex_const_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magma_queue_t queue ); + void magmablas_ztrsm_outofplace( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, diff --git a/interface_cuda/alloc.cpp b/interface_cuda/alloc.cpp index d7e178baa..74f9bdbd1 100644 --- a/interface_cuda/alloc.cpp +++ b/interface_cuda/alloc.cpp @@ -23,6 +23,7 @@ #include "error.h" //#ifdef MAGMA_HAVE_CUDA +#define NOPINNED_ALLOC // Fix for multi GPU systems running jemalloc, where cudaHostAlloc and cudaHostRegister kill parallelism across multiple threads using multiple GPUs #ifdef DEBUG_MEMORY @@ -77,6 +78,53 @@ magma_malloc( magma_ptr* ptrPtr, size_t size ) } +/***************************************************************************//** + Allocates memory on the GPU. CUDA imposes a synchronization. + Use magma_free() to free this memory. + + @param[out] + ptrPtr On output, set to the pointer that was allocated. + NULL on failure. + + @param[in] + size Size in bytes to allocate. If size = 0, allocates some minimal size. + + @param[in] + queue Magma queue whose CUDA stream is used to put the cudaMalloc on. + + @return MAGMA_SUCCESS + @return MAGMA_ERR_DEVICE_ALLOC on failure + + Type-safe versions avoid the need for a (void**) cast and explicit sizeof. + @see magma_smalloc_q + @see magma_dmalloc_q + @see magma_cmalloc_q + @see magma_zmalloc_q + @see magma_imalloc_q + @see magma_index_malloc_q + + @ingroup magma_malloc +*******************************************************************************/ +extern "C" magma_int_t +magma_malloc_async( magma_ptr* ptrPtr, size_t size, magma_queue_t queue) +{ + // malloc and free sometimes don't work for size=0, so allocate some minimal size + if ( size == 0 ) + size = sizeof(magmaDoubleComplex); + if ( cudaSuccess != cudaMallocAsync( ptrPtr, size, queue->cuda_stream() )) { + return MAGMA_ERR_DEVICE_ALLOC; + } + + #ifdef DEBUG_MEMORY + g_pointers_mutex.lock(); + g_pointers_dev[ *ptrPtr ] = size; + g_pointers_mutex.unlock(); + #endif + + return MAGMA_SUCCESS; +} + + /***************************************************************************//** @fn magma_free( ptr ) @@ -114,6 +162,45 @@ magma_free_internal( magma_ptr ptr, } +/***************************************************************************//** + @fn magma_free( ptr ) + + Frees GPU memory previously allocated by magma_malloc(). + + @param[in] + ptr Pointer to free. + @param[in] + queue MAGMA queue to use the CYDA stream to free on. + + @return MAGMA_SUCCESS + @return MAGMA_ERR_INVALID_PTR on failure + + @ingroup magma_malloc +*******************************************************************************/ +extern "C" magma_int_t +magma_free_internal_async( magma_ptr ptr, + const char* func, const char* file, int line, magma_queue_t queue ) +{ + #ifdef DEBUG_MEMORY + g_pointers_mutex.lock(); + if ( ptr != NULL && g_pointers_dev.count( ptr ) == 0 ) { + fprintf( stderr, "magma_free( %p ) that wasn't allocated with magma_malloc.\n", ptr ); + } + else { + g_pointers_dev.erase( ptr ); + } + g_pointers_mutex.unlock(); + #endif + + cudaError_t err = cudaFreeAsync( ptr, queue->cuda_stream() ); + check_xerror( err, func, file, line ); + if ( err != cudaSuccess ) { + return MAGMA_ERR_INVALID_PTR; + } + return MAGMA_SUCCESS; +} + + /***************************************************************************//** Allocate size bytes on CPU. The purpose of using this instead of malloc is to properly align arrays @@ -162,10 +249,10 @@ magma_malloc_cpu( void** ptrPtr, size_t size ) } #endif #else - *ptrPtr = malloc( size ); - if ( *ptrPtr == NULL ) { - return MAGMA_ERR_HOST_ALLOC; - } + *ptrPtr = malloc( size ); + if ( *ptrPtr == NULL ) { + return MAGMA_ERR_HOST_ALLOC; + } #endif #ifdef DEBUG_MEMORY @@ -242,10 +329,15 @@ magma_free_cpu( void* ptr ) extern "C" magma_int_t magma_malloc_pinned( void** ptrPtr, size_t size ) { + #ifdef NOPINNED_ALLOC + return magma_malloc_cpu( ptrPtr, size ); + #endif + // malloc and free sometimes don't work for size=0, so allocate some minimal size // (for pinned memory, the error is detected in free) if ( size == 0 ) size = sizeof(magmaDoubleComplex); + if ( cudaSuccess != cudaHostAlloc( ptrPtr, size, cudaHostAllocPortable )) { return MAGMA_ERR_HOST_ALLOC; } @@ -277,6 +369,10 @@ extern "C" magma_int_t magma_free_pinned_internal( void* ptr, const char* func, const char* file, int line ) { + #ifdef NOPINNED_ALLOC + return magma_free_cpu( ptr ); + #endif + #ifdef DEBUG_MEMORY g_pointers_mutex.lock(); if ( ptr != NULL && g_pointers_pin.count( ptr ) == 0 ) { @@ -293,6 +389,7 @@ magma_free_pinned_internal( void* ptr, if ( cudaSuccess != err ) { return MAGMA_ERR_INVALID_PTR; } + return MAGMA_SUCCESS; } diff --git a/magmablas/ztrsm.cu b/magmablas/ztrsm.cu index ff63b4471..81cc626a6 100644 --- a/magmablas/ztrsm.cu +++ b/magmablas/ztrsm.cu @@ -380,6 +380,244 @@ void magmablas_ztrsm_outofplace( } } +extern "C" +void magmablas_ztrsm_outofplace_async( + magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, + magma_int_t m, magma_int_t n, + magmaDoubleComplex alpha, + magmaDoubleComplex_const_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magmaDoubleComplex_ptr dX, magma_int_t lddx, + magma_int_t flag, + magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length, + magma_queue_t queue ) +{ + #define dA(i_, j_) (dA + (i_) + (j_)*ldda) + #define dB(i_, j_) (dB + (i_) + (j_)*lddb) + #define dX(i_, j_) (dX + (i_) + (j_)*lddx) + #define d_dinvA(i_) (d_dinvA + (i_)*NB) + + const magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; + const magmaDoubleComplex c_one = MAGMA_Z_ONE; + const magmaDoubleComplex c_zero = MAGMA_Z_ZERO; + + magma_int_t i, jb; + magma_int_t nrowA = (side == MagmaLeft ? m : n); + + magma_int_t min_dinvA_length; + if ( side == MagmaLeft ) { + min_dinvA_length = magma_roundup( m, NB )*NB; + } + else { + min_dinvA_length = magma_roundup( n, NB )*NB; + } + + magma_int_t info = 0; + if ( side != MagmaLeft && side != MagmaRight ) { + info = -1; + } else if ( uplo != MagmaUpper && uplo != MagmaLower ) { + info = -2; + } else if ( transA != MagmaNoTrans && transA != MagmaTrans && transA != MagmaConjTrans ) { + info = -3; + } else if ( diag != MagmaUnit && diag != MagmaNonUnit ) { + info = -4; + } else if (m < 0) { + info = -5; + } else if (n < 0) { + info = -6; + } else if (dA == NULL) { + info = -8; + } else if (ldda < max(1,nrowA)) { + info = -9; + } else if (dB == NULL) { + info = -10; + } else if (lddb < max(1,m)) { + info = -11; + } else if (dX == NULL) { + info = -12; + } else if (lddx < max(1,m)) { + info = -13; + } else if (d_dinvA == NULL) { + info = -15; + } else if (dinvA_length < min_dinvA_length) { + info = -16; + } + + if (info != 0) { + magma_xerbla( __func__, -(info) ); + return; + } + + // quick return if possible. + if (m == 0 || n == 0) + return; + + if (side == MagmaLeft) { + // invert diagonal blocks + if (flag) + magmablas_ztrtri_diag_async( uplo, diag, m, dA, ldda, d_dinvA, queue ); + + if (transA == MagmaNoTrans) { + if (uplo == MagmaLower) { + // left, lower no-transpose + // handle first block separately with alpha + jb = min(NB, m); + magma_zgemm( MagmaNoTrans, MagmaNoTrans, jb, n, jb, alpha, d_dinvA(0), NB, dB, lddb, c_zero, dX, lddx, queue ); + if (NB < m) { + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m-NB, n, NB, c_neg_one, dA(NB,0), ldda, dX, lddx, alpha, dB(NB,0), lddb, queue ); + + // remaining blocks + for( i=NB; i < m; i += NB ) { + jb = min(m-i, NB); + magma_zgemm( MagmaNoTrans, MagmaNoTrans, jb, n, jb, c_one, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue ); + if (i+NB >= m) + break; + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m-i-NB, n, NB, c_neg_one, dA(i+NB,i), ldda, dX(i,0), lddx, c_one, dB(i+NB,0), lddb, queue ); + } + } + } + else { + // left, upper no-transpose + // handle first block separately with alpha + jb = (m % NB == 0) ? NB : (m % NB); + i = m-jb; + magma_zgemm( MagmaNoTrans, MagmaNoTrans, jb, n, jb, alpha, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue ); + if (i-NB >= 0) { + magma_zgemm( MagmaNoTrans, MagmaNoTrans, i, n, jb, c_neg_one, dA(0,i), ldda, dX(i,0), lddx, alpha, dB, lddb, queue ); + + // remaining blocks + for( i=m-jb-NB; i >= 0; i -= NB ) { + magma_zgemm( MagmaNoTrans, MagmaNoTrans, NB, n, NB, c_one, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue ); + if (i-NB < 0) + break; + magma_zgemm( MagmaNoTrans, MagmaNoTrans, i, n, NB, c_neg_one, dA(0,i), ldda, dX(i,0), lddx, c_one, dB, lddb, queue ); + } + } + } + } + else { // transA == MagmaTrans || transA == MagmaConjTrans + if (uplo == MagmaLower) { + // left, lower transpose + // handle first block separately with alpha + jb = (m % NB == 0) ? NB : (m % NB); + i = m-jb; + magma_zgemm( transA, MagmaNoTrans, jb, n, jb, alpha, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue ); + if (i-NB >= 0) { + magma_zgemm( transA, MagmaNoTrans, i, n, jb, c_neg_one, dA(i,0), ldda, dX(i,0), lddx, alpha, dB, lddb, queue ); + + // remaining blocks + for( i=m-jb-NB; i >= 0; i -= NB ) { + magma_zgemm( transA, MagmaNoTrans, NB, n, NB, c_one, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue ); + if (i-NB < 0) + break; + magma_zgemm( transA, MagmaNoTrans, i, n, NB, c_neg_one, dA(i,0), ldda, dX(i,0), lddx, c_one, dB, lddb, queue ); + } + } + } + else { + // left, upper transpose + // handle first block separately with alpha + jb = min(NB, m); + magma_zgemm( transA, MagmaNoTrans, jb, n, jb, alpha, d_dinvA(0), NB, dB, lddb, c_zero, dX, lddx, queue ); + if (NB < m) { + magma_zgemm( transA, MagmaNoTrans, m-NB, n, NB, c_neg_one, dA(0,NB), ldda, dX, lddx, alpha, dB(NB,0), lddb, queue ); + + // remaining blocks + for( i=NB; i < m; i += NB ) { + jb = min(m-i, NB); + magma_zgemm( transA, MagmaNoTrans, jb, n, jb, c_one, d_dinvA(i), NB, dB(i,0), lddb, c_zero, dX(i,0), lddx, queue ); + if (i+NB >= m) + break; + magma_zgemm( transA, MagmaNoTrans, m-i-NB, n, NB, c_neg_one, dA(i,i+NB), ldda, dX(i,0), lddx, c_one, dB(i+NB,0), lddb, queue ); + } + } + } + } + } + else { // side == MagmaRight + // invert diagonal blocks + if (flag) + magmablas_ztrtri_diag_async( uplo, diag, n, dA, ldda, d_dinvA, queue ); + + if (transA == MagmaNoTrans) { + if (uplo == MagmaLower) { + // right, lower no-transpose + // handle first block separately with alpha + jb = (n % NB == 0) ? NB : (n % NB); + i = n-jb; + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, jb, jb, alpha, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue ); + if (i-NB >= 0) { + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, i, jb, c_neg_one, dX(0,i), lddx, dA(i,0), ldda, alpha, dB, lddb, queue ); + + // remaining blocks + for( i=n-jb-NB; i >= 0; i -= NB ) { + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, NB, NB, c_one, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue ); + if (i-NB < 0) + break; + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, i, NB, c_neg_one, dX(0,i), lddx, dA(i,0), ldda, c_one, dB, lddb, queue ); + } + } + } + else { + // right, upper no-transpose + // handle first block separately with alpha + jb = min(NB, n); + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, jb, jb, alpha, dB, lddb, d_dinvA(0), NB, c_zero, dX, lddx, queue ); + if (NB < n) { + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, n-NB, NB, c_neg_one, dX, lddx, dA(0,NB), ldda, alpha, dB(0,NB), lddb, queue ); + + // remaining blocks + for( i=NB; i < n; i += NB ) { + jb = min(NB, n-i); + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, jb, jb, c_one, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue ); + if (i+NB >= n) + break; + magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, n-i-NB, NB, c_neg_one, dX(0,i), lddx, dA(i,i+NB), ldda, c_one, dB(0,i+NB), lddb, queue ); + } + } + } + } + else { // transA == MagmaTrans || transA == MagmaConjTrans + if (uplo == MagmaLower) { + // right, lower transpose + // handle first block separately with alpha + jb = min(NB, n); + magma_zgemm( MagmaNoTrans, transA, m, jb, jb, alpha, dB, lddb, d_dinvA(0), NB, c_zero, dX, lddx, queue ); + if (NB < n) { + magma_zgemm( MagmaNoTrans, transA, m, n-NB, NB, c_neg_one, dX, lddx, dA(NB,0), ldda, alpha, dB(0,NB), lddb, queue ); + + // remaining blocks + for( i=NB; i < n; i += NB ) { + jb = min(NB, n-i); + magma_zgemm( MagmaNoTrans, transA, m, jb, jb, c_one, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue ); + if (i+NB >= n) + break; + magma_zgemm( MagmaNoTrans, transA, m, n-i-NB, NB, c_neg_one, dX(0,i), lddx, dA(NB+i,i), ldda, c_one, dB(0,i+NB), lddb, queue ); + } + } + } + else { + // right, upper transpose + // handle first block separately with alpha + jb = (n % NB == 0) ? NB : (n % NB); + i = n-jb; + magma_zgemm( MagmaNoTrans, transA, m, jb, jb, alpha, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue ); + if (i-NB >= 0) { + magma_zgemm( MagmaNoTrans, transA, m, i, jb, c_neg_one, dX(0,i), lddx, dA(0,i), ldda, alpha, dB, lddb, queue ); + + // remaining blocks + for( i=n-jb-NB; i >= 0; i -= NB ) { + magma_zgemm( MagmaNoTrans, transA, m, NB, NB, c_one, dB(0,i), lddb, d_dinvA(i), NB, c_zero, dX(0,i), lddx, queue ); + if (i-NB < 0) + break; + magma_zgemm( MagmaNoTrans, transA, m, i, NB, c_neg_one, dX(0,i), lddx, dA(0,i), ldda, c_one, dB, lddb, queue ); + } + } + } + } + } +} + /***************************************************************************//** Similar to magmablas_ztrsm_outofplace(), but copies result dX back to dB, @@ -407,6 +645,25 @@ void magmablas_ztrsm_work( magmablas_zlacpy( MagmaFull, m, n, dX, lddx, dB, lddb, queue ); } +extern "C" +void magmablas_ztrsm_work_async( + magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, + magma_int_t m, magma_int_t n, + magmaDoubleComplex alpha, + magmaDoubleComplex_const_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magmaDoubleComplex_ptr dX, magma_int_t lddx, + magma_int_t flag, + magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length, + magma_queue_t queue ) +{ + magmablas_ztrsm_outofplace_async( side, uplo, transA, diag, m, n, alpha, + dA, ldda, dB, lddb, dX, lddx, flag, + d_dinvA, dinvA_length, queue ); + // copy X to B + magmablas_zlacpy( MagmaFull, m, n, dX, lddx, dB, lddb, queue ); +} + /***************************************************************************//** Similar to magmablas_ztrsm_outofplace(), but allocates dX and d_dinvA @@ -484,3 +741,72 @@ void magmablas_ztrsm( magma_free( d_dinvA ); magma_free( dX ); } + +extern "C" +void magmablas_ztrsm_async( + magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, + magma_int_t m, magma_int_t n, + magmaDoubleComplex alpha, + magmaDoubleComplex_const_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magma_queue_t queue ) +{ + const magma_int_t nrowA = (side == MagmaLeft ? m : n); + + magma_int_t info = 0; + if ( side != MagmaLeft && side != MagmaRight ) { + info = -1; + } else if ( uplo != MagmaUpper && uplo != MagmaLower ) { + info = -2; + } else if ( transA != MagmaNoTrans && transA != MagmaTrans && transA != MagmaConjTrans ) { + info = -3; + } else if ( diag != MagmaUnit && diag != MagmaNonUnit ) { + info = -4; + } else if (m < 0) { + info = -5; + } else if (n < 0) { + info = -6; + } else if (dA == NULL) { + info = -8; + } else if (ldda < max(1,nrowA)) { + info = -9; + } else if (dB == NULL) { + info = -10; + } else if (lddb < max(1,m)) { + info = -11; + } + + if (info != 0) { + magma_xerbla( __func__, -(info) ); + return; + } + + magmaDoubleComplex_ptr d_dinvA=NULL, dX=NULL; + const magma_int_t lddx = magma_roundup( m, 32 ); + const magma_int_t size_x = lddx*n; + magma_int_t dinvA_length; + if ( side == MagmaLeft ) { + dinvA_length = magma_roundup( m, NB )*NB; + } + else { + dinvA_length = magma_roundup( n, NB )*NB; + } + + magma_zmalloc_async( &d_dinvA, dinvA_length, queue ); + magma_zmalloc_async( &dX, size_x, queue ); + + if ( d_dinvA == NULL || dX == NULL ) { + info = MAGMA_ERR_DEVICE_ALLOC; + magma_xerbla( __func__, -(info) ); + // continue to free + } + else { + magmablas_zlaset( MagmaFull, dinvA_length, 1, MAGMA_Z_ZERO, MAGMA_Z_ZERO, d_dinvA, dinvA_length, queue ); + magmablas_zlaset( MagmaFull, m, n, MAGMA_Z_ZERO, MAGMA_Z_ZERO, dX, lddx, queue ); + magmablas_ztrsm_work_async( side, uplo, transA, diag, m, n, alpha, + dA, ldda, dB, lddb, dX, lddx, 1, d_dinvA, dinvA_length, queue ); + } + + magma_free_async( d_dinvA, queue ); + magma_free_async( dX, queue ); +} diff --git a/magmablas/ztrtri_diag.cu b/magmablas/ztrtri_diag.cu index 89f057971..2e75f20e3 100644 --- a/magmablas/ztrtri_diag.cu +++ b/magmablas/ztrtri_diag.cu @@ -178,3 +178,105 @@ magmablas_ztrtri_diag( } } } + +extern "C" void +magmablas_ztrtri_diag_async( + magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, + magmaDoubleComplex_const_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr d_dinvA, + magma_queue_t queue) +{ + magma_int_t info = 0; + if (uplo != MagmaLower && uplo != MagmaUpper) + info = -1; + else if (diag != MagmaNonUnit && diag != MagmaUnit) + info = -2; + else if (n < 0) + info = -3; + else if (ldda < n) + info = -5; + + if (info != 0) { + magma_xerbla( __func__, -(info) ); + return; //info + } + + const int nblocks = magma_ceildiv( n, IB ); + + cudaMemsetAsync( d_dinvA, 0, magma_roundup( n, NB )*NB * sizeof(magmaDoubleComplex), queue->cuda_stream() ); + + if ( uplo == MagmaLower ) { + // invert diagonal IB x IB inner blocks + ztrtri_diag_lower_kernel + <<< nblocks, IB, 0, queue->cuda_stream() >>> + ( diag, n, dA, ldda, d_dinvA ); + + // build up NB x NB blocks (assuming IB=16 here): + // use 16 x 16 blocks to build 32 x 32 blocks, 1 x (1 x npages) grid, 4 x 4 threads; + // then 32 x 32 blocks to build 64 x 64 blocks, 1 x (2 x npages) grid, 8 x 4 threads; + // then 64 x 64 blocks to build 128 x 128 blocks, 1 x (4 x npages) grid, 16 x 4 threads; + // then 128 x 128 blocks to build 256 x 256 blocks, 2 x (8 x npages) grid, 16 x 4 threads. + for( int jb=IB; jb < NB; jb *= 2 ) { + const int kb = jb*2; + const int npages = magma_ceildiv( n, kb ); + const dim3 threads( (jb <= 32 ? jb/4 : 16), 4 ); + const dim3 grid( jb/(threads.x*threads.y), npages*(jb/16) ); // emulate 3D grid: NX * (NY*npages), for CUDA ARCH 1.x + + //printf( "n %d, jb %d, grid %d x %d (%d x %d)\n", n, jb, grid.x, grid.y, grid.y / npages, npages ); + switch (jb) { + case 16: + triple_zgemm16_part1_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm16_part2_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + break; + case 32: + triple_zgemm32_part1_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm32_part2_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + break; + case 64: + triple_zgemm64_part1_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm64_part2_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + break; + default: + triple_zgemm_above64_part1_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm_above64_part2_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm_above64_part3_lower_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + break; + } + if ( kb >= n ) break; + } + } + else { + ztrtri_diag_upper_kernel + <<< nblocks, IB, 0, queue->cuda_stream() >>> + ( diag, n, dA, ldda, d_dinvA ); + + // update the inverse up to the size of IB + for( int jb=IB; jb < NB; jb *= 2 ) { + const int kb = jb*2; + const int npages = magma_ceildiv( n, kb ); + const dim3 threads( (jb <= 32 ? jb/4 : 16), 4 ); + const dim3 grid( jb/(threads.x*threads.y), npages*(jb/16) ); // emulate 3D grid: NX * (NY*npages), for CUDA ARCH 1.x + + switch (jb) { + case 16: + triple_zgemm16_part1_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm16_part2_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + break; + case 32: + triple_zgemm32_part1_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm32_part2_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + break; + case 64: + triple_zgemm64_part1_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm64_part2_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + break; + default: + triple_zgemm_above64_part1_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm_above64_part2_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + triple_zgemm_above64_part3_upper_kernel<<< grid, threads, 0, queue->cuda_stream() >>>( n, dA, ldda, d_dinvA, jb, npages ); + break; + } + if ( kb >= n ) break; + } + } +} diff --git a/src/zgerbt_gpu.cpp b/src/zgerbt_gpu.cpp index 6c0e8f405..436b13ce1 100644 --- a/src/zgerbt_gpu.cpp +++ b/src/zgerbt_gpu.cpp @@ -169,3 +169,67 @@ magma_zgerbt_gpu( return *info; } + +extern "C" magma_int_t +magma_zgerbt_gpu_async( + const magma_bool_t gen, const magma_int_t n, const magma_int_t nrhs, + magmaDoubleComplex_ptr const dA, magma_int_t const ldda, + magmaDoubleComplex_ptr const dB, magma_int_t const lddb, + magmaDoubleComplex_ptr const dU, magmaDoubleComplex_ptr const dV, + magma_int_t *info, + magma_queue_t queue) +{ +#define dB(i_, j_) (dB + (i_) + (j_)*lddb) + + /* Function Body */ + *info = 0; + if ( ! (gen == MagmaTrue) && + ! (gen == MagmaFalse) ) { + *info = -1; + } + else if (n < 0) { + *info = -2; + } else if (nrhs < 0) { + *info = -3; + } else if (ldda < max(1,n)) { + *info = -5; + } else if (lddb < max(1,n)) { + *info = -7; + } + if (*info != 0) { + magma_xerbla( __func__, -(*info) ); + return *info; + } + + /* Quick return if possible */ + if (nrhs == 0 || n == 0) + return *info; + + magmaDoubleComplex *U, *V; + if (MAGMA_SUCCESS != magma_zmalloc_cpu( &U, 2*n ) || + MAGMA_SUCCESS != magma_zmalloc_cpu( &V, 2*n )) + { + *info = MAGMA_ERR_HOST_ALLOC; + goto cleanup; + } + + /* Initialize Butterfly matrix on the CPU */ + if (gen == MagmaTrue) + init_butterfly( 2*n, U, V ); + + /* Copy the butterfly to the GPU */ + magma_zsetvector_async( 2*n, U, 1, dU, 1, queue ); + magma_zsetvector_async( 2*n, V, 1, dV, 1, queue ); + + /* Perform Partial Random Butterfly Transformation on the GPU */ + magmablas_zprbt( n, dA, ldda, dU, dV, queue ); + + /* Compute U^T * b on the GPU*/ + magmablas_zprbt_mtv(n, nrhs, dU, dB, lddb, queue); + + cleanup: + magma_free_cpu( U ); + magma_free_cpu( V ); + + return *info; +} diff --git a/src/zgerfs_nopiv_gpu.cpp b/src/zgerfs_nopiv_gpu.cpp index 2c2f9c04a..3432d67b5 100644 --- a/src/zgerfs_nopiv_gpu.cpp +++ b/src/zgerfs_nopiv_gpu.cpp @@ -8,6 +8,7 @@ @precisions normal z -> s d c */ +#include #include "magma_internal.h" #define BWDMAX 1.0 @@ -275,3 +276,192 @@ magma_zgerfs_nopiv_gpu( return *info; } + +extern "C" magma_int_t +magma_zgerfs_nopiv_gpu_async( + magma_trans_t trans, magma_int_t n, magma_int_t nrhs, + magmaDoubleComplex_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magmaDoubleComplex_ptr dX, magma_int_t lddx, + magmaDoubleComplex_ptr dworkd, magmaDoubleComplex_ptr dAF, + magma_int_t *iter, + magma_int_t *info, + magma_int_t iter_max, + double bwdmax, + magma_queue_t queue) +{ +#define dB(i,j) (dB + (i) + (j)*lddb) +#define dX(i,j) (dX + (i) + (j)*lddx) +#define dR(i,j) (dR + (i) + (j)*lddr) + + /* Constants */ + const magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; + const magmaDoubleComplex c_one = MAGMA_Z_ONE; + constexpr magma_int_t ione = 1; + const auto n_elem = n * nrhs; + + /* Local variables */ + magmaDoubleComplex_ptr dR; + magmaDoubleComplex Xnrmv, Rnrmv; + double Anrm, Xnrm, Rnrm, cte, eps, work[1]; + double sae, best_sae = std::numeric_limits::infinity(); + magma_int_t i, j, iiter, lddsa, lddr; + magmaDoubleComplex_ptr best_dX = nullptr; + + /* Check arguments */ + *iter = 0; + *info = 0; + if ( n < 0 ) + *info = -1; + else if ( nrhs < 0 ) + *info = -2; + else if ( ldda < max(1,n)) + *info = -4; + else if ( lddb < max(1,n)) + *info = -8; + else if ( lddx < max(1,n)) + *info = -10; + + if (*info != 0) { + magma_xerbla( __func__, -(*info) ); + return *info; + } + + if ( n == 0 || nrhs == 0 ) + return *info; + + lddsa = n; + lddr = n; + + dR = dworkd; + + eps = lapackf77_dlamch("Epsilon"); + if ( bwdmax == 0 ) { + Anrm = cte = 0; + } else { + Anrm = magmablas_zlange(MagmaInfNorm, n, n, dA, ldda, (magmaDouble_ptr) dworkd, n_elem, queue); + cte = Anrm * eps * magma_dsqrt((double) n) * bwdmax; + } + + // residual dR = dB - dA*dX in double precision + magmablas_zlacpy( MagmaFull, n, nrhs, dB, lddb, dR, lddr, queue ); + if ( nrhs == 1 ) { + magma_zgemv( trans, n, n, + c_neg_one, dA, ldda, + dX, 1, + c_one, dR, 1, queue ); + } + else { + magma_zgemm( trans, MagmaNoTrans, n, nrhs, n, + c_neg_one, dA, ldda, + dX, lddx, + c_one, dR, lddr, queue ); + } + + if ( bwdmax == 0 ) goto refinement; + + // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange? + // TODO implement Xamax for one GPU copy less + for( j=0; j < nrhs; j++ ) { + i = magma_izamax( n, dX(0,j), 1, queue ) - 1; + magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1, queue ); + Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, work ); + + i = magma_izamax( n, dR(0,j), 1, queue ) - 1; + magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1, queue ); + Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, work ); + //printf("Rnrm : %e, Xnrm*cte : %e\n", Rnrm, Xnrm*cte); + if ( Rnrm > Xnrm*cte ) { + goto refinement; + } + } + + *iter = 0; + goto cleanup; + + refinement: + magma_zmalloc_async( &best_dX, n_elem, queue ); + for( iiter=1; iiter < iter_max; ) { + *info = 0; + // solve dAF*dX = dR + // it's okay that dR is used for both dB input and dX output. + magma_zgetrs_nopiv_gpu_async( trans, n, nrhs, dAF, lddsa, dR, lddr, info, queue ); + if (*info != 0) { + *iter = -3; + goto fallback; + } + + // Add correction and setup residual + // dX += dR --and-- + // dR = dB + // This saves going through dR a second time (if done with one more kernel). + // -- not really: first time is read, second time is write. + for( j=0; j < nrhs; ++j ) { + magmablas_zaxpycp( n, dR(0,j), dX(0,j), dB(0,j), queue ); + } + + // residual dR = dB - dA*dX in double precision + if ( nrhs == 1 ) { + magma_zgemv( trans, n, n, + c_neg_one, dA, ldda, + dX, 1, + c_one, dR, 1, queue ); + } + else { + magma_zgemm( trans, MagmaNoTrans, n, nrhs, n, + c_neg_one, dA, ldda, + dX, lddx, + c_one, dR, lddr, queue ); + } + + /* Sum of absolute error is compared between each iteration + * and the solution with best residuals copied on the side. */ + sae = magma_dzasum(n_elem, dR, 1, queue); + if (sae < best_sae) { + best_sae = sae; + magma_zcopymatrix_async(n, nrhs, dX, n, best_dX, n, queue); + } + if (bwdmax == 0) goto L20; + + /* Check whether the nrhs normwise backward errors satisfy the + * stopping criterion. If yes, set ITER=IITER > 0 and return. */ + for( j=0; j < nrhs; ++j ) { + i = magma_izamax( n, dX(0,j), 1, queue ) - 1; + magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1, queue ); + Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, work ); + + i = magma_izamax( n, dR(0,j), 1, queue ) - 1; + magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1, queue ); + Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, work ); + + if ( bwdmax == 0 || Rnrm > Xnrm*cte ) { + goto L20; + } + } + + /* If we are here, the nrhs normwise backward errors satisfy + * the stopping criterion, we are good to exit. */ + *iter = iiter; + goto cleanup; + + L20: + ++iiter; + } + /* If we are at this place of the code, this is because we have + * performed ITER=iter_max iterations and never satisified the + * stopping criterion. Set up the ITER flag accordingly. */ + *iter = -iter_max - 1; + + fallback: + /* Iterative refinement failed to converge to a + * satisfactory solution. */ + + cleanup: + + if (best_dX) { + magma_zcopymatrix_async( n, nrhs, best_dX, n, dX, n, queue ); + magma_free_async( best_dX, queue ); + } + + return *info; +} diff --git a/src/zgesv_nopiv_gpu.cpp b/src/zgesv_nopiv_gpu.cpp index c142b7092..ab76c5ef9 100644 --- a/src/zgesv_nopiv_gpu.cpp +++ b/src/zgesv_nopiv_gpu.cpp @@ -93,3 +93,38 @@ magma_zgesv_nopiv_gpu( return *info; } + +extern "C" magma_int_t +magma_zgesv_nopiv_gpu_async( + magma_int_t n, magma_int_t nrhs, + magmaDoubleComplex_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magma_int_t *info, magma_queue_t queue ) +{ + *info = 0; + if (n < 0) { + *info = -1; + } else if (nrhs < 0) { + *info = -2; + } else if (ldda < max(1,n)) { + *info = -4; + } else if (lddb < max(1,n)) { + *info = -6; + } + if (*info != 0) { + magma_xerbla( __func__, -(*info) ); + return *info; + } + + /* Quick return if possible */ + if (n == 0 || nrhs == 0) { + return *info; + } + + magma_zgetrf_nopiv_gpu_async( n, n, dA, ldda, info, queue ); + if ( *info == MAGMA_SUCCESS ) { + magma_zgetrs_nopiv_gpu_async( MagmaNoTrans, n, nrhs, dA, ldda, dB, lddb, info, queue ); + } + + return *info; +} diff --git a/src/zgesv_rbt.cpp b/src/zgesv_rbt.cpp index a5cd7620d..9b11daa77 100644 --- a/src/zgesv_rbt.cpp +++ b/src/zgesv_rbt.cpp @@ -201,3 +201,201 @@ magma_zgesv_rbt( return *info; } + +extern "C" magma_int_t +magma_zgesv_rbt_async( + const magma_bool_t refine, const magma_int_t n, const magma_int_t nrhs, + const magmaDoubleComplex *const dA_, const magma_int_t lda, + magmaDoubleComplex *const dB_, const magma_int_t ldb, + magma_int_t *const info, + const magma_int_t iter_max, const double bwdmax, + magma_queue_t queue) +{ + /* Constants */ + const magmaDoubleComplex c_zero = MAGMA_Z_ZERO; + const magmaDoubleComplex c_one = MAGMA_Z_ONE; + + /* Local variables */ + magma_int_t nn = n;//magma_roundup( n, 4 ); + magmaDoubleComplex_ptr dA=NULL, dB=NULL, dAo=NULL, dBo=NULL, dwork=NULL, dU=NULL, dV=NULL; + magma_int_t iter; + + /* Function Body */ + *info = 0; + if ( ! (refine == MagmaTrue) && + ! (refine == MagmaFalse) ) { + *info = -1; + } + else if (n < 0) { + *info = -2; + } else if (nrhs < 0) { + *info = -3; + } else if (lda < max(1,n)) { + *info = -5; + } else if (ldb < max(1,n)) { + *info = -7; + } + if (*info != 0) { + magma_xerbla( __func__, -(*info) ); + return *info; + } + + /* Quick return if possible */ + if (nrhs == 0 || n == 0) + return *info; + + // TODO: investigate failures on AMD GPUs + // For now ignore refine and always set it to False + // there is probably a bug in the refinement code for the HIP backend + #ifdef MAGMA_HAVE_HIP + refine = MagmaFalse; + #endif + + if (MAGMA_SUCCESS != magma_zmalloc_async( &dA, nn*nn, queue ) || + MAGMA_SUCCESS != magma_zmalloc_async( &dB, nn*nrhs, queue )) + { + *info = MAGMA_ERR_DEVICE_ALLOC; + goto cleanup; + } + + /* Allocate memory for the buterfly matrices */ + if ((MAGMA_SUCCESS != magma_zmalloc_async( &dU, 2*n, queue )) || + (MAGMA_SUCCESS != magma_zmalloc_async( &dV, 2*n, queue ))) { + magma_free_async( dU, queue ); + magma_free_async( dV, queue ); + *info = MAGMA_ERR_DEVICE_ALLOC; + return *info; + } + + if (refine == MagmaTrue) { + if (MAGMA_SUCCESS != magma_zmalloc_async( &dAo, nn*nn, queue ) || + MAGMA_SUCCESS != magma_zmalloc_async( &dwork, nn*nrhs, queue ) || + MAGMA_SUCCESS != magma_zmalloc_async( &dBo, nn*nrhs, queue )) + { + *info = MAGMA_ERR_DEVICE_ALLOC; + goto cleanup; + } + } + + magmablas_zlaset( MagmaFull, nn, nn, c_zero, c_one, dA, nn, queue ); + + magma_zcopymatrix_async( n, n, dA_, lda, dA, nn, queue ); + magma_zcopymatrix_async( n, nrhs, dB_, ldb, dB, nn, queue ); + + *info = magma_zgerbt_gpu_async( MagmaTrue, nn, nrhs, dA, nn, dB, nn, dU, dV, info, queue ); + if (*info != MAGMA_SUCCESS) { + return *info; + } + + if (refine == MagmaTrue) { + magma_zcopymatrix_async( nn, nn, dA, nn, dAo, nn, queue ); + magma_zcopymatrix_async( nn, nrhs, dB, nn, dBo, nn, queue ); + } + /* Solve the system U^TAV.y = U^T.b on the GPU */ + magma_zgesv_nopiv_gpu_async( nn, nrhs, dA, nn, dB, nn, info, queue ); + + /* Iterative refinement */ + if (refine == MagmaTrue) { + magma_zgerfs_nopiv_gpu_async( MagmaNoTrans, nn, nrhs, dAo, nn, dBo, nn, dB, nn, dwork, dA, &iter, info, iter_max, bwdmax, queue ); + } + + magmablas_zprbt_mv(nn, nrhs, dV, dB, nn, queue); + + magma_zcopymatrix_async( n, nrhs, dB, nn, dB_, ldb, queue ); + + cleanup: + magma_free_async( dA, queue ); + magma_free_async( dB, queue ); + magma_free_async( dU, queue ); + magma_free_async( dV, queue ); + + if (refine == MagmaTrue) { + magma_free_async( dAo, queue ); + magma_free_async( dBo, queue ); + magma_free_async( dwork, queue ); + } + + return *info; +} + +extern "C" magma_int_t +magma_zgesv_rbt_refine_async( + const magma_int_t n, const magma_int_t nrhs, + const magmaDoubleComplex *const dA_, const magma_int_t lda, + magmaDoubleComplex *const dB_, const magma_int_t ldb, + magma_int_t *info, + const magma_int_t iter_max, const double bwdmax, + magma_queue_t queue) +{ + /* Constants */ + const magmaDoubleComplex c_zero = MAGMA_Z_ZERO; + const magmaDoubleComplex c_one = MAGMA_Z_ONE; + + /* Local variables */ + magma_int_t nn = n;//magma_roundup( n, 4 ); + magmaDoubleComplex_ptr dA=NULL, dB=NULL, dAo=NULL, dBo=NULL, dwork=NULL; + magma_int_t iter; + + /* Function Body */ + *info = 0; + if (n < 0) { + *info = -2; + } else if (nrhs < 0) { + *info = -3; + } else if (lda < max(1,n)) { + *info = -5; + } else if (ldb < max(1,n)) { + *info = -7; + } + if (*info != 0) { + magma_xerbla( __func__, -(*info) ); + return *info; + } + + /* Quick return if possible */ + if (nrhs == 0 || n == 0) + return *info; + + // TODO: investigate failures on AMD GPUs + // For now ignore refine and always set it to False + // there is probably a bug in the refinement code for the HIP backend + #ifdef MAGMA_HAVE_HIP + *info = -8; + return *info; + #endif + + if (MAGMA_SUCCESS != magma_zmalloc_async( &dA, nn*nn, queue ) || + MAGMA_SUCCESS != magma_zmalloc_async( &dB, nn*nrhs, queue )) + { + *info = MAGMA_ERR_DEVICE_ALLOC; + goto cleanup; + } + + if (MAGMA_SUCCESS != magma_zmalloc_async( &dAo, nn*nn, queue ) || + MAGMA_SUCCESS != magma_zmalloc_async( &dwork, nn*nrhs, queue ) || + MAGMA_SUCCESS != magma_zmalloc_async( &dBo, nn*nrhs, queue )) + { + *info = MAGMA_ERR_DEVICE_ALLOC; + goto cleanup; + } + + magmablas_zlaset( MagmaFull, nn, nn, c_zero, c_one, dA, nn, queue ); + + magma_zcopymatrix_async( n, n, dA_, lda, dA, nn, queue ); + magma_zcopymatrix_async( nn, nn, dA_, nn, dAo, nn, queue ); + magma_zcopymatrix_async( n, nrhs, dB_, ldb, dB, nn, queue ); + magma_zcopymatrix_async( nn, nrhs, dB_, nn, dBo, nn, queue ); + + magma_zgerfs_nopiv_gpu_async( MagmaNoTrans, nn, nrhs, dAo, nn, dBo, nn, dB, nn, dwork, dA, &iter, info, iter_max, bwdmax, queue ); + + magma_zcopymatrix_async( n, nrhs, dB, nn, dB_, ldb, queue ); + + cleanup: + magma_free_async( dA, queue ); + magma_free_async( dB, queue ); + magma_free_async( dAo, queue ); + magma_free_async( dBo, queue ); + magma_free_async( dwork, queue ); + + return *info; +} diff --git a/src/zgetrf_nopiv_gpu.cpp b/src/zgetrf_nopiv_gpu.cpp index 472491e2f..5abbbafa7 100644 --- a/src/zgetrf_nopiv_gpu.cpp +++ b/src/zgetrf_nopiv_gpu.cpp @@ -209,3 +209,153 @@ magma_zgetrf_nopiv_gpu( return *info; } /* magma_zgetrf_nopiv_gpu */ + +extern "C" magma_int_t +magma_zgetrf_nopiv_gpu_async( + magma_int_t m, magma_int_t n, + magmaDoubleComplex_ptr dA, magma_int_t ldda, + magma_int_t *info, + magma_queue_t queue) +{ +#ifdef MAGMA_HAVE_OPENCL +#define dA(i_, j_) dA, (dA_offset + (i_)*nb + (j_)*nb*ldda) +#else +#define dA(i_, j_) (dA + (i_)*nb + (j_)*nb*ldda) +#endif + + const magmaDoubleComplex c_one = MAGMA_Z_ONE; + const magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; + + magma_int_t iinfo, nb; + magma_int_t maxm, mindim; + magma_int_t j, rows, s, ldwork; + magmaDoubleComplex *work; + + /* Check arguments */ + *info = 0; + if (m < 0) + *info = -1; + else if (n < 0) + *info = -2; + else if (ldda < max(1,m)) + *info = -4; + + if (*info != 0) { + magma_xerbla( __func__, -(*info) ); + return *info; + } + + /* Quick return if possible */ + if (m == 0 || n == 0) + return *info; + + /* Function Body */ + mindim = min( m, n ); + nb = magma_get_zgetrf_nb( m, n ); + s = mindim / nb; + + magma_queue_t queues[2]; + queues[0] = queue; + magma_queue_create( queue->device(), &queues[1] ); + + if (nb <= 1 || nb >= min(m,n)) { + /* Use CPU code. */ + if ( MAGMA_SUCCESS != magma_zmalloc_cpu( &work, m*n )) { + *info = MAGMA_ERR_HOST_ALLOC; + return *info; + } + magma_zgetmatrix( m, n, dA(0,0), ldda, work, m, queues[0] ); + magma_zgetrf_nopiv( m, n, work, m, info ); + magma_zsetmatrix( m, n, work, m, dA(0,0), ldda, queues[0] ); + magma_free_cpu( work ); + } + else { + /* Use hybrid blocked code. */ + maxm = magma_roundup( m, 32 ); + + ldwork = maxm; + if (MAGMA_SUCCESS != magma_zmalloc_pinned( &work, ldwork*nb )) { + *info = MAGMA_ERR_HOST_ALLOC; + return *info; + } + + for( j=0; j < s; j++ ) { + // get j-th panel from device + magma_queue_sync( queues[1] ); + magma_zgetmatrix_async( m-j*nb, nb, dA(j,j), ldda, work, ldwork, queues[0] ); + + if ( j > 0 ) { + magma_ztrsm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit, + nb, n - (j+1)*nb, + c_one, dA(j-1,j-1), ldda, + dA(j-1,j+1), ldda, queues[1] ); + magma_zgemm( MagmaNoTrans, MagmaNoTrans, + m-j*nb, n-(j+1)*nb, nb, + c_neg_one, dA(j, j-1), ldda, + dA(j-1,j+1), ldda, + c_one, dA(j, j+1), ldda, queues[1] ); + } + + // do the cpu part + rows = m - j*nb; + magma_queue_sync( queues[0] ); + magma_zgetrf_nopiv( rows, nb, work, ldwork, &iinfo ); + if ( *info == 0 && iinfo > 0 ) + *info = iinfo + j*nb; + + // send j-th panel to device + magma_zsetmatrix_async( m-j*nb, nb, work, ldwork, dA(j, j), ldda, queues[1] ); + + // do the small non-parallel computations (next panel update) + if ( s > j+1 ) { + magma_ztrsm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit, + nb, nb, + c_one, dA(j, j), ldda, + dA(j, j+1), ldda, queues[1] ); + magma_zgemm( MagmaNoTrans, MagmaNoTrans, + m-(j+1)*nb, nb, nb, + c_neg_one, dA(j+1, j), ldda, + dA(j, j+1), ldda, + c_one, dA(j+1, j+1), ldda, queues[1] ); + } + else { + magma_ztrsm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit, + nb, n-s*nb, + c_one, dA(j, j ), ldda, + dA(j, j+1), ldda, queues[1] ); + magma_zgemm( MagmaNoTrans, MagmaNoTrans, + m-(j+1)*nb, n-(j+1)*nb, nb, + c_neg_one, dA(j+1, j ), ldda, + dA(j, j+1), ldda, + c_one, dA(j+1, j+1), ldda, queues[1] ); + } + } + + magma_int_t nb0 = min( m - s*nb, n - s*nb ); + if ( nb0 > 0 ) { + rows = m - s*nb; + + magma_zgetmatrix( rows, nb0, dA(s,s), ldda, work, ldwork, queues[1] ); + + // do the cpu part + magma_zgetrf_nopiv( rows, nb0, work, ldwork, &iinfo ); + if ( *info == 0 && iinfo > 0 ) + *info = iinfo + s*nb; + + // send j-th panel to device + magma_zsetmatrix( rows, nb0, work, ldwork, dA(s,s), ldda, queues[1] ); + + magmablas_ztrsm_async( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit, + nb0, n-s*nb-nb0, + c_one, dA(s,s), ldda, + dA(s,s)+nb0, ldda, queues[1] ); + } + magma_queue_sync( queues[1] ); + + magma_free_pinned( work ); + } + + magma_queue_destroy( queues[1] ); + + return *info; +} /* magma_zgetrf_nopiv_gpu_async */ diff --git a/src/zgetrs_nopiv_gpu.cpp b/src/zgetrs_nopiv_gpu.cpp index bd9dea284..1a56bee75 100644 --- a/src/zgetrs_nopiv_gpu.cpp +++ b/src/zgetrs_nopiv_gpu.cpp @@ -130,3 +130,63 @@ magma_zgetrs_nopiv_gpu( return *info; } + +extern "C" magma_int_t +magma_zgetrs_nopiv_gpu_async( + magma_trans_t trans, magma_int_t n, magma_int_t nrhs, + magmaDoubleComplex_ptr dA, magma_int_t ldda, + magmaDoubleComplex_ptr dB, magma_int_t lddb, + magma_int_t *info, magma_queue_t queue) +{ + // Constants + const magmaDoubleComplex c_one = MAGMA_Z_ONE; + + // Local variables + const bool notran = trans == MagmaNoTrans; + + *info = 0; + if ( (! notran) && + (trans != MagmaTrans) && + (trans != MagmaConjTrans) ) { + *info = -1; + } else if (n < 0) { + *info = -2; + } else if (nrhs < 0) { + *info = -3; + } else if (ldda < max(1,n)) { + *info = -5; + } else if (lddb < max(1,n)) { + *info = -7; + } + if (*info != 0) { + magma_xerbla( __func__, -(*info) ); + return *info; + } + + /* Quick return if possible */ + if (n == 0 || nrhs == 0) { + return *info; + } + + if (notran) { + /* Solve A * X = B. */ + if ( nrhs == 1) { + magma_ztrsv( MagmaLower, MagmaNoTrans, MagmaUnit, n, dA, ldda, dB, 1, queue ); + magma_ztrsv( MagmaUpper, MagmaNoTrans, MagmaNonUnit, n, dA, ldda, dB, 1, queue ); + } else { + magma_ztrsm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit, n, nrhs, c_one, dA, ldda, dB, lddb, queue ); + magma_ztrsm( MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit, n, nrhs, c_one, dA, ldda, dB, lddb, queue ); + } + } else { + /* Solve A**T * X = B or A**H * X = B. */ + if ( nrhs == 1) { + magma_ztrsv( MagmaUpper, trans, MagmaNonUnit, n, dA, ldda, dB, 1, queue ); + magma_ztrsv( MagmaLower, trans, MagmaUnit, n, dA, ldda, dB, 1, queue ); + } else { + magma_ztrsm( MagmaLeft, MagmaUpper, trans, MagmaNonUnit, n, nrhs, c_one, dA, ldda, dB, lddb, queue ); + magma_ztrsm( MagmaLeft, MagmaLower, trans, MagmaUnit, n, nrhs, c_one, dA, ldda, dB, lddb, queue ); + } + } + + return *info; +} From 9ccac3a6e59a8e0f0872ab5087c5d7b16348d9d2 Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Tue, 10 Dec 2024 23:21:08 +0000 Subject: [PATCH 02/27] Remove deprecated v1 interface --- control/trace.cpp | 16 +- docs/documentation.txt | 5 - include/Makefile.src | 4 - include/magma.h | 10 - include/magma_copy_v1.h | 361 --------- include/magmablas_v1.h | 102 --- include/magmablas_v1_map.h | 72 -- include/magmablas_z_v1.h | 1250 ------------------------------- include/magmablas_z_v1_map.h | 135 ---- include/magmablas_zc_v1.h | 91 --- include/magmablas_zc_v1_map.h | 24 - interface_cuda/Makefile.src | 3 - interface_cuda/blas_z_v1.cpp | 549 -------------- interface_cuda/copy_v1.cpp | 133 ---- interface_cuda/interface.cpp | 49 -- interface_cuda/interface_v1.cpp | 189 ----- magmablas/Makefile.src | 2 - magmablas/magmablas_z_v1.cpp | 825 -------------------- magmablas/magmablas_zc_v1.cpp | 96 --- src/zpotrf_mgpu_right.cpp | 3 - 20 files changed, 9 insertions(+), 3910 deletions(-) delete mode 100644 include/magma_copy_v1.h delete mode 100644 include/magmablas_v1.h delete mode 100644 include/magmablas_v1_map.h delete mode 100644 include/magmablas_z_v1.h delete mode 100644 include/magmablas_z_v1_map.h delete mode 100644 include/magmablas_zc_v1.h delete mode 100644 include/magmablas_zc_v1_map.h delete mode 100644 interface_cuda/blas_z_v1.cpp delete mode 100644 interface_cuda/copy_v1.cpp delete mode 100644 interface_cuda/interface_v1.cpp delete mode 100644 magmablas/magmablas_z_v1.cpp delete mode 100644 magmablas/magmablas_zc_v1.cpp diff --git a/control/trace.cpp b/control/trace.cpp index 480b8b1b1..cf7245efa 100644 --- a/control/trace.cpp +++ b/control/trace.cpp @@ -14,7 +14,6 @@ #include #include "magma_internal.h" -#include "magmablas_v1.h" // set TRACE_METHOD = 2 to record start time as // later of CPU time and previous event's end time. @@ -79,9 +78,8 @@ void trace_init( int ncore, int ngpu, int nqueue, magma_queue_t* queues ) int t = dev*glog.nqueue + s; glog.gpu_id[t] = 0; glog.queues[t] = queues[t]; + magma_queue_sync(queues[t]); } - magma_setdevice( dev ); - magma_device_sync(); } // now that all GPUs are sync'd, record start time for( int dev = 0; dev < ngpu; ++dev ) { @@ -94,8 +92,10 @@ void trace_init( int ncore, int ngpu, int nqueue, magma_queue_t* queues ) } // sync again for( int dev = 0; dev < ngpu; ++dev ) { - magma_setdevice( dev ); - magma_device_sync(); + for( int s = 0; s < nqueue; ++s ) { + int t = dev*glog.nqueue + s; + magma_queue_sync(queues[t]); + } } glog.cpu_first = magma_wtime(); } @@ -173,8 +173,10 @@ void trace_finalize( const char* filename, const char* cssfile ) // sync devices for( int dev = 0; dev < glog.ngpu; ++dev ) { - magma_setdevice( dev ); - magma_device_sync(); + for( int s = 0; s < nqueue; ++s ) { + int t = dev*glog.nqueue + s; + magma_queue_sync(queues[t]); + } } double time = magma_wtime() - glog.cpu_first; diff --git a/docs/documentation.txt b/docs/documentation.txt index 9b91ec983..200198361 100644 --- a/docs/documentation.txt +++ b/docs/documentation.txt @@ -188,11 +188,6 @@ using the `-D` compiler flag, e.g., `-DMAGMA_WITH_MKL` in CFLAGS. If linked with ACML 5 or later, allows MAGMA to get ACML's version. ACML's number of threads are set via OpenMP. -- `MAGMA_NO_V1` - - Disables MAGMA v1.x compatability. Skips compiling non-queue versions - of MAGMA BLAS routines, and simplifies magma_init(). - - `MAGMA_NOAFFINITY` Disables thread affinity, available in glibc 2.6 and later. diff --git a/include/Makefile.src b/include/Makefile.src index af83d8314..2c6cbe42c 100644 --- a/include/Makefile.src +++ b/include/Makefile.src @@ -18,11 +18,7 @@ hdr += \ $(cdir)/magma_z.h \ $(cdir)/magma_zc.h \ $(cdir)/magmablas_z.h \ - $(cdir)/magmablas_z_v1.h \ - $(cdir)/magmablas_z_v1_map.h \ $(cdir)/magmablas_zc.h \ - $(cdir)/magmablas_zc_v1.h \ - $(cdir)/magmablas_zc_v1_map.h \ $(cdir)/magma_zlapack.h \ $(cdir)/magma_zbulge.h \ $(cdir)/magma_zbulgeinc.h \ diff --git a/include/magma.h b/include/magma.h index 29362099d..425dd22b6 100644 --- a/include/magma.h +++ b/include/magma.h @@ -9,10 +9,6 @@ #ifndef MAGMA_H #define MAGMA_H -#ifdef MAGMA_NO_V1 -#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h" -#endif - // ============================================================================= // MAGMA configuration #include "magma_config.h" @@ -25,13 +21,7 @@ #endif #endif -// Include the MAGMA v2 and v1 APIs, -// then map names to the v1 API (e.g., magma_zgemm => magma_zgemm_v1). -// Some functions (like setmatrix_async) are the same in v1 and v2, -// so are provided by the v2 API. #include "magma_v2.h" -#include "magmablas_v1.h" -#include "magmablas_v1_map.h" #undef MAGMA_API #define MAGMA_API 1 diff --git a/include/magma_copy_v1.h b/include/magma_copy_v1.h deleted file mode 100644 index dbe1af138..000000000 --- a/include/magma_copy_v1.h +++ /dev/null @@ -1,361 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date -*/ - -#ifndef MAGMA_COPY_V1_H -#define MAGMA_COPY_V1_H - -#ifdef MAGMA_NO_V1 -#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h" -#endif - -#include "magma_types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// ============================================================================= -// copying vectors -// set copies host to device -// get copies device to host -// copy copies device to device -// (with CUDA unified addressing, copy can be between same or different devices) -// Add the function, file, and line for error-reporting purposes. -// async versions are same for v1 and v2; see magmablas_q.h - -#define magma_setvector_v1( n, elemSize, hx_src, incx, dy_dst, incy ) \ - magma_setvector_v1_internal( n, elemSize, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ ) - -#define magma_getvector_v1( n, elemSize, dx_src, incx, hy_dst, incy ) \ - magma_getvector_v1_internal( n, elemSize, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ ) - -#define magma_copyvector_v1( n, elemSize, dx_src, incx, dy_dst, incy ) \ - magma_copyvector_v1_internal( n, elemSize, dx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_setvector_v1_internal( - magma_int_t n, magma_int_t elemSize, - const void *hx_src, magma_int_t incx, - magma_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_getvector_v1_internal( - magma_int_t n, magma_int_t elemSize, - magma_const_ptr dx_src, magma_int_t incx, - void *hy_dst, magma_int_t incy, - const char* func, const char* file, int line ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_copyvector_v1_internal( - magma_int_t n, magma_int_t elemSize, - magma_const_ptr dx_src, magma_int_t incx, - magma_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ); - - -// ============================================================================= -// copying sub-matrices (contiguous columns) - -#define magma_setmatrix_v1( m, n, elemSize, hA_src, lda, dB_dst, lddb ) \ - magma_setmatrix_v1_internal( m, n, elemSize, hA_src, lda, dB_dst, lddb, __func__, __FILE__, __LINE__ ) - -#define magma_getmatrix_v1( m, n, elemSize, dA_src, ldda, hB_dst, ldb ) \ - magma_getmatrix_v1_internal( m, n, elemSize, dA_src, ldda, hB_dst, ldb, __func__, __FILE__, __LINE__ ) - -#define magma_copymatrix_v1( m, n, elemSize, dA_src, ldda, dB_dst, lddb ) \ - magma_copymatrix_v1_internal( m, n, elemSize, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_setmatrix_v1_internal( - magma_int_t m, magma_int_t n, magma_int_t elemSize, - const void *hA_src, magma_int_t lda, - magma_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_getmatrix_v1_internal( - magma_int_t m, magma_int_t n, magma_int_t elemSize, - magma_const_ptr dA_src, magma_int_t ldda, - void *hB_dst, magma_int_t ldb, - const char* func, const char* file, int line ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_copymatrix_v1_internal( - magma_int_t m, magma_int_t n, magma_int_t elemSize, - magma_const_ptr dA_src, magma_int_t ldda, - magma_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ); - - -// ============================================================================= -// copying vectors - version for magma_int_t - -#define magma_isetvector_v1( n, hx_src, incx, dy_dst, incy ) \ - magma_isetvector_v1_internal( n, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ ) - -#define magma_igetvector_v1( n, dx_src, incx, hy_dst, incy ) \ - magma_igetvector_v1_internal( n, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ ) - -#define magma_icopyvector_v1( n, dx_src, incx, dy_dst, incy ) \ - magma_icopyvector_v1_internal( n, dx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_isetvector_v1_internal( - magma_int_t n, - const magma_int_t *hx_src, magma_int_t incx, - magmaInt_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_setvector_v1_internal( n, sizeof(magma_int_t), - hx_src, incx, - dy_dst, incy, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_igetvector_v1_internal( - magma_int_t n, - magmaInt_const_ptr dx_src, magma_int_t incx, - magma_int_t *hy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_getvector_v1_internal( n, sizeof(magma_int_t), - dx_src, incx, - hy_dst, incy, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_icopyvector_v1_internal( - magma_int_t n, - magmaInt_const_ptr dx_src, magma_int_t incx, - magmaInt_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_copyvector_v1_internal( n, sizeof(magma_int_t), - dx_src, incx, - dy_dst, incy, - func, file, line ); -} - - -// ============================================================================= -// copying sub-matrices - version for magma_int_t - -#define magma_isetmatrix_v1( m, n, hA_src, lda, dB_dst, lddb ) \ - magma_isetmatrix_v1_internal( m, n, hA_src, lda, dB_dst, lddb, __func__, __FILE__, __LINE__ ) - -#define magma_igetmatrix_v1( m, n, dA_src, ldda, hB_dst, ldb ) \ - magma_igetmatrix_v1_internal( m, n, dA_src, ldda, hB_dst, ldb, __func__, __FILE__, __LINE__ ) - -#define magma_icopymatrix_v1( m, n, dA_src, ldda, dB_dst, lddb ) \ - magma_icopymatrix_v1_internal( m, n, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_isetmatrix_v1_internal( - magma_int_t m, magma_int_t n, - const magma_int_t *hA_src, magma_int_t lda, - magmaInt_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ) -{ - magma_setmatrix_v1_internal( m, n, sizeof(magma_int_t), - hA_src, lda, - dB_dst, lddb, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_igetmatrix_v1_internal( - magma_int_t m, magma_int_t n, - magmaInt_const_ptr dA_src, magma_int_t ldda, - magma_int_t *hB_dst, magma_int_t ldb, - const char* func, const char* file, int line ) -{ - magma_getmatrix_v1_internal( m, n, sizeof(magma_int_t), - dA_src, ldda, - hB_dst, ldb, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_icopymatrix_v1_internal( - magma_int_t m, magma_int_t n, - magmaInt_const_ptr dA_src, magma_int_t ldda, - magmaInt_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ) -{ - magma_copymatrix_v1_internal( m, n, sizeof(magma_int_t), - dA_src, ldda, - dB_dst, lddb, - func, file, line ); -} - - -// ============================================================================= -// copying vectors - version for magma_index_t - -#define magma_index_setvector_v1( n, hx_src, incx, dy_dst, incy ) \ - magma_index_setvector_v1_internal( n, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ ) - -#define magma_index_getvector_v1( n, dx_src, incx, hy_dst, incy ) \ - magma_index_getvector_v1_internal( n, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ ) - -#define magma_index_copyvector_v1( n, dx_src, incx, dy_dst, incy ) \ - magma_index_copyvector_v1_internal( n, dx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_index_setvector_v1_internal( - magma_int_t n, - const magma_index_t *hx_src, magma_int_t incx, - magmaIndex_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_setvector_v1_internal( n, sizeof(magma_index_t), - hx_src, incx, - dy_dst, incy, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_index_getvector_v1_internal( - magma_int_t n, - magmaIndex_const_ptr dx_src, magma_int_t incx, - magma_index_t *hy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_getvector_v1_internal( n, sizeof(magma_index_t), - dx_src, incx, - hy_dst, incy, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_index_copyvector_v1_internal( - magma_int_t n, - magmaIndex_const_ptr dx_src, magma_int_t incx, - magmaIndex_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_copyvector_v1_internal( n, sizeof(magma_index_t), - dx_src, incx, - dy_dst, incy, - func, file, line ); -} - - -// ============================================================================= -// copying sub-matrices - version for magma_index_t - -#define magma_index_setmatrix_v1( m, n, hA_src, lda, dB_dst, lddb ) \ - magma_index_setmatrix_v1_internal( m, n, hA_src, lda, dB_dst, lddb, __func__, __FILE__, __LINE__ ) - -#define magma_index_getmatrix_v1( m, n, dA_src, ldda, hB_dst, ldb ) \ - magma_index_getmatrix_v1_internal( m, n, dA_src, ldda, hB_dst, ldb, __func__, __FILE__, __LINE__ ) - -#define magma_index_copymatrix_v1( m, n, dA_src, ldda, dB_dst, lddb ) \ - magma_index_copymatrix_v1_internal( m, n, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_index_setmatrix_v1_internal( - magma_int_t m, magma_int_t n, - const magma_index_t *hA_src, magma_int_t lda, - magmaIndex_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ) -{ - magma_setmatrix_v1_internal( m, n, sizeof(magma_index_t), - hA_src, lda, - dB_dst, lddb, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_index_getmatrix_v1_internal( - magma_int_t m, magma_int_t n, - magmaIndex_const_ptr dA_src, magma_int_t ldda, - magma_index_t *hB_dst, magma_int_t ldb, - const char* func, const char* file, int line ) -{ - magma_getmatrix_v1_internal( m, n, sizeof(magma_index_t), - dA_src, ldda, - hB_dst, ldb, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_index_copymatrix_v1_internal( - magma_int_t m, magma_int_t n, - magmaIndex_const_ptr dA_src, magma_int_t ldda, - magmaIndex_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ) -{ - magma_copymatrix_v1_internal( m, n, sizeof(magma_index_t), - dA_src, ldda, - dB_dst, lddb, - func, file, line ); -} - -#ifdef __cplusplus -} -#endif - -#endif // MAGMA_COPY_V1_H diff --git a/include/magmablas_v1.h b/include/magmablas_v1.h deleted file mode 100644 index 865e833ff..000000000 --- a/include/magmablas_v1.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - @file -*/ - -#ifndef MAGMABLAS_V1_H -#define MAGMABLAS_V1_H - -#ifdef MAGMA_NO_V1 -#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h" -#endif - -#include "magma_copy_v1.h" -#include "magmablas_z.h" -#include "magmablas_z_v1.h" -#include "magmablas_c_v1.h" -#include "magmablas_d_v1.h" -#include "magmablas_s_v1.h" -#include "magmablas_zc_v1.h" -#include "magmablas_ds_v1.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// ============================================================================= -// queue support -// new magma_queue_create adds device - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -#define magma_queue_create_v1( queue_ptr ) \ - magma_queue_create_v1_internal( queue_ptr, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void magma_queue_create_v1_internal( - magma_queue_t* queue_ptr, - const char* func, const char* file, int line ); - - -// ============================================================================= - -/// @deprecated -/// @ingroup magma_deprecated_v1 -#define MagmaUpperLower MagmaFull - -/// @deprecated -/// @ingroup magma_deprecated_v1 -#define MagmaUpperLowerStr MagmaFullStr - -/// @deprecated -/// @ingroup magma_deprecated_v1 -#define MAGMA_Z_CNJG(a) MAGMA_Z_CONJ(a) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -#define MAGMA_C_CNJG(a) MAGMA_C_CONJ(a) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -#define MAGMA_D_CNJG(a) MAGMA_D_CONJ(a) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -#define MAGMA_S_CNJG(a) MAGMA_S_CONJ(a) - -// device_sync is not portable to OpenCL, and is generally not needed -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void magma_device_sync(); - - -// ============================================================================= -// Define magma queue -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_int_t magmablasSetKernelStream( magma_queue_t queue ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_int_t magmablasGetKernelStream( magma_queue_t *queue ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_queue_t magmablasGetQueue(); - -#ifdef __cplusplus -} -#endif - -#endif // MAGMABLAS_V1_H diff --git a/include/magmablas_v1_map.h b/include/magmablas_v1_map.h deleted file mode 100644 index 141f96023..000000000 --- a/include/magmablas_v1_map.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date -*/ - -#ifndef MAGMABLAS_V1_MAP_H -#define MAGMABLAS_V1_MAP_H - -#ifdef MAGMA_NO_V1 -#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h" -#endif - -// ============================================================================= -// map function names to old v1 routines - -#include "magmablas_s_v1_map.h" -#include "magmablas_d_v1_map.h" -#include "magmablas_c_v1_map.h" -#include "magmablas_z_v1_map.h" -#include "magmablas_ds_v1_map.h" -#include "magmablas_zc_v1_map.h" - -#undef magma_queue_create - -#undef magma_setvector -#undef magma_getvector -#undef magma_copyvector -#undef magma_setmatrix -#undef magma_getmatrix -#undef magma_copymatrix - -#undef magma_isetvector -#undef magma_igetvector -#undef magma_icopyvector -#undef magma_isetmatrix -#undef magma_igetmatrix -#undef magma_icopymatrix - -#undef magma_index_setvector -#undef magma_index_getvector -#undef magma_index_copyvector -#undef magma_index_setmatrix -#undef magma_index_getmatrix -#undef magma_index_copymatrix - -#define magma_queue_create magma_queue_create_v1 - -#define magma_setvector magma_setvector_v1 -#define magma_getvector magma_getvector_v1 -#define magma_copyvector magma_copyvector_v1 -#define magma_setmatrix magma_setmatrix_v1 -#define magma_getmatrix magma_getmatrix_v1 -#define magma_copymatrix magma_copymatrix_v1 - -#define magma_isetvector magma_isetvector_v1 -#define magma_igetvector magma_igetvector_v1 -#define magma_icopyvector magma_icopyvector_v1 -#define magma_isetmatrix magma_isetmatrix_v1 -#define magma_igetmatrix magma_igetmatrix_v1 -#define magma_icopymatrix magma_icopymatrix_v1 - -#define magma_index_setvector magma_index_setvector_v1 -#define magma_index_getvector magma_index_getvector_v1 -#define magma_index_copyvector magma_index_copyvector_v1 -#define magma_index_setmatrix magma_index_setmatrix_v1 -#define magma_index_getmatrix magma_index_getmatrix_v1 -#define magma_index_copymatrix magma_index_copymatrix_v1 - -#endif // MAGMABLAS_V1_MAP_H diff --git a/include/magmablas_z_v1.h b/include/magmablas_z_v1.h deleted file mode 100644 index 3c126478d..000000000 --- a/include/magmablas_z_v1.h +++ /dev/null @@ -1,1250 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @precisions normal z -> s d c -*/ - -#ifndef MAGMABLAS_Z_V1_H -#define MAGMABLAS_Z_V1_H - -#ifdef MAGMA_NO_V1 -#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h" -#endif - -#include "magma_types.h" -#include "magma_copy_v1.h" - -#define MAGMA_COMPLEX - -#ifdef __cplusplus -extern "C" { -#endif - - /* - * Transpose functions - */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_ztranspose_inplace_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_ztranspose_conj_inplace_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_ztranspose_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dAT, magma_int_t lddat ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_ztranspose_conj_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dAT, magma_int_t lddat ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zgetmatrix_transpose_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dAT, magma_int_t ldda, - magmaDoubleComplex *hA, magma_int_t lda, - magmaDoubleComplex_ptr dwork, magma_int_t lddwork, magma_int_t nb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zsetmatrix_transpose_v1( - magma_int_t m, magma_int_t n, - const magmaDoubleComplex *hA, magma_int_t lda, - magmaDoubleComplex_ptr dAT, magma_int_t ldda, - magmaDoubleComplex_ptr dwork, magma_int_t lddwork, magma_int_t nb ); - - /* - * RBT-related functions - */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zprbt_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr du, - magmaDoubleComplex_ptr dv ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zprbt_mv_v1( - magma_int_t n, magma_int_t nrhs, - magmaDoubleComplex_ptr dv, - magmaDoubleComplex_ptr db, magma_int_t lddb); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zprbt_mtv_v1( - magma_int_t n, magma_int_t nrhs, - magmaDoubleComplex_ptr du, - magmaDoubleComplex_ptr db, magma_int_t lddb); - - /* - * Multi-GPU copy functions - */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zgetmatrix_1D_col_bcyclic_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr const dA[], magma_int_t ldda, - magmaDoubleComplex *hA, magma_int_t lda, - magma_int_t ngpu, magma_int_t nb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zsetmatrix_1D_col_bcyclic_v1( - magma_int_t m, magma_int_t n, - const magmaDoubleComplex *hA, magma_int_t lda, - magmaDoubleComplex_ptr dA[], magma_int_t ldda, - magma_int_t ngpu, magma_int_t nb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zgetmatrix_1D_row_bcyclic_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr const dA[], magma_int_t ldda, - magmaDoubleComplex *hA, magma_int_t lda, - magma_int_t ngpu, magma_int_t nb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zsetmatrix_1D_row_bcyclic_v1( - magma_int_t m, magma_int_t n, - const magmaDoubleComplex *hA, magma_int_t lda, - magmaDoubleComplex_ptr dA[], magma_int_t ldda, - magma_int_t ngpu, magma_int_t nb ); - - - /* - * LAPACK auxiliary functions (alphabetical order) - */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zgeadd_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zgeadd2_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dB, magma_int_t lddb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlacpy_v1( - magma_uplo_t uplo, - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlacpy_conj_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA1, magma_int_t lda1, - magmaDoubleComplex_ptr dA2, magma_int_t lda2 ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlacpy_sym_in_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, - magma_int_t *rows, magma_int_t *perm, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlacpy_sym_out_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, - magma_int_t *rows, magma_int_t *perm, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -double -magmablas_zlange_v1( - magma_norm_t norm, - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDouble_ptr dwork, magma_int_t lwork ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -double -magmablas_zlanhe_v1( - magma_norm_t norm, magma_uplo_t uplo, - magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDouble_ptr dwork, magma_int_t lwork ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -double -magmablas_zlansy_v1( - magma_norm_t norm, magma_uplo_t uplo, - magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDouble_ptr dwork, magma_int_t lwork ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlarfg_v1( - magma_int_t n, - magmaDoubleComplex_ptr dalpha, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dtau ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlascl_v1( - magma_type_t type, magma_int_t kl, magma_int_t ku, - double cfrom, double cto, - magma_int_t m, magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t *info ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlascl_2x2_v1( - magma_type_t type, magma_int_t m, - magmaDoubleComplex_const_ptr dW, magma_int_t lddw, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t *info ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlascl2_v1( - magma_type_t type, - magma_int_t m, magma_int_t n, - magmaDouble_const_ptr dD, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t *info ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlascl_diag_v1( - magma_type_t type, magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dD, magma_int_t lddd, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t *info ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlaset_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, - magmaDoubleComplex offdiag, magmaDoubleComplex diag, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlaset_band_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex offdiag, magmaDoubleComplex diag, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlaswp_v1( - magma_int_t n, - magmaDoubleComplex_ptr dAT, magma_int_t ldda, - magma_int_t k1, magma_int_t k2, - const magma_int_t *ipiv, magma_int_t inci ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlaswp2_v1( - magma_int_t n, - magmaDoubleComplex_ptr dAT, magma_int_t ldda, - magma_int_t k1, magma_int_t k2, - magmaInt_const_ptr d_ipiv, magma_int_t inci ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlaswp_sym_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t k1, magma_int_t k2, - const magma_int_t *ipiv, magma_int_t inci ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlaswpx_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldx, magma_int_t ldy, - magma_int_t k1, magma_int_t k2, - const magma_int_t *ipiv, magma_int_t inci ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zsymmetrize_v1( - magma_uplo_t uplo, magma_int_t m, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zsymmetrize_tiles_v1( - magma_uplo_t uplo, magma_int_t m, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t ntile, magma_int_t mstride, magma_int_t nstride ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_ztrtri_diag_v1( - magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr d_dinvA ); - - /* - * to cleanup (alphabetical order) - */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_dznrm2_adjust_v1( - magma_int_t k, - magmaDouble_ptr dxnorm, - magmaDoubleComplex_ptr dc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_dznrm2_check_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magmaDouble_ptr dxnorm, - magmaDouble_ptr dlsticc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_dznrm2_cols_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magmaDouble_ptr dxnorm ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_dznrm2_row_check_adjust_v1( - magma_int_t k, double tol, - magmaDouble_ptr dxnorm, - magmaDouble_ptr dxnorm2, - magmaDoubleComplex_ptr dC, magma_int_t lddc, - magmaDouble_ptr dlsticc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_int_t -magma_zlarfb_gpu_v1( - magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev, - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex_const_ptr dV, magma_int_t lddv, - magmaDoubleComplex_const_ptr dT, magma_int_t lddt, - magmaDoubleComplex_ptr dC, magma_int_t lddc, - magmaDoubleComplex_ptr dwork, magma_int_t ldwork ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_int_t -magma_zlarfb_gpu_gemm_v1( - magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev, - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex_const_ptr dV, magma_int_t lddv, - magmaDoubleComplex_const_ptr dT, magma_int_t lddt, - magmaDoubleComplex_ptr dC, magma_int_t lddc, - magmaDoubleComplex_ptr dwork, magma_int_t ldwork, - magmaDoubleComplex_ptr dworkvt, magma_int_t ldworkvt ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zlarfbx_gpu_v1( - magma_int_t m, magma_int_t k, - magmaDoubleComplex_ptr V, magma_int_t ldv, - magmaDoubleComplex_ptr dT, magma_int_t ldt, - magmaDoubleComplex_ptr c, - magmaDoubleComplex_ptr dwork ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zlarfg_gpu_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx0, - magmaDoubleComplex_ptr dx, - magmaDoubleComplex_ptr dtau, - magmaDouble_ptr dxnorm, - magmaDoubleComplex_ptr dAkk ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zlarfgtx_gpu_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx0, - magmaDoubleComplex_ptr dx, - magmaDoubleComplex_ptr dtau, - magmaDouble_ptr dxnorm, - magmaDoubleComplex_ptr dA, magma_int_t iter, - magmaDoubleComplex_ptr V, magma_int_t ldv, - magmaDoubleComplex_ptr T, magma_int_t ldt, - magmaDoubleComplex_ptr dwork ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zlarfgx_gpu_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx0, - magmaDoubleComplex_ptr dx, - magmaDoubleComplex_ptr dtau, - magmaDouble_ptr dxnorm, - magmaDoubleComplex_ptr dA, magma_int_t iter ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zlarfx_gpu_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_ptr v, - magmaDoubleComplex_ptr tau, - magmaDoubleComplex_ptr C, magma_int_t ldc, - magmaDouble_ptr xnorm, - magmaDoubleComplex_ptr dT, magma_int_t iter, - magmaDoubleComplex_ptr work ); - - - /* - * Level 1 BLAS (alphabetical order) - */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zaxpycp_v1( - magma_int_t m, - magmaDoubleComplex_ptr dr, - magmaDoubleComplex_ptr dx, - magmaDoubleComplex_const_ptr db ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zswap_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zswapblk_v1( - magma_order_t order, - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb, - magma_int_t i1, magma_int_t i2, - const magma_int_t *ipiv, magma_int_t inci, - magma_int_t offset ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zswapdblk_v1( - magma_int_t n, magma_int_t nb, - magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t inca, - magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_int_t incb ); - - /* - * Level 2 BLAS (alphabetical order) - */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zgemv_v1( - magma_trans_t trans, magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zgemv_conj_v1( - magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_int_t -magmablas_zhemv_v1( - magma_uplo_t uplo, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_int_t -magmablas_zsymv_v1( - magma_uplo_t uplo, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - - /* - * Level 3 BLAS (alphabetical order) - */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zgemm_v1( - magma_trans_t transA, magma_trans_t transB, - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zgemm_reduce_v1( - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zhemm_v1( - magma_side_t side, magma_uplo_t uplo, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zsymm_v1( - magma_side_t side, magma_uplo_t uplo, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zsyr2k_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zher2k_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - double beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zsyrk_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zherk_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - double alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - double beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_ztrsm_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_ztrsm_outofplace_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb, - magmaDoubleComplex_ptr dX, magma_int_t lddx, - magma_int_t flag, - magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_ztrsm_work_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb, - magmaDoubleComplex_ptr dX, magma_int_t lddx, - magma_int_t flag, - magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length ); - - - /* - * Wrappers for platform independence. - * These wrap CUBLAS or AMD OpenCL BLAS functions. - */ - -// ============================================================================= -// copying vectors -// set copies host to device -// get copies device to host -// copy copies device to device -// (with CUDA unified addressing, copy can be between same or different devices) -// Add the function, file, and line for error-reporting purposes. - -#define magma_zsetvector_v1( n, hx_src, incx, dy_dst, incy ) \ - magma_zsetvector_v1_internal( n, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ ) - -#define magma_zgetvector_v1( n, dx_src, incx, hy_dst, incy ) \ - magma_zgetvector_v1_internal( n, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ ) - -#define magma_zcopyvector_v1( n, dx_src, incx, dy_dst, incy ) \ - magma_zcopyvector_v1_internal( n, dx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_zsetvector_v1_internal( - magma_int_t n, - magmaDoubleComplex const *hx_src, magma_int_t incx, - magmaDoubleComplex_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_setvector_v1_internal( n, sizeof(magmaDoubleComplex), - hx_src, incx, - dy_dst, incy, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_zgetvector_v1_internal( - magma_int_t n, - magmaDoubleComplex_const_ptr dx_src, magma_int_t incx, - magmaDoubleComplex *hy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_getvector_v1_internal( n, sizeof(magmaDoubleComplex), - dx_src, incx, - hy_dst, incy, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_zcopyvector_v1_internal( - magma_int_t n, - magmaDoubleComplex_const_ptr dx_src, magma_int_t incx, - magmaDoubleComplex_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_copyvector_v1_internal( n, sizeof(magmaDoubleComplex), - dx_src, incx, - dy_dst, incy, - func, file, line ); -} - - -// ============================================================================= -// copying sub-matrices (contiguous columns) - -#define magma_zsetmatrix_v1( m, n, hA_src, lda, dB_dst, lddb ) \ - magma_zsetmatrix_v1_internal( m, n, hA_src, lda, dB_dst, lddb, __func__, __FILE__, __LINE__ ) - -#define magma_zgetmatrix_v1( m, n, dA_src, ldda, hB_dst, ldb ) \ - magma_zgetmatrix_v1_internal( m, n, dA_src, ldda, hB_dst, ldb, __func__, __FILE__, __LINE__ ) - -#define magma_zcopymatrix_v1( m, n, dA_src, ldda, dB_dst, lddb ) \ - magma_zcopymatrix_v1_internal( m, n, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ ) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_zsetmatrix_v1_internal( - magma_int_t m, magma_int_t n, - magmaDoubleComplex const *hA_src, magma_int_t lda, - magmaDoubleComplex_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ) -{ - magma_setmatrix_v1_internal( m, n, sizeof(magmaDoubleComplex), - hA_src, lda, - dB_dst, lddb, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_zgetmatrix_v1_internal( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA_src, magma_int_t ldda, - magmaDoubleComplex *hB_dst, magma_int_t ldb, - const char* func, const char* file, int line ) -{ - magma_getmatrix_v1_internal( m, n, sizeof(magmaDoubleComplex), - dA_src, ldda, - hB_dst, ldb, - func, file, line ); -} - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -static inline void -magma_zcopymatrix_v1_internal( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA_src, magma_int_t ldda, - magmaDoubleComplex_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ) -{ - magma_copymatrix_v1_internal( m, n, sizeof(magmaDoubleComplex), - dA_src, ldda, - dB_dst, lddb, - func, file, line ); -} - - -// ============================================================================= -// Level 1 BLAS (alphabetical order) - -// in cublas_v2, result returned through output argument -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_int_t -magma_izamax_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx ); - -// in cublas_v2, result returned through output argument -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magma_int_t -magma_izamin_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx ); - -// in cublas_v2, result returned through output argument -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -double -magma_dzasum_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zaxpy_v1( - magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zcopy_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -// in cublas_v2, result returned through output argument -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magmaDoubleComplex -magma_zdotc_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy ); - -// in cublas_v2, result returned through output argument -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -magmaDoubleComplex -magma_zdotu_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy ); - -// in cublas_v2, result returned through output argument -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -double -magma_dznrm2_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zrot_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy, - double dc, magmaDoubleComplex ds ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zdrot_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy, - double dc, double ds ); - -#ifdef MAGMA_REAL -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zrotm_v1( - magma_int_t n, - magmaDouble_ptr dx, magma_int_t incx, - magmaDouble_ptr dy, magma_int_t incy, - magmaDouble_const_ptr param ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zrotmg_v1( - magmaDouble_ptr d1, magmaDouble_ptr d2, - magmaDouble_ptr x1, magmaDouble_const_ptr y1, - magmaDouble_ptr param ); -#endif // MAGMA_REAL - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zscal_v1( - magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_ptr dx, magma_int_t incx ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zdscal_v1( - magma_int_t n, - double alpha, - magmaDoubleComplex_ptr dx, magma_int_t incx ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zswap_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -// ============================================================================= -// Level 2 BLAS (alphabetical order) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zgemv_v1( - magma_trans_t transA, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zgerc_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zgeru_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zhemv_v1( - magma_uplo_t uplo, - magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zher_v1( - magma_uplo_t uplo, - magma_int_t n, - double alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zher2_v1( - magma_uplo_t uplo, - magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy, - magmaDoubleComplex_ptr dA, magma_int_t ldda ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_ztrmv_v1( - magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, - magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dx, magma_int_t incx ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_ztrsv_v1( - magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, - magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dx, magma_int_t incx ); - -// ============================================================================= -// Level 3 BLAS (alphabetical order) - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zgemm_v1( - magma_trans_t transA, magma_trans_t transB, - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zsymm_v1( - magma_side_t side, magma_uplo_t uplo, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zhemm_v1( - magma_side_t side, magma_uplo_t uplo, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zsyr2k_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zher2k_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - double beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zsyrk_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_zherk_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - double alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - double beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_ztrmm_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magma_ztrsm_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ); - - -#ifdef __cplusplus -} -#endif - -#undef MAGMA_COMPLEX - -#endif // MAGMABLAS_Z_H diff --git a/include/magmablas_z_v1_map.h b/include/magmablas_z_v1_map.h deleted file mode 100644 index c62d2c9e0..000000000 --- a/include/magmablas_z_v1_map.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @precisions normal z -> s d c -*/ - -#ifndef MAGMABLAS_Z_V1_MAP_H -#define MAGMABLAS_Z_V1_MAP_H - -#ifdef MAGMA_NO_V1 -#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h" -#endif - -// ============================================================================= -// map function names to old v1 routines - -#define magmablas_ztranspose_inplace magmablas_ztranspose_inplace_v1 -#define magmablas_ztranspose_conj_inplace magmablas_ztranspose_conj_inplace_v1 -#define magmablas_ztranspose magmablas_ztranspose_v1 -#define magmablas_ztranspose_conj magmablas_ztranspose_conj_v1 -#define magmablas_zgetmatrix_transpose magmablas_zgetmatrix_transpose_v1 -#define magmablas_zsetmatrix_transpose magmablas_zsetmatrix_transpose_v1 -#define magmablas_zprbt magmablas_zprbt_v1 -#define magmablas_zprbt_mv magmablas_zprbt_mv_v1 -#define magmablas_zprbt_mtv magmablas_zprbt_mtv_v1 -#define magma_zgetmatrix_1D_col_bcyclic magma_zgetmatrix_1D_col_bcyclic_v1 -#define magma_zsetmatrix_1D_col_bcyclic magma_zsetmatrix_1D_col_bcyclic_v1 -#define magma_zgetmatrix_1D_row_bcyclic magma_zgetmatrix_1D_row_bcyclic_v1 -#define magma_zsetmatrix_1D_row_bcyclic magma_zsetmatrix_1D_row_bcyclic_v1 -#define magmablas_zgeadd magmablas_zgeadd_v1 -#define magmablas_zgeadd2 magmablas_zgeadd2_v1 -#define magmablas_zlacpy magmablas_zlacpy_v1 -#define magmablas_zlacpy_conj magmablas_zlacpy_conj_v1 -#define magmablas_zlacpy_sym_in magmablas_zlacpy_sym_in_v1 -#define magmablas_zlacpy_sym_out magmablas_zlacpy_sym_out_v1 -#define magmablas_zlange magmablas_zlange_v1 -#define magmablas_zlanhe magmablas_zlanhe_v1 -#define magmablas_zlansy magmablas_zlansy_v1 -#define magmablas_zlarfg magmablas_zlarfg_v1 -#define magmablas_zlascl magmablas_zlascl_v1 -#define magmablas_zlascl_2x2 magmablas_zlascl_2x2_v1 -#define magmablas_zlascl2 magmablas_zlascl2_v1 -#define magmablas_zlascl_diag magmablas_zlascl_diag_v1 -#define magmablas_zlaset magmablas_zlaset_v1 -#define magmablas_zlaset_band magmablas_zlaset_band_v1 -#define magmablas_zlaswp magmablas_zlaswp_v1 -#define magmablas_zlaswp2 magmablas_zlaswp2_v1 -#define magmablas_zlaswp_sym magmablas_zlaswp_sym_v1 -#define magmablas_zlaswpx magmablas_zlaswpx_v1 -#define magmablas_zsymmetrize magmablas_zsymmetrize_v1 -#define magmablas_zsymmetrize_tiles magmablas_zsymmetrize_tiles_v1 -#define magmablas_ztrtri_diag magmablas_ztrtri_diag_v1 -#define magmablas_dznrm2_adjust magmablas_dznrm2_adjust_v1 -#define magmablas_dznrm2_check magmablas_dznrm2_check_v1 -#define magmablas_dznrm2_cols magmablas_dznrm2_cols_v1 -#define magmablas_dznrm2_row_check_adjust magmablas_dznrm2_row_check_adjust_v1 -#define magma_zlarfb_gpu magma_zlarfb_gpu_v1 -#define magma_zlarfb_gpu_gemm magma_zlarfb_gpu_gemm_v1 -#define magma_zlarfbx_gpu magma_zlarfbx_gpu_v1 -#define magma_zlarfg_gpu magma_zlarfg_gpu_v1 -#define magma_zlarfgtx_gpu magma_zlarfgtx_gpu_v1 -#define magma_zlarfgx_gpu magma_zlarfgx_gpu_v1 -#define magma_zlarfx_gpu magma_zlarfx_gpu_v1 -#define magmablas_zaxpycp magmablas_zaxpycp_v1 -#define magmablas_zswap magmablas_zswap_v1 -#define magmablas_zswapblk magmablas_zswapblk_v1 -#define magmablas_zswapdblk magmablas_zswapdblk_v1 -#define magmablas_zgemv magmablas_zgemv_v1 -#define magmablas_zgemv_conj magmablas_zgemv_conj_v1 -#define magmablas_zhemv magmablas_zhemv_v1 -#define magmablas_zsymv magmablas_zsymv_v1 -#define magmablas_zgemm magmablas_zgemm_v1 -#define magmablas_zgemm_reduce magmablas_zgemm_reduce_v1 -#define magmablas_zhemm magmablas_zhemm_v1 -#define magmablas_zsymm magmablas_zsymm_v1 -#define magmablas_zsyr2k magmablas_zsyr2k_v1 -#define magmablas_zher2k magmablas_zher2k_v1 -#define magmablas_zsyrk magmablas_zsyrk_v1 -#define magmablas_zherk magmablas_zherk_v1 -#define magmablas_ztrsm magmablas_ztrsm_v1 -#define magmablas_ztrsm_outofplace magmablas_ztrsm_outofplace_v1 -#define magmablas_ztrsm_work magmablas_ztrsm_work_v1 - -#undef magma_zsetvector -#undef magma_zgetvector -#undef magma_zcopyvector -#undef magma_zsetmatrix -#undef magma_zgetmatrix -#undef magma_zcopymatrix - -#define magma_zsetvector magma_zsetvector_v1 -#define magma_zgetvector magma_zgetvector_v1 -#define magma_zcopyvector magma_zcopyvector_v1 -#define magma_zsetmatrix magma_zsetmatrix_v1 -#define magma_zgetmatrix magma_zgetmatrix_v1 -#define magma_zcopymatrix magma_zcopymatrix_v1 - -#define magma_izamax magma_izamax_v1 -#define magma_izamin magma_izamin_v1 -#define magma_dzasum magma_dzasum_v1 -#define magma_zaxpy magma_zaxpy_v1 -#define magma_zcopy magma_zcopy_v1 -#define magma_zdotc magma_zdotc_v1 -#define magma_zdotu magma_zdotu_v1 -#define magma_dznrm2 magma_dznrm2_v1 -#define magma_zrot magma_zrot_v1 -#define magma_zdrot magma_zdrot_v1 -#define magma_zrotm magma_zrotm_v1 -#define magma_zrotmg magma_zrotmg_v1 -#define magma_zscal magma_zscal_v1 -#define magma_zdscal magma_zdscal_v1 -#define magma_zswap magma_zswap_v1 -#define magma_zgemv magma_zgemv_v1 -#define magma_zgerc magma_zgerc_v1 -#define magma_zgeru magma_zgeru_v1 -#define magma_zhemv magma_zhemv_v1 -#define magma_zher magma_zher_v1 -#define magma_zher2 magma_zher2_v1 -#define magma_ztrmv magma_ztrmv_v1 -#define magma_ztrsv magma_ztrsv_v1 -#define magma_zgemm magma_zgemm_v1 -#define magma_zsymm magma_zsymm_v1 -#define magma_zhemm magma_zhemm_v1 -#define magma_zsyr2k magma_zsyr2k_v1 -#define magma_zher2k magma_zher2k_v1 -#define magma_zsyrk magma_zsyrk_v1 -#define magma_zherk magma_zherk_v1 -#define magma_ztrmm magma_ztrmm_v1 -#define magma_ztrsm magma_ztrsm_v1 - -#endif // MAGMABLAS_Z_V1_MAP_H diff --git a/include/magmablas_zc_v1.h b/include/magmablas_zc_v1.h deleted file mode 100644 index d42ed3aee..000000000 --- a/include/magmablas_zc_v1.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @precisions mixed zc -> ds -*/ - -#ifndef MAGMABLAS_ZC_V1_H -#define MAGMABLAS_ZC_V1_H - -#ifdef MAGMA_NO_V1 -#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h" -#endif - -#include "magma_types.h" - -#ifdef __cplusplus -extern "C" { -#endif - - /* Mixed precision */ -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zcaxpycp_v1( - magma_int_t m, - magmaFloatComplex_ptr r, - magmaDoubleComplex_ptr x, - magmaDoubleComplex_const_ptr b, - magmaDoubleComplex_ptr w ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zclaswp_v1( - magma_int_t n, - magmaDoubleComplex_ptr A, magma_int_t lda, - magmaFloatComplex_ptr SA, - magma_int_t m, - const magma_int_t *ipiv, magma_int_t incx ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlag2c_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr A, magma_int_t lda, - magmaFloatComplex_ptr SA, magma_int_t ldsa, - magma_int_t *info ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_clag2z_v1( - magma_int_t m, magma_int_t n, - magmaFloatComplex_const_ptr SA, magma_int_t ldsa, - magmaDoubleComplex_ptr A, magma_int_t lda, - magma_int_t *info ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_zlat2c_v1( - magma_uplo_t uplo, magma_int_t n, - magmaDoubleComplex_const_ptr A, magma_int_t lda, - magmaFloatComplex_ptr SA, magma_int_t ldsa, - magma_int_t *info ); - -/// @deprecated -/// @ingroup magma_deprecated_v1 -MAGMA_DEPRECATE("The MAGMA v1 interface is deprecated and will be removed in the next release") -void -magmablas_clat2z_v1( - magma_uplo_t uplo, magma_int_t n, - magmaFloatComplex_const_ptr SA, magma_int_t ldsa, - magmaDoubleComplex_ptr A, magma_int_t lda, - magma_int_t *info ); - -#ifdef __cplusplus -} -#endif - -#endif // MAGMABLAS_ZC_V1_H diff --git a/include/magmablas_zc_v1_map.h b/include/magmablas_zc_v1_map.h deleted file mode 100644 index cd94d87d0..000000000 --- a/include/magmablas_zc_v1_map.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @precisions mixed zc -> ds -*/ - -#ifndef MAGMABLAS_ZC_V1_MAP_H -#define MAGMABLAS_ZC_V1_MAP_H - -// ============================================================================= -// map function names to old v1 routines - -#define magmablas_zcaxpycp magmablas_zcaxpycp_v1 -#define magmablas_zclaswp magmablas_zclaswp_v1 -#define magmablas_zlag2c magmablas_zlag2c_v1 -#define magmablas_clag2z magmablas_clag2z_v1 -#define magmablas_zlat2c magmablas_zlat2c_v1 -#define magmablas_clat2z magmablas_clat2z_v1 - -#endif // MAGMABLAS_ZC_V1_MAP_H diff --git a/interface_cuda/Makefile.src b/interface_cuda/Makefile.src index c9c02bcc1..b5a052c88 100644 --- a/interface_cuda/Makefile.src +++ b/interface_cuda/Makefile.src @@ -16,14 +16,11 @@ cdir := interface_cuda libmagma_src += \ $(cdir)/alloc.cpp \ $(cdir)/blas_h_v2.cpp \ - $(cdir)/blas_z_v1.cpp \ $(cdir)/blas_z_v2.cpp \ - $(cdir)/copy_v1.cpp \ $(cdir)/copy_v2.cpp \ $(cdir)/error.cpp \ $(cdir)/connection_mgpu.cpp \ $(cdir)/interface.cpp \ - $(cdir)/interface_v1.cpp \ # ---------------------------------------------------------------------- diff --git a/interface_cuda/blas_z_v1.cpp b/interface_cuda/blas_z_v1.cpp deleted file mode 100644 index 09399c1a0..000000000 --- a/interface_cuda/blas_z_v1.cpp +++ /dev/null @@ -1,549 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @author Mark Gates - @precisions normal z -> s d c -*/ -#ifndef MAGMA_NO_V1 - -#include "magma_internal.h" -#include "magmablas_v1.h" // includes v1 prototypes; does NOT map routine names -#include "error.h" - -#define COMPLEX - -#ifdef MAGMA_HAVE_CUDA - -// These MAGMA v1 routines are all deprecated. -// See blas_z_v2.cpp for documentation. - - -// ============================================================================= -// Level 1 BLAS - -/******************************************************************************/ -extern "C" magma_int_t -magma_izamax_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx ) -{ - return magma_izamax( n, dx, incx, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" magma_int_t -magma_izamin_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx ) -{ - return magma_izamin( n, dx, incx, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" double -magma_dzasum_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx ) -{ - return magma_dzasum( n, dx, incx, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zaxpy_v1( - magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy ) -{ - magma_zaxpy( n, alpha, dx, incx, dy, incy, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zcopy_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy ) -{ - magma_zcopy( n, dx, incx, dy, incy, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" -magmaDoubleComplex magma_zdotc_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy ) -{ - return magma_zdotc( n, dx, incx, dy, incy, magmablasGetQueue() ); -} - - -/******************************************************************************/ -#ifdef COMPLEX -extern "C" -magmaDoubleComplex magma_zdotu_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy ) -{ - return magma_zdotu( n, dx, incx, dy, incy, magmablasGetQueue() ); -} -#endif // COMPLEX - - -/******************************************************************************/ -extern "C" double -magma_dznrm2_v1( - magma_int_t n, - magmaDoubleComplex_const_ptr dx, magma_int_t incx ) -{ - return magma_dznrm2( n, dx, incx, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zrot_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy, - double c, magmaDoubleComplex s ) -{ - magma_zrot( n, dx, incx, dy, incy, c, s, magmablasGetQueue() ); -} - - -/******************************************************************************/ -#ifdef COMPLEX -extern "C" void -magma_zdrot_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy, - double c, double s ) -{ - magma_zdrot( n, dx, incx, dy, incy, c, s, magmablasGetQueue() ); -} -#endif // COMPLEX - - -/******************************************************************************/ -#ifdef REAL -extern "C" void -magma_zrotm_v1( - magma_int_t n, - double *dx, magma_int_t incx, - double *dy, magma_int_t incy, - const double *param ) -{ - magma_zrotm( n, dx, incx, dy, incy, param, magmablasGetQueue() ); -} -#endif // REAL - - -/******************************************************************************/ -#ifdef REAL -extern "C" void -magma_zrotmg_v1( - double *d1, double *d2, - double *x1, const double *y1, - double *param ) -{ - magma_zrotmg( d1, d2, x1, y1, param, magmablasGetQueue() ); -} -#endif // REAL - - -/******************************************************************************/ -extern "C" void -magma_zscal_v1( - magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_ptr dx, magma_int_t incx ) -{ - magma_zscal( n, alpha, dx, incx, magmablasGetQueue() ); -} - - -/******************************************************************************/ -#ifdef COMPLEX -extern "C" void -magma_zdscal_v1( - magma_int_t n, - double alpha, - magmaDoubleComplex_ptr dx, magma_int_t incx ) -{ - magma_zdscal( n, alpha, dx, incx, magmablasGetQueue() ); -} -#endif // COMPLEX - - -/******************************************************************************/ -extern "C" void -magma_zswap_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy ) -{ - magma_zswap( n, dx, incx, dy, incy, magmablasGetQueue() ); -} - - -// ============================================================================= -// Level 2 BLAS - -/******************************************************************************/ -extern "C" void -magma_zgemv_v1( - magma_trans_t transA, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ) -{ - magma_zgemv( - transA, - m, n, - alpha, dA, ldda, - dx, incx, - beta, dy, incy, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zgerc_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy, - magmaDoubleComplex_ptr dA, magma_int_t ldda ) -{ - magma_zgerc( - m, n, - alpha, dx, incx, - dy, incy, - dA, ldda, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -#ifdef COMPLEX -extern "C" void -magma_zgeru_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy, - magmaDoubleComplex_ptr dA, magma_int_t ldda ) -{ - magma_zgeru( - m, n, - alpha, dx, incx, - dy, incy, - dA, ldda, - magmablasGetQueue() ); -} -#endif // COMPLEX - - -/******************************************************************************/ -extern "C" void -magma_zhemv_v1( - magma_uplo_t uplo, - magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ) -{ - magma_zhemv( - uplo, - n, - alpha, dA, ldda, - dx, incx, - beta, dy, incy, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zher_v1( - magma_uplo_t uplo, - magma_int_t n, - double alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dA, magma_int_t ldda ) -{ - magma_zher( - uplo, - n, - alpha, dx, incx, - dA, ldda, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zher2_v1( - magma_uplo_t uplo, - magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex_const_ptr dy, magma_int_t incy, - magmaDoubleComplex_ptr dA, magma_int_t ldda ) -{ - magma_zher2( - uplo, - n, - alpha, dx, incx, - dy, incy, - dA, ldda, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_ztrmv_v1( - magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, - magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dx, magma_int_t incx ) -{ - magma_ztrmv( - uplo, trans, diag, - n, - dA, ldda, - dx, incx, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_ztrsv_v1( - magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, - magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dx, magma_int_t incx ) -{ - magma_ztrsv( - uplo, trans, diag, - n, - dA, ldda, - dx, incx, - magmablasGetQueue() ); -} - - -// ============================================================================= -// Level 3 BLAS - -/******************************************************************************/ -extern "C" void -magma_zgemm_v1( - magma_trans_t transA, magma_trans_t transB, - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magma_zgemm( - transA, transB, - m, n, k, - alpha, dA, ldda, - dB, lddb, - beta, dC, lddc, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zsymm_v1( - magma_side_t side, magma_uplo_t uplo, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magma_zsymm( - side, uplo, - m, n, - alpha, dA, ldda, - dB, lddb, - beta, dC, lddc, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zsyrk_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magma_zsyrk( - uplo, trans, - n, k, - alpha, dA, ldda, - beta, dC, lddc, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zsyr2k_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magma_zsyr2k( - uplo, trans, - n, k, - alpha, dA, ldda, - dB, lddb, - beta, dC, lddc, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -#ifdef COMPLEX -extern "C" void -magma_zhemm_v1( - magma_side_t side, magma_uplo_t uplo, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magma_zhemm( - side, uplo, - m, n, - alpha, dA, ldda, - dB, lddb, - beta, dC, lddc, - magmablasGetQueue() ); -} -#endif // COMPLEX - - -/******************************************************************************/ -#ifdef COMPLEX -extern "C" void -magma_zherk_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - double alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - double beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magma_zherk( - uplo, trans, - n, k, - alpha, dA, ldda, - beta, dC, lddc, - magmablasGetQueue() ); -} -#endif // COMPLEX - - -/******************************************************************************/ -#ifdef COMPLEX -extern "C" void -magma_zher2k_v1( - magma_uplo_t uplo, magma_trans_t trans, - magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - double beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magma_zher2k( - uplo, trans, - n, k, - alpha, dA, ldda, - dB, lddb, - beta, dC, lddc, - magmablasGetQueue() ); -} -#endif // COMPLEX - - -/******************************************************************************/ -extern "C" void -magma_ztrmm_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ) -{ - magma_ztrmm( - side, uplo, trans, diag, - m, n, - alpha, dA, ldda, - dB, lddb, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_ztrsm_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ) -{ - magma_ztrsm( - side, uplo, trans, diag, - m, n, - alpha, dA, ldda, - dB, lddb, - magmablasGetQueue() ); -} - -#endif // MAGMA_HAVE_CUDA - -#undef COMPLEX - -#endif // MAGMA_NO_V1 diff --git a/interface_cuda/copy_v1.cpp b/interface_cuda/copy_v1.cpp deleted file mode 100644 index 5d7d3fd95..000000000 --- a/interface_cuda/copy_v1.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @author Mark Gates -*/ -#ifndef MAGMA_NO_V1 - -#include "magma_internal.h" -#include "magmablas_v1.h" // includes v1 prototypes; does NOT map routine names -#include "error.h" - -#include - -#if defined(MAGMA_HAVE_CUDA) || defined(MAGMA_HAVE_HIP) - -// These MAGMA v1 routines are all deprecated. -// See copy_v2.cpp for documentation. - -// Generic, type-independent routines to copy data. -// Type-safe versions which avoid the user needing sizeof(...) are in headers; -// see magma_{s,d,c,z,i,index_}{set,get,copy}{matrix,vector} - -/******************************************************************************/ -extern "C" void -magma_setvector_v1_internal( - magma_int_t n, magma_int_t elemSize, - void const* hx_src, magma_int_t incx, - magma_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_setvector_internal( - n, elemSize, - hx_src, incx, - dy_dst, incy, - magmablasGetQueue(), - func, file, line ); -} - - -/******************************************************************************/ -extern "C" void -magma_getvector_v1_internal( - magma_int_t n, magma_int_t elemSize, - magma_const_ptr dx_src, magma_int_t incx, - void* hy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_getvector_internal( - n, elemSize, - dx_src, incx, - hy_dst, incy, - magmablasGetQueue(), - func, file, line ); -} - - -/******************************************************************************/ -extern "C" void -magma_copyvector_v1_internal( - magma_int_t n, magma_int_t elemSize, - magma_const_ptr dx_src, magma_int_t incx, - magma_ptr dy_dst, magma_int_t incy, - const char* func, const char* file, int line ) -{ - magma_copyvector_internal( - n, elemSize, - dx_src, incx, - dy_dst, incy, - magmablasGetQueue(), - func, file, line ); -} - - -/******************************************************************************/ -extern "C" void -magma_setmatrix_v1_internal( - magma_int_t m, magma_int_t n, magma_int_t elemSize, - void const* hA_src, magma_int_t lda, - magma_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ) -{ - cublasStatus_t status; - status = cublasSetMatrix( - int(m), int(n), int(elemSize), - hA_src, int(lda), - dB_dst, int(lddb) ); - check_xerror( status, func, file, line ); - MAGMA_UNUSED( status ); -} - - -/******************************************************************************/ -extern "C" void -magma_getmatrix_v1_internal( - magma_int_t m, magma_int_t n, magma_int_t elemSize, - magma_const_ptr dA_src, magma_int_t ldda, - void* hB_dst, magma_int_t ldb, - const char* func, const char* file, int line ) -{ - cublasStatus_t status; - status = cublasGetMatrix( - int(m), int(n), int(elemSize), - dA_src, int(ldda), - hB_dst, int(ldb) ); - check_xerror( status, func, file, line ); - MAGMA_UNUSED( status ); -} - - -/******************************************************************************/ -extern "C" void -magma_copymatrix_v1_internal( - magma_int_t m, magma_int_t n, magma_int_t elemSize, - magma_const_ptr dA_src, magma_int_t ldda, - magma_ptr dB_dst, magma_int_t lddb, - const char* func, const char* file, int line ) -{ - cudaError_t status; - status = cudaMemcpy2D( - dB_dst, int(lddb*elemSize), - dA_src, int(ldda*elemSize), - int(m*elemSize), int(n), cudaMemcpyDeviceToDevice ); - check_xerror( status, func, file, line ); - MAGMA_UNUSED( status ); -} - -#endif // MAGMA_HAVE_CUDA - -#endif // MAGMA_NO_V1 diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp index 50b4fa3d3..0c317ebb5 100644 --- a/interface_cuda/interface.cpp +++ b/interface_cuda/interface.cpp @@ -123,17 +123,6 @@ enum { // count of (init - finalize) calls static int g_init = 0; -#ifndef MAGMA_NO_V1 - magma_queue_t* g_null_queues = NULL; - - #ifdef HAVE_PTHREAD_KEY - pthread_key_t g_magma_queue_key; - #else - magma_queue_t g_magma_queue = NULL; - #endif -#endif // MAGMA_NO_V1 - - // ----------------------------------------------------------------------------- // subset of the CUDA device properties, set by magma_init() struct magma_device_info @@ -232,29 +221,6 @@ magma_init() #endif } } - - #ifndef MAGMA_NO_V1 - #ifdef HAVE_PTHREAD_KEY - // create thread-specific key - // currently, this is needed only for MAGMA v1 compatability - // see magma_init, magmablas(Set|Get)KernelStream, magmaGetQueue - info = pthread_key_create( &g_magma_queue_key, NULL ); - if ( info != 0 ) { - info = MAGMA_ERR_UNKNOWN; - goto cleanup; - } - #endif - - // ----- queues with NULL streams (for backwards compatability with MAGMA 1.x) - // allocate array of queues with NULL stream - size = max( 1, g_magma_devices_cnt ) * sizeof(magma_queue_t); - magma_malloc_cpu( (void**) &g_null_queues, size ); - if ( g_null_queues == NULL ) { - info = MAGMA_ERR_HOST_ALLOC; - goto cleanup; - } - memset( g_null_queues, 0, size ); - #endif // MAGMA_NO_V1 } cleanup: g_init += 1; // increment (init - finalize) count @@ -291,21 +257,6 @@ magma_finalize() g_magma_devices = NULL; } - #ifndef MAGMA_NO_V1 - if ( g_null_queues != NULL ) { - for( int dev=0; dev < g_magma_devices_cnt; ++dev ) { - magma_queue_destroy( g_null_queues[dev] ); - g_null_queues[dev] = NULL; - } - magma_free_cpu( g_null_queues ); - g_null_queues = NULL; - } - - #ifdef HAVE_PTHREAD_KEY - pthread_key_delete( g_magma_queue_key ); - #endif - #endif // MAGMA_NO_V1 - #ifdef DEBUG_MEMORY magma_warn_leaks( g_pointers_dev, "device" ); magma_warn_leaks( g_pointers_cpu, "CPU" ); diff --git a/interface_cuda/interface_v1.cpp b/interface_cuda/interface_v1.cpp deleted file mode 100644 index d6eb4e095..000000000 --- a/interface_cuda/interface_v1.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @author Mark Gates -*/ - -#include -#include -#include - -// these are included already in magma_internal.h & other headers -#include -//#include - -#include "magma_internal.h" -#include "error.h" - - -#if defined(MAGMA_HAVE_CUDA) || defined(MAGMA_HAVE_HIP) -#ifndef MAGMA_NO_V1 - -// ----------------------------------------------------------------------------- -// globals -// see interface.cpp for definitions - -#ifndef MAGMA_NO_V1 - extern magma_queue_t* g_null_queues; - - #ifdef HAVE_PTHREAD_KEY - extern pthread_key_t g_magma_queue_key; - #else - extern magma_queue_t g_magma_queue; - #endif -#endif // MAGMA_NO_V1 - - -// ----------------------------------------------------------------------------- -extern int g_magma_devices_cnt; - - -// ============================================================================= -// device support - -/***************************************************************************//** - @deprecated - Synchronize the current device. - This functionality does not exist in OpenCL, so it is deprecated for CUDA, too. - - @ingroup magma_device -*******************************************************************************/ -extern "C" void -magma_device_sync() -{ - cudaError_t err; - err = cudaDeviceSynchronize(); - check_error( err ); - MAGMA_UNUSED( err ); -} - - -// ============================================================================= -// queue support - -/***************************************************************************//** - @deprecated - - Sets the current global MAGMA v1 queue for kernels to execute in. - In MAGMA v2, all kernels take queue as an argument, so this is deprecated. - If compiled with MAGMA_NO_V1, this is not defined. - - @param[in] - queue Queue to set as current global MAGMA v1 queue. - - @return MAGMA_SUCCESS if successful - - @ingroup magma_queue -*******************************************************************************/ -extern "C" magma_int_t -magmablasSetKernelStream( magma_queue_t queue ) -{ - magma_int_t info = 0; - #ifdef HAVE_PTHREAD_KEY - info = pthread_setspecific( g_magma_queue_key, queue ); - #else - g_magma_queue = queue; - #endif - return info; -} - - -/***************************************************************************//** - @deprecated - - Gets the current global MAGMA v1 queue for kernels to execute in. - In MAGMA v2, all kernels take queue as an argument, so this is deprecated. - If compiled with MAGMA_NO_V1, this is not defined. - - @param[out] - queue_ptr On output, set to the current global MAGMA v1 queue. - - @return MAGMA_SUCCESS if successful - - @ingroup magma_queue -*******************************************************************************/ -extern "C" magma_int_t -magmablasGetKernelStream( magma_queue_t *queue_ptr ) -{ - #ifdef HAVE_PTHREAD_KEY - *queue_ptr = (magma_queue_t) pthread_getspecific( g_magma_queue_key ); - #else - *queue_ptr = g_magma_queue; - #endif - return 0; -} - - -/***************************************************************************//** - @deprecated - - Gets the current global MAGMA v1 queue for kernels to execute in. - Unlike magmablasGetKernelStream(), if the current queue is NULL, - this will return a special MAGMA queue that has a NULL CUDA stream. - This allows MAGMA v1 wrappers to call v2 kernels with a non-NULL queue. - - In MAGMA v2, all kernels take queue as an argument, so this is deprecated. - If compiled with MAGMA_NO_V1, this is not defined. - - @return Current global MAGMA v1 queue. - - @ingroup magma_queue -*******************************************************************************/ -extern "C" -magma_queue_t magmablasGetQueue() -{ - magma_queue_t queue; - #ifdef HAVE_PTHREAD_KEY - queue = (magma_queue_t) pthread_getspecific( g_magma_queue_key ); - #else - queue = g_magma_queue; - #endif - if ( queue == NULL ) { - magma_device_t dev; - magma_getdevice( &dev ); - if ( dev >= g_magma_devices_cnt || g_null_queues == NULL ) { - fprintf( stderr, "Error: %s requires magma_init() to be called first for MAGMA v1 compatability.\n", - __func__ ); - return NULL; - } - // create queue w/ NULL stream first time that NULL queue is used - if ( g_null_queues[dev] == NULL ) { - #ifdef MAGMA_HAVE_CUDA - magma_queue_create_from_cuda( dev, NULL, NULL, NULL, &g_null_queues[dev] ); - #elif defined(MAGMA_HAVE_HIP) - magma_queue_create_from_hip( dev, NULL, NULL, NULL, &g_null_queues[dev] ); - #endif - //printf( "dev %lld create queue %p\n", (long long) dev, (void*) g_null_queues[dev] ); - assert( g_null_queues[dev] != NULL ); - } - queue = g_null_queues[dev]; - } - assert( queue != NULL ); - return queue; -} - - -/***************************************************************************//** - @deprecated - MAGMA v1 version that doesn't take device ID. -*******************************************************************************/ -extern "C" void -magma_queue_create_v1_internal( - magma_queue_t* queue_ptr, - const char* func, const char* file, int line ) -{ - int device; - cudaError_t err; - err = cudaGetDevice( &device ); - check_xerror( err, func, file, line ); - MAGMA_UNUSED( err ); - - magma_queue_create_internal( device, queue_ptr, func, file, line ); -} - -#endif // not MAGMA_NO_V1 -#endif // MAGMA_HAVE_CUDA diff --git a/magmablas/Makefile.src b/magmablas/Makefile.src index c72e8789f..ef6952115 100644 --- a/magmablas/Makefile.src +++ b/magmablas/Makefile.src @@ -102,8 +102,6 @@ libmagma_src += \ $(cdir)/ztrtri_lower_batched.cu \ $(cdir)/ztrtri_upper.cu \ $(cdir)/ztrtri_upper_batched.cu \ - $(cdir)/magmablas_z_v1.cpp \ - $(cdir)/magmablas_zc_v1.cpp \ # multi-GPU libmagma_src += \ diff --git a/magmablas/magmablas_z_v1.cpp b/magmablas/magmablas_z_v1.cpp deleted file mode 100644 index 59fb13b07..000000000 --- a/magmablas/magmablas_z_v1.cpp +++ /dev/null @@ -1,825 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @precisions normal z -> c d s - - @author Mark Gates - - Implements all the wrappers for v1 backwards compatability. - Separating the wrappers allows the new functions to use magma_internal.h -*/ -#ifndef MAGMA_NO_V1 - -#include "magma_internal.h" -#include "magmablas_v1.h" // includes v1 prototypes; does NOT map routine names - -#define COMPLEX - -// These MAGMA v1 routines are all deprecated. -// See corresponding v2 functions for documentation. - -/******************************************************************************/ -extern "C" void -magmablas_zaxpycp_v1( - magma_int_t m, - magmaDoubleComplex_ptr r, - magmaDoubleComplex_ptr x, - magmaDoubleComplex_const_ptr b) -{ - magmablas_zaxpycp( m, r, x, b, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zgeadd_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ) -{ - magmablas_zgeadd( m, n, alpha, dA, ldda, dB, lddb, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zgeadd2_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dB, magma_int_t lddb ) -{ - magmablas_zgeadd2( m, n, alpha, dA, ldda, beta, dB, lddb, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zgemm_v1( - magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magmablas_zgemm( transA, transB, m, n, k, - alpha, dA, ldda, - dB, lddb, - beta, dC, lddc, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zgemv_v1( - magma_trans_t trans, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy) -{ - magmablas_zgemv( trans, m, n, alpha, dA, ldda, dx, incx, beta, dy, incy, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zgemv_conj_v1( - magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy) -{ - magmablas_zgemv_conj( - m, n, alpha, dA, ldda, dx, incx, beta, dy, incy, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zgemm_reduce_v1( - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dB, magma_int_t lddb, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dC, magma_int_t lddc ) -{ - magmablas_zgemm_reduce( - m, n, k, alpha, dA, ldda, dB, lddb, beta, dC, lddc, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zgetmatrix_transpose_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dAT, magma_int_t ldda, - magmaDoubleComplex *hA, magma_int_t lda, - magmaDoubleComplex_ptr dwork, magma_int_t lddwork, magma_int_t nb ) -{ - magma_queue_t queues[2]; - magma_queue_create_v1( &queues[0] ); - magma_queue_create_v1( &queues[1] ); - - magmablas_zgetmatrix_transpose( m, n, nb, dAT, ldda, hA, lda, dwork, lddwork, queues ); - - magma_queue_destroy( queues[0] ); - magma_queue_destroy( queues[1] ); -} - - -/******************************************************************************/ -extern "C" magma_int_t -magmablas_zhemv_v1( - magma_uplo_t uplo, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ) -{ - return magmablas_zhemv( uplo, n, alpha, dA, ldda, dx, incx, beta, dy, incy, magmablasGetQueue() ); -} - - -#ifdef COMPLEX -/******************************************************************************/ -extern "C" magma_int_t -magmablas_zsymv_v1( - magma_uplo_t uplo, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_const_ptr dx, magma_int_t incx, - magmaDoubleComplex beta, - magmaDoubleComplex_ptr dy, magma_int_t incy ) -{ - return magmablas_zsymv( uplo, n, alpha, dA, ldda, dx, incx, beta, dy, incy, magmablasGetQueue() ); -} -#endif - - -/******************************************************************************/ -extern "C" void -magmablas_zprbt_v1( - magma_int_t n, - magmaDoubleComplex *dA, magma_int_t ldda, - magmaDoubleComplex *du, magmaDoubleComplex *dv) -{ - magmablas_zprbt(n, dA, ldda, du, dv, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zprbt_mv_v1( - magma_int_t n, magma_int_t nrhs, - magmaDoubleComplex *dv, magmaDoubleComplex *db, magma_int_t lddb) -{ - magmablas_zprbt_mv(n, nrhs, dv, db, lddb, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zprbt_mtv_v1( - magma_int_t n, magma_int_t nrhs, - magmaDoubleComplex *du, magmaDoubleComplex *db, magma_int_t lddb) -{ - magmablas_zprbt_mtv(n, nrhs, du, db, lddb, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlacpy_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ) -{ - magmablas_zlacpy( uplo, m, n, dA, ldda, dB, lddb, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlacpy_conj_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA1, magma_int_t lda1, - magmaDoubleComplex_ptr dA2, magma_int_t lda2) -{ - magmablas_zlacpy_conj( n, dA1, lda1, dA2, lda2, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlacpy_sym_in_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, - magma_int_t *rows, magma_int_t *perm, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ) -{ - magmablas_zlacpy_sym_in( uplo, m, n, rows, perm, dA, ldda, dB, lddb, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlacpy_sym_out_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, - magma_int_t *rows, magma_int_t *perm, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ) -{ - magmablas_zlacpy_sym_out( uplo, m, n, rows, perm, dA, ldda, dB, lddb, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" double -magmablas_zlange_v1( - magma_norm_t norm, magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDouble_ptr dwork, magma_int_t lwork ) -{ - return magmablas_zlange( norm, m, n, dA, ldda, dwork, lwork, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" double -magmablas_zlanhe_v1( - magma_norm_t norm, magma_uplo_t uplo, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDouble_ptr dwork, magma_int_t lwork ) -{ - return magmablas_zlanhe( norm, uplo, n, dA, ldda, dwork, lwork, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zlarfx_gpu_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_ptr v, - magmaDoubleComplex_ptr tau, - magmaDoubleComplex_ptr C, magma_int_t ldc, - magmaDouble_ptr xnorm, - magmaDoubleComplex_ptr dT, magma_int_t iter, - magmaDoubleComplex_ptr work ) -{ - magma_zlarfx_gpu(m, n, v, tau, C, ldc, xnorm, dT, iter, work, - magmablasGetQueue()); -} - - -/******************************************************************************/ -extern "C" void -magma_zlarfbx_gpu_v1( - magma_int_t m, magma_int_t k, - magmaDoubleComplex_ptr V, magma_int_t ldv, - magmaDoubleComplex_ptr dT, magma_int_t ldt, - magmaDoubleComplex_ptr c, - magmaDoubleComplex_ptr dwork) -{ - magma_zlarfbx_gpu( m, k, V, ldv, dT, ldt, c, dwork, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlarfg_v1( - magma_int_t n, - magmaDoubleComplex_ptr dalpha, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dtau ) -{ - magmablas_zlarfg( n, dalpha, dx, incx, dtau, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zlarfg_gpu_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx0, - magmaDoubleComplex_ptr dx, - magmaDoubleComplex_ptr dtau, - magmaDouble_ptr dxnorm, - magmaDoubleComplex_ptr dAkk ) -{ - magma_zlarfg_gpu( n, dx0, dx, dtau, dxnorm, dAkk, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zlarfgx_gpu_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx0, - magmaDoubleComplex_ptr dx, - magmaDoubleComplex_ptr dtau, - magmaDouble_ptr dxnorm, - magmaDoubleComplex_ptr dA, magma_int_t iter) -{ - magma_zlarfgx_gpu( n, dx0, dx, dtau, dxnorm, dA, iter, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zlarfgtx_gpu_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx0, - magmaDoubleComplex_ptr dx, - magmaDoubleComplex_ptr dtau, - magmaDouble_ptr dxnorm, - magmaDoubleComplex_ptr dA, magma_int_t iter, - magmaDoubleComplex_ptr V, magma_int_t ldv, - magmaDoubleComplex_ptr T, magma_int_t ldt, - magmaDoubleComplex_ptr dwork ) -{ - magma_zlarfgtx_gpu(n, dx0, dx, dtau, dxnorm, dA, iter, V, ldv, - T, ldt, dwork, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlascl_v1( - magma_type_t type, magma_int_t kl, magma_int_t ku, - double cfrom, double cto, - magma_int_t m, magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t *info ) -{ - magmablas_zlascl( type, kl, ku, cfrom, cto, m, n, dA, ldda, magmablasGetQueue(), info ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlascl2_v1( - magma_type_t type, magma_int_t m, magma_int_t n, - magmaDouble_const_ptr dD, - magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *info ) -{ - magmablas_zlascl2( type, m, n, dD, dA, ldda, magmablasGetQueue(), info ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlascl_2x2_v1( - magma_type_t type, magma_int_t m, - magmaDoubleComplex_const_ptr dW, magma_int_t lddw, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t *info ) -{ - magmablas_zlascl_2x2( type, m, dW, lddw, dA, ldda, magmablasGetQueue(), info ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlascl_diag_v1( - magma_type_t type, magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dD, magma_int_t lddd, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t *info ) -{ - magmablas_zlascl_diag( type, m, n, dD, lddd, dA, ldda, magmablasGetQueue(), info ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlaset_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, - magmaDoubleComplex offdiag, magmaDoubleComplex diag, - magmaDoubleComplex_ptr dA, magma_int_t ldda ) -{ - magmablas_zlaset( uplo, m, n, offdiag, diag, dA, ldda, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlaset_band_v1( - magma_uplo_t uplo, magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex offdiag, magmaDoubleComplex diag, - magmaDoubleComplex_ptr dA, magma_int_t ldda) -{ - magmablas_zlaset_band(uplo, m, n, k, offdiag, diag, dA, ldda, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlaswp_v1( - magma_int_t n, - magmaDoubleComplex_ptr dAT, magma_int_t ldda, - magma_int_t k1, magma_int_t k2, - const magma_int_t *ipiv, magma_int_t inci ) -{ - magmablas_zlaswp( n, dAT, ldda, k1, k2, ipiv, inci, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlaswpx_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldx, magma_int_t ldy, - magma_int_t k1, magma_int_t k2, - const magma_int_t *ipiv, magma_int_t inci ) -{ - return magmablas_zlaswpx( n, dA, ldx, ldy, k1, k2, ipiv, inci, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlaswp2_v1( - magma_int_t n, - magmaDoubleComplex_ptr dAT, magma_int_t ldda, - magma_int_t k1, magma_int_t k2, - magmaInt_const_ptr d_ipiv, magma_int_t inci ) -{ - magmablas_zlaswp2( n, dAT, ldda, k1, k2, d_ipiv, inci, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlaswp_sym_v1( magma_int_t n, magmaDoubleComplex *dA, magma_int_t lda, - magma_int_t k1, magma_int_t k2, - const magma_int_t *ipiv, magma_int_t inci ) -{ - return magmablas_zlaswp_sym( n, dA, lda, k1, k2, ipiv, inci, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_dznrm2_check_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magmaDouble_ptr dxnorm, - magmaDouble_ptr dlsticc ) -{ - magmablas_dznrm2_check( m, n, dA, ldda, dxnorm, dlsticc, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_dznrm2_adjust_v1( - magma_int_t k, - magmaDouble_ptr dxnorm, - magmaDoubleComplex_ptr dc ) -{ - magmablas_dznrm2_adjust( k, dxnorm, dc, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_dznrm2_row_check_adjust_v1( - magma_int_t k, double tol, - magmaDouble_ptr dxnorm, - magmaDouble_ptr dxnorm2, - magmaDoubleComplex_ptr dC, magma_int_t lddc, - magmaDouble_ptr dlsticc ) -{ - magmablas_dznrm2_row_check_adjust( k, tol, dxnorm, dxnorm2, dC, lddc, dlsticc, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_dznrm2_cols_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magmaDouble_ptr dxnorm ) -{ - magmablas_dznrm2_cols( m, n, dA, ldda, dxnorm, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zsetmatrix_transpose_v1( - magma_int_t m, magma_int_t n, - const magmaDoubleComplex *hA, magma_int_t lda, - magmaDoubleComplex_ptr dAT, magma_int_t ldda, - magmaDoubleComplex_ptr dwork, magma_int_t lddwork, magma_int_t nb ) -{ - magma_queue_t queues[2]; - magma_queue_create_v1( &queues[0] ); - magma_queue_create_v1( &queues[1] ); - - magmablas_zsetmatrix_transpose( m, n, nb, hA, lda, dAT, ldda, dwork, lddwork, queues ); - - magma_queue_destroy( queues[0] ); - magma_queue_destroy( queues[1] ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zswap_v1( - magma_int_t n, - magmaDoubleComplex_ptr dx, magma_int_t incx, - magmaDoubleComplex_ptr dy, magma_int_t incy) -{ - magmablas_zswap( n, dx, incx, dy, incy, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zswapblk_v1( - magma_order_t order, magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb, - magma_int_t i1, magma_int_t i2, - const magma_int_t *ipiv, magma_int_t inci, magma_int_t offset ) -{ - magmablas_zswapblk( - order, n, dA, ldda, dB, lddb, i1, i2, ipiv, inci, offset, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zswapdblk_v1( - magma_int_t n, magma_int_t nb, - magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t inca, - magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_int_t incb ) -{ - magmablas_zswapdblk( n, nb, dA, ldda, inca, dB, lddb, incb, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zsymmetrize_v1( - magma_uplo_t uplo, magma_int_t m, - magmaDoubleComplex_ptr dA, magma_int_t ldda ) -{ - magmablas_zsymmetrize( uplo, m, dA, ldda, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zsymmetrize_tiles_v1( - magma_uplo_t uplo, magma_int_t m, - magmaDoubleComplex_ptr dA, magma_int_t ldda, - magma_int_t ntile, magma_int_t mstride, magma_int_t nstride ) -{ - magmablas_zsymmetrize_tiles( uplo, m, dA, ldda, ntile, mstride, nstride, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_ztranspose_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dAT, magma_int_t lddat ) -{ - magmablas_ztranspose( m, n, dA, ldda, dAT, lddat, magmablasGetQueue() ); -} - - -#ifdef COMPLEX -/******************************************************************************/ -extern "C" void -magmablas_ztranspose_conj_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dAT, magma_int_t lddat ) -{ - magmablas_ztranspose_conj( m, n, dA, ldda, dAT, lddat, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_ztranspose_conj_inplace_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda ) -{ - magmablas_ztranspose_conj_inplace( n, dA, ldda, magmablasGetQueue() ); -} -#endif - - -/******************************************************************************/ -extern "C" void -magmablas_ztranspose_inplace_v1( - magma_int_t n, - magmaDoubleComplex_ptr dA, magma_int_t ldda ) -{ - magmablas_ztranspose_inplace( n, dA, ldda, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_ztrsm_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb ) -{ - magmablas_ztrsm( side, uplo, transA, diag, m, n, alpha, dA, ldda, dB, lddb, - magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_ztrsm_outofplace_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb, - magmaDoubleComplex_ptr dX, magma_int_t lddx, - magma_int_t flag, - magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length ) -{ - magmablas_ztrsm_outofplace( side, uplo, transA, diag, m, n, alpha, - dA, ldda, dB, lddb, dX, lddx, flag, - d_dinvA, dinvA_length, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_ztrsm_work_v1( - magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, - magma_int_t m, magma_int_t n, - magmaDoubleComplex alpha, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr dB, magma_int_t lddb, - magmaDoubleComplex_ptr dX, magma_int_t lddx, - magma_int_t flag, - magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length ) -{ - magmablas_ztrsm_work( side, uplo, transA, diag, m, n, alpha, - dA, ldda, dB, lddb, dX, lddx, flag, - d_dinvA, dinvA_length, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_ztrtri_diag_v1( - magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, - magmaDoubleComplex_const_ptr dA, magma_int_t ldda, - magmaDoubleComplex_ptr d_dinvA) -{ - magmablas_ztrtri_diag( uplo, diag, n, dA, ldda, d_dinvA, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magma_zgetmatrix_1D_row_bcyclic_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr const *dA, magma_int_t ldda, - magmaDoubleComplex *hA, magma_int_t lda, - magma_int_t ngpu, magma_int_t nb ) -{ - magma_queue_t queues[MagmaMaxGPUs]; - for( int dev=0; dev < ngpu; dev++ ) { - magma_setdevice( dev ); - magma_queue_create( dev, &queues[dev] ); - } - magma_zgetmatrix_1D_row_bcyclic( ngpu, m, n, nb, dA, ldda, hA, lda, queues ); - for( int dev=0; dev < ngpu; dev++ ) { - magma_setdevice( dev ); - magma_queue_sync( queues[dev] ); - magma_queue_destroy( queues[dev] ); - } -} - - -/******************************************************************************/ -extern "C" void -magma_zgetmatrix_1D_col_bcyclic_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr const *dA, magma_int_t ldda, - magmaDoubleComplex *hA, magma_int_t lda, - magma_int_t ngpu, magma_int_t nb ) -{ - magma_queue_t queues[MagmaMaxGPUs]; - for( int dev=0; dev < ngpu; dev++ ) { - magma_setdevice( dev ); - magma_queue_create( dev, &queues[dev] ); - } - magma_zgetmatrix_1D_col_bcyclic( ngpu, m, n, nb, dA, ldda, hA, lda, queues ); - for( int dev=0; dev < ngpu; dev++ ) { - magma_setdevice( dev ); - magma_queue_sync( queues[dev] ); - magma_queue_destroy( queues[dev] ); - } -} - - -/******************************************************************************/ -extern "C" void -magma_zsetmatrix_1D_row_bcyclic_v1( - magma_int_t m, magma_int_t n, - const magmaDoubleComplex *hA, magma_int_t lda, - magmaDoubleComplex_ptr *dA, magma_int_t ldda, - magma_int_t ngpu, magma_int_t nb ) -{ - magma_queue_t queues[MagmaMaxGPUs]; - for( int dev=0; dev < ngpu; dev++ ) { - magma_setdevice( dev ); - magma_queue_create( dev, &queues[dev] ); - } - magma_zsetmatrix_1D_row_bcyclic( ngpu, m, n, nb, hA, lda, dA, ldda, queues ); - for( int dev=0; dev < ngpu; dev++ ) { - magma_setdevice( dev ); - magma_queue_sync( queues[dev] ); - magma_queue_destroy( queues[dev] ); - } -} - - -/******************************************************************************/ -extern "C" void -magma_zsetmatrix_1D_col_bcyclic_v1( - magma_int_t m, magma_int_t n, - const magmaDoubleComplex *hA, magma_int_t lda, - magmaDoubleComplex_ptr *dA, magma_int_t ldda, - magma_int_t ngpu, magma_int_t nb ) -{ - magma_queue_t queues[MagmaMaxGPUs]; - for( int dev=0; dev < ngpu; dev++ ) { - magma_setdevice( dev ); - magma_queue_create( dev, &queues[dev] ); - } - magma_zsetmatrix_1D_col_bcyclic( ngpu, m, n, nb, hA, lda, dA, ldda, queues ); - for( int dev=0; dev < ngpu; dev++ ) { - magma_setdevice( dev ); - magma_queue_sync( queues[dev] ); - magma_queue_destroy( queues[dev] ); - } -} - - -// in src/zlarfb_gpu.cpp -/******************************************************************************/ -extern "C" magma_int_t -magma_zlarfb_gpu_v1( - magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev, - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex_const_ptr dV, magma_int_t lddv, - magmaDoubleComplex_const_ptr dT, magma_int_t lddt, - magmaDoubleComplex_ptr dC, magma_int_t lddc, - magmaDoubleComplex_ptr dwork, magma_int_t ldwork ) -{ - return magma_zlarfb_gpu( side, trans, direct, storev, - m, n, k, - dV, lddv, dT, lddt, dC, lddc, dwork, ldwork, - magmablasGetQueue() ); -} - - -// in src/zlarfb_gpu_gemm.cpp -/******************************************************************************/ -extern "C" magma_int_t -magma_zlarfb_gpu_gemm_v1( - magma_side_t side, magma_trans_t trans, magma_direct_t direct, magma_storev_t storev, - magma_int_t m, magma_int_t n, magma_int_t k, - magmaDoubleComplex_const_ptr dV, magma_int_t lddv, - magmaDoubleComplex_const_ptr dT, magma_int_t lddt, - magmaDoubleComplex_ptr dC, magma_int_t lddc, - magmaDoubleComplex_ptr dwork, magma_int_t ldwork, - magmaDoubleComplex_ptr dworkvt, magma_int_t ldworkvt ) -{ - return magma_zlarfb_gpu_gemm( side, trans, direct, storev, - m, n, k, - dV, lddv, dT, lddt, dC, lddc, - dwork, ldwork, dworkvt, ldworkvt, - magmablasGetQueue() ); -} - -#endif // MAGMA_NO_V1 diff --git a/magmablas/magmablas_zc_v1.cpp b/magmablas/magmablas_zc_v1.cpp deleted file mode 100644 index 896a6c54c..000000000 --- a/magmablas/magmablas_zc_v1.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @precisions mixed zc -> ds - - @author Mark Gates - - Implements all the wrappers for v1 backwards compatability. - Separating the wrappers allows the new functions to use magma_internal.h -*/ -#ifndef MAGMA_NO_V1 - -#include "magma_internal.h" -#include "magmablas_v1.h" // includes v1 prototypes; does NOT map routine names - -// These MAGMA v1 routines are all deprecated. -// See corresponding v2 functions for documentation. - -/******************************************************************************/ -extern "C" void -magmablas_zcaxpycp_v1( - magma_int_t m, - magmaFloatComplex_ptr r, - magmaDoubleComplex_ptr x, - magmaDoubleComplex_const_ptr b, - magmaDoubleComplex_ptr w) -{ - magmablas_zcaxpycp( m, r, x, b, w, magmablasGetQueue() ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_clag2z_v1( - magma_int_t m, magma_int_t n, - magmaFloatComplex_const_ptr SA, magma_int_t ldsa, - magmaDoubleComplex_ptr A, magma_int_t lda, - magma_int_t *info) -{ - magmablas_clag2z( m, n, SA, ldsa, A, lda, magmablasGetQueue(), info ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_clat2z_v1( - magma_uplo_t uplo, magma_int_t n, - magmaFloatComplex_const_ptr SA, magma_int_t ldsa, - magmaDoubleComplex_ptr A, magma_int_t lda, - magma_int_t *info ) -{ - magmablas_clat2z( uplo, n, SA, ldsa, A, lda, magmablasGetQueue(), info ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlag2c_v1( - magma_int_t m, magma_int_t n, - magmaDoubleComplex_const_ptr A, magma_int_t lda, - magmaFloatComplex_ptr SA, magma_int_t ldsa, - magma_int_t *info ) -{ - magmablas_zlag2c( m, n, A, lda, SA, ldsa, magmablasGetQueue(), info ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zlat2c_v1( - magma_uplo_t uplo, magma_int_t n, - magmaDoubleComplex_const_ptr A, magma_int_t lda, - magmaFloatComplex_ptr SA, magma_int_t ldsa, - magma_int_t *info ) -{ - magmablas_zlat2c( uplo, n, A, lda, SA, ldsa, magmablasGetQueue(), info ); -} - - -/******************************************************************************/ -extern "C" void -magmablas_zclaswp_v1( - magma_int_t n, - magmaDoubleComplex_ptr A, magma_int_t lda, - magmaFloatComplex_ptr SA, - magma_int_t m, - const magma_int_t *ipiv, magma_int_t incx ) -{ - magmablas_zclaswp( n, A, lda, SA, lda, m, ipiv, incx, magmablasGetQueue() ); -} - -#endif // MAGMA_NO_V1 diff --git a/src/zpotrf_mgpu_right.cpp b/src/zpotrf_mgpu_right.cpp index 2158701bc..dde00e098 100644 --- a/src/zpotrf_mgpu_right.cpp +++ b/src/zpotrf_mgpu_right.cpp @@ -11,8 +11,6 @@ @precisions normal z -> s d c */ -#ifndef MAGMA_NO_V1 - #include "magma_internal.h" #include "trace.h" @@ -512,4 +510,3 @@ magma_zpotrf_mgpu_right( #undef STREAM_ID #endif // needs updating to MAGMA v2 interfaces -#endif // MAGMA_NO_V1 From 8cb4cf7c6777c91f3e5f56353e3271190bed2be2 Mon Sep 17 00:00:00 2001 From: Cordell Bloor Date: Thu, 19 Dec 2024 23:18:40 -0700 Subject: [PATCH 03/27] Check ROCm library versions directly There is no guarantee that the version number of the rocm-core library will match that of other ROCm libraries. It is both simpler and more robust to directly check the version number of the relevent library. For this reason, Debian has declined to package rocm-core. All other AMD GPU dependencies of magma are available in Debian Trixie. The Debian builds of magama-rocm currently use libamdhip64-dev 5.7.1, libhipblas-dev 5.5.1, and libhipsparse-dev 5.7.1. The Ubuntu 24.04 LTS was frozen with those package versions in its universe repositories. There is no single value that could accurately describe the ROCM_VERSION on Debian or Ubuntu. At the moment, a failure to find rocm-core is treated by magma as a version less than 6.0.0. Without this change, the Debian build of magma will begin to fail when either of the libamdhip64-dev or libhipblas-dev libraries are updated to a newer version. Formatting and documentation fixes to "A leaner gesv_rbt_async with improved execution speed and accuracy (PR #62)" --- CMakeLists.txt | 863 ++++++++++---------- Makefile | 1 - include/magma_auxiliary.h | 14 +- interface_cuda/alloc.cpp | 47 +- interface_cuda/blas_z_v2.cpp | 2 +- interface_cuda/interface.cpp | 2 +- magmablas/ztrsm.cu | 10 + magmablas/ztrtri_diag.cu | 11 + make.inc-examples/make.inc.hip-gcc-mkl | 3 - make.inc-examples/make.inc.hip-gcc-openblas | 3 - src/zgerbt_gpu.cpp | 10 + src/zgerfs_nopiv_gpu.cpp | 20 + src/zgesv_nopiv_gpu.cpp | 10 + src/zgesv_rbt.cpp | 20 + src/zgetrf_nopiv_gpu.cpp | 11 + src/zgetrs_nopiv_gpu.cpp | 10 + tools/get-rocm-version.sh | 68 -- 17 files changed, 575 insertions(+), 530 deletions(-) delete mode 100755 tools/get-rocm-version.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index ea852439f..7a9166be8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,15 @@ -cmake_minimum_required(VERSION 3.18) +cmake_minimum_required( VERSION 3.18 ) # ---------------------------------------- # to disable Fortran, set this to "off" # see also -DADD_ below -option(USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON) +option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON ) if (USE_FORTRAN) - project(MAGMA C CXX Fortran) -else () - project(MAGMA C CXX) -endif () + project( MAGMA C CXX Fortran ) +else() + project( MAGMA C CXX ) +endif() FIND_PROGRAM(PROGRAM_CCACHE ccache) IF (PROGRAM_CCACHE) @@ -25,17 +25,17 @@ ENDIF () # ---------------------------------------- # MAGMA requires one backend to be enabled -option(MAGMA_ENABLE_CUDA "Enable the CUDA backend" OFF) -option(MAGMA_ENABLE_HIP "Enable the HIP backend" OFF) +option(MAGMA_ENABLE_CUDA "Enable the CUDA backend" OFF) +option(MAGMA_ENABLE_HIP "Enable the HIP backend" OFF) # check if one backend has been enabled if (NOT MAGMA_ENABLE_CUDA AND - NOT MAGMA_ENABLE_HIP -) - message(STATUS "MAGMA requires one enabled backend!") - message(STATUS "Building CUDA backend") - set(MAGMA_ENABLE_CUDA ON) -endif () + NOT MAGMA_ENABLE_HIP + ) + message(STATUS "MAGMA requires one enabled backend!") + message(STATUS "Building CUDA backend") + set( MAGMA_ENABLE_CUDA ON ) +endif() # ---------------------------------------- # don't regenerate files during make. @@ -47,25 +47,25 @@ set(CMAKE_SUPPRESS_REGENERATION on) # ---------------------------------------- # force an out-of-source build, to not overwrite the existing Makefiles # (out-of-source is cleaner, too) -string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" MAGMA_COMPILE_INPLACE) +string( COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" MAGMA_COMPILE_INPLACE ) if (MAGMA_COMPILE_INPLACE) - message(FATAL_ERROR "Compiling MAGMA with CMake requires an out-of-source build. To proceed: + message( FATAL_ERROR "Compiling MAGMA with CMake requires an out-of-source build. To proceed: rm -rf CMakeCache.txt CMakeFiles/ # delete files in ${CMAKE_SOURCE_DIR} mkdir build cd build cmake .. - make") -endif () + make" ) +endif() # ---------------------------------------- # prefer shared libraries -option(BUILD_SHARED_LIBS "If on, build shared libraries, otherwise build static libraries" ON) +option( BUILD_SHARED_LIBS "If on, build shared libraries, otherwise build static libraries" ON ) # prefer /usr/local/magma, instead of /usr/local. if (UNIX AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(CMAKE_INSTALL_PREFIX "/usr/local/magma" CACHE PATH "..." FORCE) -endif () +endif() # ---------------------------------------- # use C++14 and C99 @@ -77,47 +77,47 @@ CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC) if (COMPILER_SUPPORTS_CXX14) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") -elseif (COMPILER_SUPPORTS_CXX0X) +elseif(COMPILER_SUPPORTS_CXX0X) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") -else () +else() message( WARNING "The compiler ${CMAKE_CXX_COMPILER} doesn't support the -std=c++14 flag. Some code may not compile.") -endif () +endif() CHECK_C_COMPILER_FLAG("-std=c99" COMPILER_SUPPORTS_C99) if (COMPILER_SUPPORTS_C99) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") -else () - message(WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.") -endif () +else() + message( WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.") +endif() # ---------------------------------------- # check Fortran name mangling if (USE_FORTRAN) - include(FortranCInterface) - FortranCInterface_HEADER(${CMAKE_SOURCE_DIR}/include/magma_mangling_cmake.h MACRO_NAMESPACE MAGMA_) -else () + include( FortranCInterface ) + FortranCInterface_HEADER( ${CMAKE_SOURCE_DIR}/include/magma_mangling_cmake.h MACRO_NAMESPACE MAGMA_ ) +else() # set one of -DADD_, -DUPCASE, or -DNOCHANGE. See README. - message(STATUS "Building without Fortran compiler") - set(FORTRAN_CONVENTION "-DADD_" CACHE STRING "Fortran calling convention, one of -DADD_, -DNOCHANGE, -DUPCASE") - set_property(CACHE FORTRAN_CONVENTION PROPERTY STRINGS -DADD_ -DNOCHANGE -DUPCASE) - message(STATUS " Using ${FORTRAN_CONVENTION} for Fortran calling convention") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FORTRAN_CONVENTION}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FORTRAN_CONVENTION}") + message( STATUS "Building without Fortran compiler" ) + set( FORTRAN_CONVENTION "-DADD_" CACHE STRING "Fortran calling convention, one of -DADD_, -DNOCHANGE, -DUPCASE" ) + set_property( CACHE FORTRAN_CONVENTION PROPERTY STRINGS -DADD_ -DNOCHANGE -DUPCASE ) + message( STATUS " Using ${FORTRAN_CONVENTION} for Fortran calling convention" ) + set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FORTRAN_CONVENTION}" ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FORTRAN_CONVENTION}" ) # see also NVCC_FLAGS below -endif () +endif() # ---------------------------------------- # locate OpenMP -find_package(OpenMP) +find_package( OpenMP ) if (OPENMP_FOUND) - message(STATUS "Found OpenMP") - message(STATUS " OpenMP_C_FLAGS ${OpenMP_C_FLAGS}") - message(STATUS " OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif () + message( STATUS "Found OpenMP" ) + message( STATUS " OpenMP_C_FLAGS ${OpenMP_C_FLAGS}" ) + message( STATUS " OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" ) + set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) +endif() # ---------------------------------------- # locate CUDA libraries @@ -125,48 +125,47 @@ if (MAGMA_ENABLE_CUDA) if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") set(CMAKE_CUDA_COMPILER_WORKS ON) # Fix for intel compiler and CUDA endif () - enable_language(CUDA) + enable_language( CUDA ) - set(CUDA_NAMES - "one or more of " + set( CUDA_NAMES + "one or more of " "Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, " - "or valid sm_XY or sm_XYZ") - set(GPU_TARGET "" CACHE STRING - "CUDA architectures to compile for, overrides CMAKE_CUDA_ARCHITECTURES; ${CUDA_NAMES}") - find_package(CUDAToolkit) + "or valid sm_XY or sm_XYZ" ) + set( GPU_TARGET "" CACHE STRING + "CUDA architectures to compile for, overrides CMAKE_CUDA_ARCHITECTURES; ${CUDA_NAMES}" ) + find_package( CUDAToolkit ) if (CUDAToolkit_FOUND) - message(STATUS "Found CUDA ${CUDA_VERSION}") - message(STATUS " CUDA_CUDART_LIBRARY: CUDA::cudart") + message( STATUS "Found CUDA ${CUDA_VERSION}" ) + message( STATUS " CUDA_CUDART_LIBRARY: CUDA::cudart" ) #message( STATUS " CUDA_CUBLAS_LIBRARIES: CUDA::cublas" ) - include_directories(${CUDAToolkit_INCLUDE_DIRS}) - link_directories(${CUDAToolkit_LIBRARY_DIR}) + include_directories( ${CUDAToolkit_INCLUDE_DIRS} ) if (GPU_TARGET) # Map names to architectures. if (GPU_TARGET MATCHES Fermi) - set(GPU_TARGET "${GPU_TARGET} sm_20") - endif () + set( GPU_TARGET "${GPU_TARGET} sm_20" ) + endif() if (GPU_TARGET MATCHES Kepler) - set(GPU_TARGET "${GPU_TARGET} sm_30 sm_35 sm_37") - endif () + set( GPU_TARGET "${GPU_TARGET} sm_30 sm_35 sm_37" ) + endif() if (GPU_TARGET MATCHES Maxwell) set( GPU_TARGET "${GPU_TARGET} sm_50 sm_52 sm_53" ) - endif () + endif() if (GPU_TARGET MATCHES Pascal) set( GPU_TARGET "${GPU_TARGET} sm_60 sm_61 sm_62" ) - endif () + endif() if (GPU_TARGET MATCHES Volta) set( GPU_TARGET "${GPU_TARGET} sm_70 sm_72" ) - endif () + endif() if (GPU_TARGET MATCHES Turing) - set(GPU_TARGET "${GPU_TARGET} sm_75") - endif () + set( GPU_TARGET "${GPU_TARGET} sm_75" ) + endif() if (GPU_TARGET MATCHES Ampere) set( GPU_TARGET "${GPU_TARGET} sm_80 sm_86 sm_87" ) @@ -174,223 +173,219 @@ if (MAGMA_ENABLE_CUDA) if (GPU_TARGET MATCHES Ada) set( GPU_TARGET "${GPU_TARGET} sm_89" ) - endif () + endif() if (GPU_TARGET MATCHES Hopper) set( GPU_TARGET "${GPU_TARGET} sm_90 sm_90a" ) - endif () + endif() # Find all sm_XY and sm_XYZ, then strip off sm_. string( REGEX MATCHALL "sm_[0-9][0-9a-z]+" sms "${GPU_TARGET}" ) - string(REPLACE "sm_" "" __cuda_architectures "${sms}") + string( REPLACE "sm_" "" __cuda_architectures "${sms}" ) if (NOT __cuda_architectures) - message(FATAL_ERROR - "GPU_TARGET must contain ${CUDA_NAMES}. " - "Was: ${GPU_TARGET}") - endif () + message( FATAL_ERROR + "GPU_TARGET must contain ${CUDA_NAMES}. " + "Was: ${GPU_TARGET}" ) + endif() - set(CMAKE_CUDA_ARCHITECTURES "${__cuda_architectures}") - endif () + set( CMAKE_CUDA_ARCHITECTURES "${__cuda_architectures}" ) + endif() - message(STATUS " Compile for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") - set(MAGMA_CUDA_ARCH "${CMAKE_CUDA_ARCHITECTURES}") + message( STATUS " Compile for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" ) + set( MAGMA_CUDA_ARCH "${CMAKE_CUDA_ARCHITECTURES}" ) # Find minimum arch in CMAKE_CUDA_ARCHITECTURES, if they're all numeric. - set(min_arch 9999) - foreach (arch ${CMAKE_CUDA_ARCHITECTURES}) + set( min_arch 9999 ) + foreach( arch ${CMAKE_CUDA_ARCHITECTURES} ) if (arch MATCHES "^([0-9]+)") # 80-real, 80-virtual, etc. okay if (CMAKE_MATCH_1 LESS min_arch) - set(min_arch "${CMAKE_MATCH_1}") - endif () - else () - set(min_arch 0) # arch like "native", min unknown + set( min_arch "${CMAKE_MATCH_1}" ) + endif() + else() + set( min_arch 0 ) # arch like "native", min unknown break() - endif () - endforeach () + endif() + endforeach() # Append zero, so it is comparable to '__CUDA_ARCH__' - set(MAGMA_CUDA_ARCH_MIN "${min_arch}0") + set( MAGMA_CUDA_ARCH_MIN "${min_arch}0" ) - add_library(magma_nvcc_flags INTERFACE) + add_library( magma_nvcc_flags INTERFACE ) if (COMPILER_SUPPORTS_FPIC) target_compile_options(magma_nvcc_flags - INTERFACE - $<$:--compiler-options;-fPIC,${FORTRAN_CONVENTION}> + INTERFACE + $<$:--compiler-options;-fPIC,${FORTRAN_CONVENTION}> ) - else () + else() # No Position Independent Code on Windows. # Compiler will complain if you add that flag. target_compile_options(magma_nvcc_flags - INTERFACE - $<$:--compiler-options;${FORTRAN_CONVENTION}> + INTERFACE + $<$:--compiler-options;${FORTRAN_CONVENTION}> ) - endif () + endif() - set(MAGMA_HAVE_CUDA "1") + set( MAGMA_HAVE_CUDA "1" ) - message(STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MAGMA_CUDA_ARCH_MIN}") - else () - message(STATUS "Could not find CUDA") - endif () -endif () + message( STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MAGMA_CUDA_ARCH_MIN}" ) + else() + message( STATUS "Could not find CUDA" ) + endif() +endif() # ---------------------------------------- # locate HIP/ROCm libraries if (MAGMA_ENABLE_HIP) - set(GPU_TARGET "gfx900" CACHE STRING "HIP architectures to compile for") - list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/lib/cmake/hip) - find_package(HIP) - if (HIP_FOUND) - message(STATUS "Found HIP ${HIP_VERSION}") - message(STATUS " HIP_INCLUDE_DIRS: ${HIP_INCLUDE_DIRS}") - message(STATUS "GPU_TARGET: ${GPU_TARGET}") - - include_directories(${HIP_INCLUDE_DIRS}) - - set(HIP_SEPARABLE_COMPILATION ON) - - if (GPU_TARGET MATCHES kaveri) - set(GPU_TARGET ${GPU_TARGET} gfx700) - endif () - - if (GPU_TARGET MATCHES hawaii) - set(GPU_TARGET ${GPU_TARGET} gfx701) - endif () - - if (GPU_TARGET MATCHES kabini) - set(GPU_TARGET ${GPU_TARGET} gfx703) - endif () - - if (GPU_TARGET MATCHES mullins) - set(GPU_TARGET ${GPU_TARGET} gfx703) - endif () - - if (GPU_TARGET MATCHES bonaire) - set(GPU_TARGET ${GPU_TARGET} gfx704) - endif () - - if (GPU_TARGET MATCHES carrizo) - set(GPU_TARGET ${GPU_TARGET} gfx801) - endif () - - if (GPU_TARGET MATCHES iceland) - set(GPU_TARGET ${GPU_TARGET} gfx802) - endif () - - if (GPU_TARGET MATCHES tonga) - set(GPU_TARGET ${GPU_TARGET} gfx802) - endif () - - if (GPU_TARGET MATCHES fiji) - set(GPU_TARGET ${GPU_TARGET} gfx803) - endif () - - if (GPU_TARGET MATCHES polaris10) - set(GPU_TARGET ${GPU_TARGET} gfx803) - endif () - - if (GPU_TARGET MATCHES tongapro) - set(GPU_TARGET ${GPU_TARGET} gfx805) - endif () - - if (GPU_TARGET MATCHES stoney) - set(GPU_TARGET ${GPU_TARGET} gfx810) - endif () - - set(DEVCCFLAGS "") - set(VALID_GFXS "700;701;702;703;704;705;801;802;803;805;810;900;902;904;906;908;909;90c;1010;1011;1012;1030;1031;1032;1033") - foreach (GFX ${VALID_GFXS}) - if (GPU_TARGET MATCHES gfx${GFX}) - set(DEVCCFLAGS ${DEVCCFLAGS} --offload-arch=gfx${GFX}) - endif () - endforeach () - - set(DEVCCFLAGS ${DEVCCFLAGS} -fPIC ${FORTRAN_CONVENTION}) - set(MAGMA_HAVE_HIP "1") - message(STATUS "Define -DMAGMA_HAVE_HIP") - - set_property(TARGET hip::device APPEND PROPERTY COMPATIBLE_INTERFACE_BOOL INTERFACE_HIP_DEVICE_COMPILE) - set_property(TARGET hip::device PROPERTY INTERFACE_HIP_DEVICE_COMPILE ON) - set(GPU_ARCH_FLAGS ${DEVCCFLAGS}) - - #add_compile_options(${GPU_ARCH_FLAGS}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__") - option(ROCM_CORE "Location of the rocm-core package") - execute_process(COMMAND "${CMAKE_SOURCE_DIR}/tools/get-rocm-version.sh" "${ROCM_CORE}" OUTPUT_VARIABLE ROCM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "ROCM_VERSION=${ROCM_VERSION}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DROCM_VERSION=${ROCM_VERSION}") - else () - message(STATUS "Could not find HIP") - endif () -endif () + set( GPU_TARGET "gfx900" CACHE STRING "HIP architectures to compile for" ) + list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/lib/cmake/hip) + find_package( HIP ) + if (HIP_FOUND) + message( STATUS "Found HIP ${HIP_VERSION}" ) + message( STATUS " HIP_INCLUDE_DIRS: ${HIP_INCLUDE_DIRS}" ) + message( STATUS "GPU_TARGET: ${GPU_TARGET}" ) + + include_directories( ${HIP_INCLUDE_DIRS} ) + + set(HIP_SEPARABLE_COMPILATION ON) + + if (GPU_TARGET MATCHES kaveri) + set( GPU_TARGET ${GPU_TARGET} gfx700 ) + endif() + + if (GPU_TARGET MATCHES hawaii) + set( GPU_TARGET ${GPU_TARGET} gfx701 ) + endif() + + if (GPU_TARGET MATCHES kabini) + set( GPU_TARGET ${GPU_TARGET} gfx703 ) + endif() + + if (GPU_TARGET MATCHES mullins) + set( GPU_TARGET ${GPU_TARGET} gfx703 ) + endif() + + if (GPU_TARGET MATCHES bonaire) + set( GPU_TARGET ${GPU_TARGET} gfx704 ) + endif() + + if (GPU_TARGET MATCHES carrizo) + set( GPU_TARGET ${GPU_TARGET} gfx801 ) + endif() + + if (GPU_TARGET MATCHES iceland) + set( GPU_TARGET ${GPU_TARGET} gfx802 ) + endif() + + if (GPU_TARGET MATCHES tonga) + set( GPU_TARGET ${GPU_TARGET} gfx802 ) + endif() + + if (GPU_TARGET MATCHES fiji) + set( GPU_TARGET ${GPU_TARGET} gfx803 ) + endif() + + if (GPU_TARGET MATCHES polaris10) + set( GPU_TARGET ${GPU_TARGET} gfx803 ) + endif() + + if (GPU_TARGET MATCHES tongapro) + set( GPU_TARGET ${GPU_TARGET} gfx805 ) + endif() + + if (GPU_TARGET MATCHES stoney) + set( GPU_TARGET ${GPU_TARGET} gfx810 ) + endif() + + set( DEVCCFLAGS "" ) + set(VALID_GFXS "700;701;702;703;704;705;801;802;803;805;810;900;902;904;906;908;909;90c;1010;1011;1012;1030;1031;1032;1033") + foreach( GFX ${VALID_GFXS} ) + if ( GPU_TARGET MATCHES gfx${GFX} ) + set( DEVCCFLAGS ${DEVCCFLAGS} --offload-arch=gfx${GFX} ) + endif() + endforeach() + + set( DEVCCFLAGS ${DEVCCFLAGS} -fPIC ${FORTRAN_CONVENTION} ) + set(MAGMA_HAVE_HIP "1") + message( STATUS "Define -DMAGMA_HAVE_HIP" ) + + set_property(TARGET hip::device APPEND PROPERTY COMPATIBLE_INTERFACE_BOOL INTERFACE_HIP_DEVICE_COMPILE) + set_property(TARGET hip::device PROPERTY INTERFACE_HIP_DEVICE_COMPILE ON) + set(GPU_ARCH_FLAGS ${DEVCCFLAGS}) + + #add_compile_options(${GPU_ARCH_FLAGS}) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__" ) + else() + message( STATUS "Could not find HIP" ) + endif() +endif() # ---------------------------------------- # locate LAPACK libraries set(BLA_VENDOR "" CACHE STRING - "Use specified BLAS library. See https://cmake.org/cmake/help/latest/module/FindBLAS.html") + "Use specified BLAS library. See https://cmake.org/cmake/help/latest/module/FindBLAS.html") # List from CMake 3.17, minus some obsolete ones: # PhiPACK, Compaq CXML, DEC Alpha DXML, SunPerf, SGI SCSL, SGIMATH, # Intel, NAS (Apple veclib). # FLAME is BLIS. set_property(CACHE BLA_VENDOR PROPERTY STRINGS - "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL" - "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq" - "ACML" "ACML_MP" "ACML_GPU" - "Apple" - "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp" - "Generic") - -set(LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search") + "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL" + "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq" + "ACML" "ACML_MP" "ACML_GPU" + "Apple" + "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp" + "Generic") + +set( LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" ) if (LAPACK_LIBRARIES STREQUAL "") - message(STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake.") - find_package(LAPACK) + message( STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake." ) + find_package( LAPACK ) # force showing updated LAPACK_LIBRARIES in ccmake / cmake-gui. - set(LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE) -else () - message(STATUS "User set LAPACK_LIBRARIES. To change, edit LAPACK_LIBRARIES using ccmake (set to empty to enable search).") + set( LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE ) +else() + message( STATUS "User set LAPACK_LIBRARIES. To change, edit LAPACK_LIBRARIES using ccmake (set to empty to enable search)." ) # Check either -lname syntax or file existence - foreach (LIB ${LAPACK_LIBRARIES}) + foreach( LIB ${LAPACK_LIBRARIES} ) if (NOT LIB MATCHES "^-l[a-zA-Z0-9_-]+$") - if (NOT EXISTS ${LIB}) - message(WARNING "\n Warning: file ${LIB} does not exist.\n") - endif () - endif () - endforeach () -endif () + if (NOT EXISTS ${LIB}) + message( WARNING "\n Warning: file ${LIB} does not exist.\n" ) + endif() + endif() + endforeach() +endif() # If using MKL, add it to includes and define MAGMA_WITH_MKL # Initially, this gets MKLROOT from environment, but then the user can edit it. if (LAPACK_LIBRARIES MATCHES mkl_core) - set(MKLROOT $ENV{MKLROOT} CACHE STRING "MKL installation directory") + set( MKLROOT $ENV{MKLROOT} CACHE STRING "MKL installation directory" ) if (MKLROOT STREQUAL "") - message(WARNING "LAPACK_LIBRARIES has MKL, but MKLROOT not set; can't add include directory.") - else () - message(STATUS "MKLROOT set to ${MKLROOT}. To change, edit MKLROOT using ccmake.") + message( WARNING "LAPACK_LIBRARIES has MKL, but MKLROOT not set; can't add include directory." ) + else() + message( STATUS "MKLROOT set to ${MKLROOT}. To change, edit MKLROOT using ccmake." ) if (NOT EXISTS ${MKLROOT}) - message(FATAL_ERROR "MKLROOT ${MKLROOT} directory does not exist.") - endif () - include_directories(${MKLROOT}/include) - add_definitions(-DMAGMA_WITH_MKL) - message(STATUS "Define -DMAGMA_WITH_MKL") - endif () -endif () + message( FATAL_ERROR "MKLROOT ${MKLROOT} directory does not exist." ) + endif() + include_directories( ${MKLROOT}/include ) + add_definitions( -DMAGMA_WITH_MKL ) + message( STATUS "Define -DMAGMA_WITH_MKL" ) + endif() +endif() # ---------------------------------------- # save magma.lib, magma_sparse.lib, etc. in lib/ -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib) +set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY lib ) +set( CMAKE_LIBRARY_OUTPUT_DIRECTORY lib ) # ---------------------------------------- # list of sources if (MAGMA_ENABLE_CUDA) - include(${CMAKE_SOURCE_DIR}/CMake.src.cuda) -else () - include(${CMAKE_SOURCE_DIR}/CMake.src.hip) -endif () + include( ${CMAKE_SOURCE_DIR}/CMake.src.cuda ) +else() + include( ${CMAKE_SOURCE_DIR}/CMake.src.hip ) +endif() # ---------------------------------------- # common flags @@ -401,20 +396,20 @@ if (WIN32) # -Wall is way too verbose; use -W4 # -MP enables parallel builds # -std=c99 is not implemented, so skip that - string(REGEX REPLACE " */W3" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - string(REGEX REPLACE " */W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY") -else () + string( REGEX REPLACE " */W3" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" ) + string( REGEX REPLACE " */W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" ) + set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY" ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W4 -MP -DMAGMA_NOAFFINITY" ) +else() # Primarily for gcc / nvcc: # Ignore unused static functions in headers. - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unused-function") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function") -endif () + set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unused-function" ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function" ) +endif() if (CMAKE_HOST_APPLE) # Use rpaths, which is on by default in CMake 3. - set(CMAKE_MACOSX_RPATH 1) + set( CMAKE_MACOSX_RPATH 1 ) # 64-bit veclib (Accelerate) has issues; substitute correct functions from LAPACK. # (The issue is single precision functions that return doubles; @@ -422,48 +417,48 @@ if (CMAKE_HOST_APPLE) # but this is not feasible in Fortran.) if (LAPACK_LIBRARIES MATCHES "Accelerate") if (USE_FORTRAN) - message(STATUS "MacOS X: adding blas_fix library") - add_library(blas_fix ${libblas_fix_src}) - target_link_libraries(blas_fix - ${LAPACK_LIBRARIES} + message( STATUS "MacOS X: adding blas_fix library" ) + add_library( blas_fix ${libblas_fix_src} ) + target_link_libraries( blas_fix + ${LAPACK_LIBRARIES} ) - set(blas_fix blas_fix) - set(blas_fix_lib -lblas_fix) - else () - message(WARNING "\n Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n") - endif () - endif () + set( blas_fix blas_fix ) + set( blas_fix_lib -lblas_fix ) + else() + message( WARNING "\n Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n" ) + endif() + endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY") + set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY" ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY" ) # previously, just compile as 32-bit, but CUDA 6.5 no longer has 32-bit FAT libraries ## set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32" ) ## set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) ## set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -m32" ) ## set( CUDA_64_BIT_DEVICE_CODE OFF ) -endif () +endif() -include_directories("${CMAKE_BINARY_DIR}/include") +include_directories( "${CMAKE_BINARY_DIR}/include" ) -include_directories(include) -include_directories(control) +include_directories( include ) +include_directories( control ) if (MAGMA_ENABLE_CUDA) - include_directories(magmablas) # e.g., shuffle.cuh -else () - include_directories(magmablas_hip) # e.g., shuffle.cuh -endif () + include_directories( magmablas ) # e.g., shuffle.cuh +else() + include_directories( magmablas_hip ) # e.g., shuffle.cuh +endif() # Need to check sizeof(void*) after setting flags above; # CMAKE_SIZEOF_VOID_P can be wrong. -include(CheckTypeSize) -CHECK_TYPE_SIZE(void* SIZEOF_VOID_PTR) +include( CheckTypeSize ) +CHECK_TYPE_SIZE( void* SIZEOF_VOID_PTR ) if (USE_FORTRAN) - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Dmagma_devptr_t=\"integer\(kind=${SIZEOF_VOID_PTR}\)\"") -endif () + set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Dmagma_devptr_t=\"integer\(kind=${SIZEOF_VOID_PTR}\)\"" ) +endif() # Configure config -configure_file(${CMAKE_SOURCE_DIR}/include/magma_config.h.in ${CMAKE_BINARY_DIR}/include/magma_config.h) +configure_file(${CMAKE_SOURCE_DIR}/include/magma_config.h.in ${CMAKE_BINARY_DIR}/include/magma_config.h) # ---------------------------------------- # compile MAGMA library @@ -473,24 +468,24 @@ if (WIN32) # understand that .F90 files should be pre-processed. # separate Fortran and C/C++/CUDA files - foreach (filename ${libmagma_all}) + foreach( filename ${libmagma_all} ) if (filename MATCHES "\\.(f)$") # |f90|F90 - list(APPEND libmagma_all_f ${filename}) + list( APPEND libmagma_all_f ${filename} ) elseif (filename MATCHES "\\.(c|cu|cpp)$") - list(APPEND libmagma_all_cpp ${filename}) - endif () - endforeach () + list( APPEND libmagma_all_cpp ${filename} ) + endif() + endforeach() #message( "libmagma_all_cpp ${libmagma_all_cpp}" ) #message( "libmagma_all_f ${libmagma_all_f}" ) # on Windows, Fortran files aren't compiled if listed here... - add_library(magma ${libmagma_all_cpp}) - target_link_libraries(magma - ${LAPACK_LIBRARIES} - CUDA::cudart - CUDA::cublas - CUDA::cusparse - magma_nvcc_flags + add_library( magma ${libmagma_all_cpp} ) + target_link_libraries( magma + ${LAPACK_LIBRARIES} + CUDA::cudart + CUDA::cublas + CUDA::cusparse + magma_nvcc_flags ) # no Fortran files at the moment (how to test libmagma_all_f is not empty?), @@ -505,51 +500,51 @@ if (WIN32) ## CUDA::cusparse ## ) ## make list of Fortran .mod files to install, as below -else () +else() # Unix doesn't seem to have a problem with mixing C, CUDA, and Fortran files if (MAGMA_ENABLE_CUDA) - #message(FATAL_ERROR "${libmagma_all}") - add_library(magma ${libmagma_all}) - target_link_libraries(magma - ${blas_fix} - ${LAPACK_LIBRARIES} - CUDA::cudart - CUDA::cublas - CUDA::cusparse - magma_nvcc_flags + #message(FATAL_ERROR "${libmagma_all}") + add_library( magma ${libmagma_all} ) + target_link_libraries( magma + ${blas_fix} + ${LAPACK_LIBRARIES} + CUDA::cudart + CUDA::cublas + CUDA::cusparse + magma_nvcc_flags ) - else () - find_package(hipBLAS) - if (hipBLAS_FOUND) - message(STATUS "Found rocBLAS ${rocBLAS_VERSION}") - endif () - find_package(hipSPARSE) - if (hipSPARSE_FOUND) - message(STATUS "Found rocSPARSE ${rocSPARSE_VERSION}") - endif () - add_library(magma ${libmagma_all}) - target_link_libraries(magma - hip::host - ${blas_fix} - ${LAPACK_LIBRARIES} + else() + find_package( hipBLAS ) + if (hipBLAS_FOUND) + message( STATUS "Found rocBLAS ${rocBLAS_VERSION}" ) + endif() + find_package( hipSPARSE ) + if (hipSPARSE_FOUND) + message( STATUS "Found rocSPARSE ${rocSPARSE_VERSION}" ) + endif() + add_library( magma ${libmagma_all} ) + target_link_libraries( magma + hip::host + ${blas_fix} + ${LAPACK_LIBRARIES} hip::device - roc::hipblas - roc::hipsparse + roc::hipblas + roc::hipsparse ) - endif () + endif() if (USE_FORTRAN) # make list of Fortran .mod files to install - foreach (filename ${libmagma_all}) + foreach( filename ${libmagma_all} ) if (filename MATCHES "\\.(f90|F90)$") # mod files seem to wind up in root build directory - get_filename_component(fmod ${filename} NAME_WE) - list(APPEND modules "${CMAKE_BINARY_DIR}/${fmod}.mod") - endif () - endforeach () - endif () -endif () -add_custom_target(lib DEPENDS magma) + get_filename_component( fmod ${filename} NAME_WE ) + list( APPEND modules "${CMAKE_BINARY_DIR}/${fmod}.mod" ) + endif() + endforeach() + endif() +endif() +add_custom_target( lib DEPENDS magma ) # ---------------------------------------- @@ -557,35 +552,35 @@ add_custom_target(lib DEPENDS magma) # If use fortran, compile only Fortran files, not magma_[sdcz]_no_fortran.cpp # else, compile only C++ files, not Fortran files if (USE_FORTRAN) - foreach (filename ${liblapacktest_all}) + foreach( filename ${liblapacktest_all} ) if (filename MATCHES "\\.(f|f90|F90)$") - list(APPEND liblapacktest_all_f ${filename}) - endif () - endforeach () - add_library(lapacktest ${liblapacktest_all_f}) -else () + list( APPEND liblapacktest_all_f ${filename} ) + endif() + endforeach() + add_library( lapacktest ${liblapacktest_all_f} ) +else() # alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp - foreach (filename ${liblapacktest_all}) + foreach( filename ${liblapacktest_all} ) if (filename MATCHES "\\.(c|cu|cpp)$") - list(APPEND liblapacktest_all_cpp ${filename}) - endif () - endforeach () - add_library(lapacktest ${liblapacktest_all_cpp}) -endif () -target_link_libraries(lapacktest - ${blas_fix} - ${LAPACK_LIBRARIES} + list( APPEND liblapacktest_all_cpp ${filename} ) + endif() + endforeach() + add_library( lapacktest ${liblapacktest_all_cpp} ) +endif() +target_link_libraries( lapacktest + ${blas_fix} + ${LAPACK_LIBRARIES} ) # ---------------------------------------- # compile tester library -add_library(tester ${libtest_all}) -target_link_libraries(tester - magma - lapacktest - ${blas_fix} - ${LAPACK_LIBRARIES} +add_library( tester ${libtest_all} ) +target_link_libraries( tester + magma + lapacktest + ${blas_fix} + ${LAPACK_LIBRARIES} ) @@ -594,38 +589,38 @@ target_link_libraries(tester # sparse doesn't have Fortran at the moment, so no need for above shenanigans if (MAGMA_ENABLE_CUDA) - include_directories(sparse/include) - include_directories(sparse/control) -else () - include_directories(sparse_hip/include) - include_directories(sparse_hip/control) -endif () -include_directories(testing) + include_directories( sparse/include ) + include_directories( sparse/control ) +else() + include_directories( sparse_hip/include ) + include_directories( sparse_hip/control ) +endif() +include_directories( testing ) if (MAGMA_ENABLE_CUDA) - add_library(magma_sparse ${libsparse_all}) + add_library( magma_sparse ${libsparse_all} ) set_property(TARGET magma_sparse PROPERTY CUDA_STANDARD 14) - target_link_libraries(magma_sparse - magma - ${blas_fix} - ${LAPACK_LIBRARIES} - CUDA::cudart - CUDA::cublas - CUDA::cusparse - magma_nvcc_flags + target_link_libraries( magma_sparse + magma + ${blas_fix} + ${LAPACK_LIBRARIES} + CUDA::cudart + CUDA::cublas + CUDA::cusparse + magma_nvcc_flags ) -else () - add_library(magma_sparse ${libsparse_all}) - target_link_libraries(magma_sparse - magma - ${blas_fix} - ${LAPACK_LIBRARIES} - hip::device - roc::hipblas - roc::hipsparse +else() + add_library( magma_sparse ${libsparse_all} ) + target_link_libraries( magma_sparse + magma + ${blas_fix} + ${LAPACK_LIBRARIES} + hip::device + roc::hipblas + roc::hipsparse ) -endif () -add_custom_target(sparse-lib DEPENDS magma_sparse) +endif() +add_custom_target( sparse-lib DEPENDS magma_sparse ) # ---------------------------------------- @@ -633,119 +628,119 @@ add_custom_target(sparse-lib DEPENDS magma_sparse) # save testers to testing/ # save tester lib files to testing_lib/ to avoid cluttering lib/ -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY testing) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib) +set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing ) +set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib ) +set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib ) # skip Fortran testers, which require an extra file from CUDA -foreach (filename ${testing_all}) +foreach( filename ${testing_all} ) if (filename MATCHES "\\.(c|cu|cpp)$") - list(APPEND testing_all_cpp ${filename}) - endif () -endforeach () -foreach (TEST ${testing_all_cpp}) - string(REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST}) - string(REGEX REPLACE "testing/" "" EXE ${EXE}) + list( APPEND testing_all_cpp ${filename} ) + endif() +endforeach() +foreach( TEST ${testing_all_cpp} ) + string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} ) + string( REGEX REPLACE "testing/" "" EXE ${EXE} ) #message( "${TEST} --> ${EXE}" ) - add_executable(${EXE} ${TEST}) - target_link_libraries(${EXE} tester lapacktest magma) - list(APPEND testing ${EXE}) -endforeach () -add_custom_target(testing DEPENDS ${testing}) + add_executable( ${EXE} ${TEST} ) + target_link_libraries( ${EXE} tester lapacktest magma ) + list( APPEND testing ${EXE} ) +endforeach() +add_custom_target( testing DEPENDS ${testing} ) # ---------------------------------------- # compile each sparse tester if (MAGMA_ENABLE_CUDA) - set(SPARSE_TEST_DIR "sparse/testing") -else () - set(SPARSE_TEST_DIR "sparse_hip/testing") -endif () + set(SPARSE_TEST_DIR "sparse/testing") +else() + set(SPARSE_TEST_DIR "sparse_hip/testing") +endif() -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}") -cmake_policy(SET CMP0037 OLD) -foreach (TEST ${sparse_testing_all}) - string(REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST}) - string(REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE}) +set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" ) +cmake_policy( SET CMP0037 OLD) +foreach( TEST ${sparse_testing_all} ) + string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} ) + string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} ) #message( "${TEST} --> ${EXE}" ) - add_executable(${EXE} ${TEST}) - target_link_libraries(${EXE} magma_sparse magma) - list(APPEND sparse-testing ${EXE}) -endforeach () -add_custom_target(sparse-testing DEPENDS ${sparse-testing}) + add_executable( ${EXE} ${TEST} ) + target_link_libraries( ${EXE} magma_sparse magma ) + list( APPEND sparse-testing ${EXE} ) +endforeach() +add_custom_target( sparse-testing DEPENDS ${sparse-testing} ) # ---------------------------------------- # what to install -install(TARGETS magma magma_sparse ${blas_fix} - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) +install( TARGETS magma magma_sparse ${blas_fix} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib ) if (MAGMA_ENABLE_CUDA) - file(GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h") -else () - file(GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h") -endif () + file( GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) +else() + file( GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) +endif() if (USE_FORTRAN) - install(FILES ${headers} ${modules} - DESTINATION include) -else () - install(FILES ${headers} DESTINATION include) -endif () + install( FILES ${headers} ${modules} + DESTINATION include ) +else() + install( FILES ${headers} DESTINATION include ) +endif() # ---------------------------------------- # pkg-config get_target_property(MAGMA_INCLUDE magma INCLUDE_DIRECTORIES) -foreach (dir ${MAGMA_INCLUDE}) +foreach(dir ${MAGMA_INCLUDE}) string(APPEND INCLUDE_COMPILER_STRING "-I${dir} ") -endforeach () -set(MAGMA_INCLUDE "${INCLUDE_COMPILER_STRING}") -set(pkgconfig lib/pkgconfig/magma.pc) -message(STATUS "pkgconfig ${pkgconfig}") -set(INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") -set(CFLAGS "${CMAKE_C_FLAGS}") -set(CXXFLAGS "${CMAKE_CXX_FLAGS}") +endforeach() +set( MAGMA_INCLUDE "${INCLUDE_COMPILER_STRING}" ) +set( pkgconfig lib/pkgconfig/magma.pc ) +message( STATUS "pkgconfig ${pkgconfig}" ) +set( INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" ) +set( CFLAGS "${CMAKE_C_FLAGS}" ) +set( CXXFLAGS "${CMAKE_CXX_FLAGS}" ) # CMake finds the Accelerate directory; we want -framework Accelerate for linking. -string(REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}") +string( REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}" ) if (MAGMA_ENABLE_CUDA) - string(REPLACE ";" " " LIBS - "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse") -else () - string(REPLACE ";" " " LIBS - "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}") - # "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" ) -endif () -set(MAGMA_REQUIRED "") -configure_file("${pkgconfig}.in" "${pkgconfig}" @ONLY) -install(FILES "${CMAKE_BINARY_DIR}/${pkgconfig}" - DESTINATION lib/pkgconfig) + string( REPLACE ";" " " LIBS + "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse") +else() + string( REPLACE ";" " " LIBS + "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}" ) +# "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" ) +endif() +set( MAGMA_REQUIRED "" ) +configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY ) +install( FILES "${CMAKE_BINARY_DIR}/${pkgconfig}" + DESTINATION lib/pkgconfig ) # ---------------------------------------- -get_directory_property(compile_definitions COMPILE_DEFINITIONS) +get_directory_property( compile_definitions COMPILE_DEFINITIONS ) -message(STATUS "Flags") -message(STATUS " CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") -message(STATUS " CFLAGS: ${CMAKE_C_FLAGS}") -message(STATUS " CXXFLAGS: ${CMAKE_CXX_FLAGS}") +message( STATUS "Flags" ) +message( STATUS " CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}" ) +message( STATUS " CFLAGS: ${CMAKE_C_FLAGS}" ) +message( STATUS " CXXFLAGS: ${CMAKE_CXX_FLAGS}" ) if (MAGMA_ENABLE_CUDA) - message(STATUS " NVCCFLAGS: ${CMAKE_CUDA_FLAGS}") -else () - message(STATUS " DEVCCFLAGS: ${DEVCCFLAGS}") -endif () -message(STATUS " FFLAGS: ${CMAKE_Fortran_FLAGS}") -message(STATUS " LIBS: ${LIBS}") -message(STATUS " blas_fix: ${blas_fix} (MacOS Accelerate only)") -message(STATUS " LAPACK_LIBRARIES: ${LAPACK_LIBRARIES}") -message(STATUS " INCLUDE_DIRECTORIES: ${MAGMA_INCLUDE}") + message( STATUS " NVCCFLAGS: ${CMAKE_CUDA_FLAGS}" ) +else() + message( STATUS " DEVCCFLAGS: ${DEVCCFLAGS}" ) +endif() +message( STATUS " FFLAGS: ${CMAKE_Fortran_FLAGS}" ) +message( STATUS " LIBS: ${LIBS}" ) +message( STATUS " blas_fix: ${blas_fix} (MacOS Accelerate only)" ) +message( STATUS " LAPACK_LIBRARIES: ${LAPACK_LIBRARIES}" ) +message( STATUS " INCLUDE_DIRECTORIES: ${MAGMA_INCLUDE}" ) if (MAGMA_ENABLE_CUDA) - message(STATUS " CUDA_CUDART_LIBRARY: CUDA::cudart") - message(STATUS " CUDA_CUBLAS_LIBRARIES: CUDA::cublas") - message(STATUS " CUDA_cusparse_LIBRARY: CUDA::cusparse") -else () - message(STATUS " HIP_LIBRARY: hip::device") - message(STATUS " HIP_BLAS_LIBRARIES: roc::hipblas") - message(STATUS " HIP_sparse_LIBRARY: roc::hipsparse") -endif () -message(STATUS " Fortran modules: ${modules}") + message( STATUS " CUDA_CUDART_LIBRARY: CUDA::cudart" ) + message( STATUS " CUDA_CUBLAS_LIBRARIES: CUDA::cublas" ) + message( STATUS " CUDA_cusparse_LIBRARY: CUDA::cusparse" ) +else() + message( STATUS " HIP_LIBRARY: hip::device" ) + message( STATUS " HIP_BLAS_LIBRARIES: roc::hipblas" ) + message( STATUS " HIP_sparse_LIBRARY: roc::hipsparse" ) +endif() +message( STATUS " Fortran modules: ${modules}" ) diff --git a/Makefile b/Makefile index 44880ab61..1fabf54ca 100644 --- a/Makefile +++ b/Makefile @@ -847,7 +847,6 @@ d_ext := cu else ifeq ($(BACKEND),hip) d_ext := cpp CXXFLAGS += -D__HIP_PLATFORM_AMD__ -CXXFLAGS += -DROCM_VERSION=$(shell ./tools/get-rocm-version.sh) endif diff --git a/include/magma_auxiliary.h b/include/magma_auxiliary.h index 2c7fd075c..32434b575 100644 --- a/include/magma_auxiliary.h +++ b/include/magma_auxiliary.h @@ -139,43 +139,43 @@ magma_memset_async(void * ptr, int value, size_t count, magma_queue_t queue); /// Type-safe version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes. static inline magma_int_t magma_imalloc( magmaInt_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_int_t) ); } -/// Type-safe version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes. +/// Type-safe asynchronous version of magma_malloc(), for magma_int_t arrays. Allocates n*sizeof(magma_int_t) bytes using CUDA stream specified in queue. static inline magma_int_t magma_imalloc_async( magmaInt_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_int_t), queue ); } /// Type-safe version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes. static inline magma_int_t magma_index_malloc( magmaIndex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_index_t) ); } -/// Type-safe version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes. +/// Type-safe asynchronous version of magma_malloc(), for magma_index_t arrays. Allocates n*sizeof(magma_index_t) bytes using CUDA stream specified in queue. static inline magma_int_t magma_index_malloc_async( magmaIndex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_index_t), queue ); } /// Type-safe version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes. static inline magma_int_t magma_uindex_malloc( magmaUIndex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magma_uindex_t) ); } -/// Type-safe version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes. +/// Type-safe asynchronous version of magma_malloc(), for magma_uindex_t arrays. Allocates n*sizeof(magma_uindex_t) bytes using CUDA stream specified in queue. static inline magma_int_t magma_uindex_malloc_async( magmaUIndex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magma_uindex_t), queue); } /// Type-safe version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes. static inline magma_int_t magma_smalloc( magmaFloat_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(float) ); } -/// Type-safe version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes. +/// Type-safe asynchronous version of magma_malloc(), for float arrays. Allocates n*sizeof(float) bytes using CUDA stream specified in queue. static inline magma_int_t magma_smalloc_async( magmaFloat_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(float), queue); } /// Type-safe version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes. static inline magma_int_t magma_dmalloc( magmaDouble_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(double) ); } -/// Type-safe version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes. +/// Type-safe asynchronous version of magma_malloc(), for double arrays. Allocates n*sizeof(double) bytes using CUDA stream specified in queue. static inline magma_int_t magma_dmalloc_async( magmaDouble_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(double), queue); } /// Type-safe version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes. static inline magma_int_t magma_cmalloc( magmaFloatComplex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magmaFloatComplex) ); } -/// Type-safe version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes. +/// Type-safe asynchronous version of magma_malloc(), for magmaFloatComplex arrays. Allocates n*sizeof(magmaFloatComplex) bytes using CUDA stream specified in queue. static inline magma_int_t magma_cmalloc_async( magmaFloatComplex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magmaFloatComplex), queue ); } /// Type-safe version of magma_malloc(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes. static inline magma_int_t magma_zmalloc( magmaDoubleComplex_ptr *ptr_ptr, size_t n ) { return magma_malloc( (magma_ptr*) ptr_ptr, n*sizeof(magmaDoubleComplex) ); } -/// Type-safe version of magma_malloc_async(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes. +/// Type-safe asynchronous version of magma_malloc_async(), for magmaDoubleComplex arrays. Allocates n*sizeof(magmaDoubleComplex) bytes using CUDA stream specified in queue. static inline magma_int_t magma_zmalloc_async( magmaDoubleComplex_ptr *ptr_ptr, size_t n, magma_queue_t queue ) { return magma_malloc_async( (magma_ptr*) ptr_ptr, n*sizeof(magmaDoubleComplex), queue ); } /// @} diff --git a/interface_cuda/alloc.cpp b/interface_cuda/alloc.cpp index 74f9bdbd1..da30b1143 100644 --- a/interface_cuda/alloc.cpp +++ b/interface_cuda/alloc.cpp @@ -89,21 +89,23 @@ magma_malloc( magma_ptr* ptrPtr, size_t size ) @param[in] size Size in bytes to allocate. If size = 0, allocates some minimal size. - @param[in] + @param[in] queue Magma queue whose CUDA stream is used to put the cudaMalloc on. @return MAGMA_SUCCESS @return MAGMA_ERR_DEVICE_ALLOC on failure Type-safe versions avoid the need for a (void**) cast and explicit sizeof. - @see magma_smalloc_q - @see magma_dmalloc_q - @see magma_cmalloc_q - @see magma_zmalloc_q - @see magma_imalloc_q - @see magma_index_malloc_q + @see magma_smalloc_async + @see magma_dmalloc_async + @see magma_cmalloc_async + @see magma_zmalloc_async + @see magma_imalloc_async + @see magma_index_malloc_async + @see magma_uindex_malloc_async @ingroup magma_malloc + *******************************************************************************/ extern "C" magma_int_t magma_malloc_async( magma_ptr* ptrPtr, size_t size, magma_queue_t queue) @@ -249,10 +251,10 @@ magma_malloc_cpu( void** ptrPtr, size_t size ) } #endif #else - *ptrPtr = malloc( size ); - if ( *ptrPtr == NULL ) { - return MAGMA_ERR_HOST_ALLOC; - } + *ptrPtr = malloc( size ); + if ( *ptrPtr == NULL ) { + return MAGMA_ERR_HOST_ALLOC; + } #endif #ifdef DEBUG_MEMORY @@ -413,16 +415,37 @@ magma_mem_info(size_t * freeMem, size_t * totalMem) { return MAGMA_SUCCESS; } +/***************************************************************************//** + Sets memory chunk pointed to by ptr of size count to the byte value of value. + + @param[in] + ptr Address of the chunk of GPU memory to set. + + @param[in] + count Number of bytes to set starting at ptr. + + @return CUDA_SUCCESS + @return CUDA_ERROR_* code on failure +*******************************************************************************/ extern "C" magma_int_t magma_memset(void * ptr, int value, size_t count) { return cudaMemset(ptr, value, count); } +/***************************************************************************//** + + @copydoc magma_memset + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" magma_int_t magma_memset_async(void * ptr, int value, size_t count, magma_queue_t queue) { #ifdef MAGMA_HAVE_CUDA -// return cudaMemsetAsync(ptr, value, count, queue); return cudaMemsetAsync(ptr, value, count, queue->cuda_stream()); #elif defined(MAGMA_HAVE_HIP) return hipMemsetAsync(ptr, value, count, queue->hip_stream()); diff --git a/interface_cuda/blas_z_v2.cpp b/interface_cuda/blas_z_v2.cpp index 351ca7a7e..c7f5da2e0 100644 --- a/interface_cuda/blas_z_v2.cpp +++ b/interface_cuda/blas_z_v2.cpp @@ -1859,7 +1859,7 @@ magma_ztrmm( int(m), int(n), (cuDoubleComplex*)&alpha, (const cuDoubleComplex*)dA, int(ldda), (cuDoubleComplex*)dB, int(lddb) - #if (ROCM_VERSION >= 60000) + #if (hipblasVersionMajor >= 2) , (cuDoubleComplex*)dB, int(lddb) #endif ); diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp index 0c317ebb5..19dd6dc4b 100644 --- a/interface_cuda/interface.cpp +++ b/interface_cuda/interface.cpp @@ -491,7 +491,7 @@ magma_is_devptr( const void* A ) #endif #elif defined(MAGMA_HAVE_HIP) - #if ROCM_VERSION >= 60000 + #if HIP_VERSION_MAJOR >= 6 return (attr.type == hipMemoryTypeDevice); #else return (attr.memoryType == hipMemoryTypeDevice); diff --git a/magmablas/ztrsm.cu b/magmablas/ztrsm.cu index 81cc626a6..8474e4aae 100644 --- a/magmablas/ztrsm.cu +++ b/magmablas/ztrsm.cu @@ -742,6 +742,16 @@ void magmablas_ztrsm( magma_free( dX ); } +/***************************************************************************//** + + @copydoc magmablas_ztrsm + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" void magmablas_ztrsm_async( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, diff --git a/magmablas/ztrtri_diag.cu b/magmablas/ztrtri_diag.cu index 2e75f20e3..8207b4dba 100644 --- a/magmablas/ztrtri_diag.cu +++ b/magmablas/ztrtri_diag.cu @@ -179,6 +179,17 @@ magmablas_ztrtri_diag( } } + +/***************************************************************************//** + + @copydoc magmablas_ztrtri_diag + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" void magmablas_ztrtri_diag_async( magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, diff --git a/make.inc-examples/make.inc.hip-gcc-mkl b/make.inc-examples/make.inc.hip-gcc-mkl index 4a7809e20..24ee6bf6b 100644 --- a/make.inc-examples/make.inc.hip-gcc-mkl +++ b/make.inc-examples/make.inc.hip-gcc-mkl @@ -104,9 +104,6 @@ LDFLAGS = $(FPIC) $(FOPENMP) # add in the default for device compiling DEVCCFLAGS = -O3 -DNDEBUG -DADD_ -# Add ROCM_VERSION for device compilation -DEVCCFLAGS += -DROCM_VERSION=$(shell ./tools/get-rocm-version.sh) - # add the flags in a backend-specific way ifeq ($(BACKEND),cuda) DEVCCFLAGS += -Xcompiler "$(FPIC)" -Xcompiler "$(FOPENMP)" -std=c++11 diff --git a/make.inc-examples/make.inc.hip-gcc-openblas b/make.inc-examples/make.inc.hip-gcc-openblas index 2c6be03ea..547169bb1 100644 --- a/make.inc-examples/make.inc.hip-gcc-openblas +++ b/make.inc-examples/make.inc.hip-gcc-openblas @@ -108,9 +108,6 @@ LDFLAGS = $(FPIC) $(FOPENMP) # add in the default for device compiling DEVCCFLAGS = -O3 -DNDEBUG -DADD_ -# Add ROCM_VERSION for device compilation -DEVCCFLAGS += -DROCM_VERSION=$(shell ./tools/get-rocm-version.sh) - # add the flags in a backend-specific way ifeq ($(BACKEND),cuda) DEVCCFLAGS += -Xcompiler "$(FPIC)" -Xcompiler "$(FOPENMP)" -std=c++11 diff --git a/src/zgerbt_gpu.cpp b/src/zgerbt_gpu.cpp index 436b13ce1..44a4ee40a 100644 --- a/src/zgerbt_gpu.cpp +++ b/src/zgerbt_gpu.cpp @@ -170,6 +170,16 @@ magma_zgerbt_gpu( return *info; } +/***************************************************************************//** + + @copydoc magma_zgerbt_gpu + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" magma_int_t magma_zgerbt_gpu_async( const magma_bool_t gen, const magma_int_t n, const magma_int_t nrhs, diff --git a/src/zgerfs_nopiv_gpu.cpp b/src/zgerfs_nopiv_gpu.cpp index 3432d67b5..0a2e0152b 100644 --- a/src/zgerfs_nopiv_gpu.cpp +++ b/src/zgerfs_nopiv_gpu.cpp @@ -277,6 +277,26 @@ magma_zgerfs_nopiv_gpu( return *info; } +/***************************************************************************//** + + @copydoc magma_zgerfs_nopiv_gpu + + @param[in] + iter_max INTEGER + The maximum number of refinement iterations performed. + + @param[in] + bwdmax DOUBLE + Refine the solution if the error is above the threshold multiplied by + bwdmax. See @magma_zgerfs_nopiv_gpu for how error threshold is calculated. + Set to zero to perform all iterations specified in iter_max. + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" magma_int_t magma_zgerfs_nopiv_gpu_async( magma_trans_t trans, magma_int_t n, magma_int_t nrhs, diff --git a/src/zgesv_nopiv_gpu.cpp b/src/zgesv_nopiv_gpu.cpp index ab76c5ef9..ba389c2ee 100644 --- a/src/zgesv_nopiv_gpu.cpp +++ b/src/zgesv_nopiv_gpu.cpp @@ -94,6 +94,16 @@ magma_zgesv_nopiv_gpu( return *info; } +/***************************************************************************//** + + @copydoc magma_zgesv_nopiv_gpu + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" magma_int_t magma_zgesv_nopiv_gpu_async( magma_int_t n, magma_int_t nrhs, diff --git a/src/zgesv_rbt.cpp b/src/zgesv_rbt.cpp index 9b11daa77..61055e814 100644 --- a/src/zgesv_rbt.cpp +++ b/src/zgesv_rbt.cpp @@ -202,6 +202,26 @@ magma_zgesv_rbt( return *info; } +/***************************************************************************//** + + @copydoc magma_zgetrf_nopiv_gpu + + @param[in] + iter_max INTEGER + The maximum number of refinement iterations performed. + + @param[in] + bwdmax DOUBLE + Refine the solution if the error is above the threshold multiplied by + bwdmax. See @magma_zgerfs_nopiv_gpu for how error threshold is calculated. + Set to zero to perform all iterations specified in iter_max. + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" magma_int_t magma_zgesv_rbt_async( const magma_bool_t refine, const magma_int_t n, const magma_int_t nrhs, diff --git a/src/zgetrf_nopiv_gpu.cpp b/src/zgetrf_nopiv_gpu.cpp index 5abbbafa7..7d20f3264 100644 --- a/src/zgetrf_nopiv_gpu.cpp +++ b/src/zgetrf_nopiv_gpu.cpp @@ -210,6 +210,17 @@ magma_zgetrf_nopiv_gpu( return *info; } /* magma_zgetrf_nopiv_gpu */ + +/***************************************************************************//** + + @copydoc magma_zgetrf_nopiv_gpu + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" magma_int_t magma_zgetrf_nopiv_gpu_async( magma_int_t m, magma_int_t n, diff --git a/src/zgetrs_nopiv_gpu.cpp b/src/zgetrs_nopiv_gpu.cpp index 1a56bee75..9eef098df 100644 --- a/src/zgetrs_nopiv_gpu.cpp +++ b/src/zgetrs_nopiv_gpu.cpp @@ -131,6 +131,16 @@ magma_zgetrs_nopiv_gpu( return *info; } +/***************************************************************************//** + + @copydoc magma_zgetrs_nopiv_gpu + + @param[in] + queue magma_queue_t + A pointer to a magma_queue structure that will be used for the + execution of this method, and all methods it calls. queue != nullptr + +*******************************************************************************/ extern "C" magma_int_t magma_zgetrs_nopiv_gpu_async( magma_trans_t trans, magma_int_t n, magma_int_t nrhs, diff --git a/tools/get-rocm-version.sh b/tools/get-rocm-version.sh deleted file mode 100755 index c28974a12..000000000 --- a/tools/get-rocm-version.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash - -DEBUG=0 -[[ -d $1 ]] && ROCM_PATH=$1 || ROCM_PATH="${ROCM_PATH:-/opt/rocm}" -HEADER_PATH_ONE=${ROCM_PATH}/include/rocm-core/rocm_version.h -HEADER_PATH_TWO=${ROCM_PATH}/include/rocm_version.h -PACKAGE_PATH_ONE=${ROCM_PATH}/.info/version-dev -PACKAGE_PATH_TWO=${ROCM_PATH}/.info/version - -function parse_semver() { - local token="$1" - local major=0 - local minor=0 - local patch=0 - - if egrep '^[0-9]+\.[0-9]+\.[0-9]+' <<<"$token" >/dev/null 2>&1 - then - # It has the correct syntax. - local n=${token//[!0-9]/ } - local a=(${n//\./ }) - major=${a[0]} - minor=${a[1]} - patch=${a[2]} - fi - - echo "$major $minor $patch" -} - -if [[ -f ${HEADER_PATH_ONE} ]] -then - ROCM_VERSION_DEV_RAW=$(grep ROCM_BUILD_INFO ${HEADER_PATH_ONE} | cut -d '"' -f 2) - if [[ "x$DEBUG" = "x1" ]] - then - echo "Found ${HEADER_PATH_ONE}, ROCM_VERSION_DEV_RAW=${ROCM_VERSION_DEV_RAW}" - fi -elif [[ -f ${HEADER_PATH_TWO} ]] -then - ROCM_VERSION_DEV_RAW=$(grep ROCM_BUILD_INFO ${HEADER_PATH_TWO} | cut -d '"' -f 2) - if [[ "x$DEBUG" = "x1" ]] - then - echo "Found ${HEADER_PATH_TWO}, ROCM_VERSION_DEV_RAW=${ROCM_VERSION_DEV_RAW}" - fi -elif [[ -f ${PACKAGE_PATH_ONE} ]] -then - ROCM_VERSION_DEV_RAW=$(cat ${PACKAGE_PATH_ONE}) - if [[ "x$DEBUG" = "x1" ]] - then - echo "Found ${PACKAGE_PATH_ONE}, ROCM_VERSION_DEV_RAW=${ROCM_VERSION_DEV_RAW}" - fi -elif [[ -f ${PACKAGE_PATH_TWO} ]] -then - ROCM_VERSION_DEV_RAW=$(cat ${PACKAGE_PATH_TWO}) - if [[ "x$DEBUG" = "x1" ]] - then - echo "Found ${PACKAGE_PATH_TWO}, ROCM_VERSION_DEV_RAW=${ROCM_VERSION_DEV_RAW}" - fi -else - echo "ROCM_VERSION_NOT_FOUND" - exit 1 -fi - -a=($(parse_semver "${ROCM_VERSION_DEV_RAW}")) -major=${a[0]} -minor=${a[1]} -patch=${a[2]} - -echo "$((major * 10000 + minor * 100 + patch))" - From be231a8707e1a65b16d5b87311a956b67781ee75 Mon Sep 17 00:00:00 2001 From: Xeonacid Date: Tue, 29 Apr 2025 14:56:50 +0800 Subject: [PATCH 04/27] Drop CMP0037 to fix cmake 4.0 build error Don't know why it is here. Removing it builds fine on my end. --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a9166be8..9324be3eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -660,7 +660,6 @@ endif() set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" ) -cmake_policy( SET CMP0037 OLD) foreach( TEST ${sparse_testing_all} ) string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} ) string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} ) From 87bf295a8ffbd041b49184b1a5fe2374630fee1e Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Fri, 14 Feb 2025 13:31:31 +0100 Subject: [PATCH 05/27] remove blas_fix, which was to support macOS Accelerate --- blas_fix/Makefile | 10 - blas_fix/Makefile.src | 67 -- blas_fix/README.txt | 8 - blas_fix/cdotc.f | 102 --- blas_fix/cdotu.f | 98 --- blas_fix/cladiv.f | 97 --- blas_fix/clangb.f | 227 ------ blas_fix/clange.f | 214 ------ blas_fix/clangt.f | 209 ------ blas_fix/clanhb.f | 277 -------- blas_fix/clanhe.f | 259 ------- blas_fix/clanhf.f | 1573 ----------------------------------------- blas_fix/clanhp.f | 270 ------- blas_fix/clanhs.f | 208 ------ blas_fix/clanht.f | 189 ----- blas_fix/clansb.f | 261 ------- blas_fix/clansp.f | 273 ------- blas_fix/clansy.f | 244 ------- blas_fix/clantb.f | 364 ---------- blas_fix/clantp.f | 358 ---------- blas_fix/clantr.f | 356 ---------- blas_fix/sasum.f | 112 --- blas_fix/scabs1.f | 57 -- blas_fix/scasum.f | 97 --- blas_fix/scnrm2.f | 119 ---- blas_fix/scsum1.f | 140 ---- blas_fix/sdot.f | 117 --- blas_fix/sdsdot.f | 255 ------- blas_fix/sladiv.f | 253 ------- blas_fix/slamch.f | 192 ----- blas_fix/slangb.f | 226 ------ blas_fix/slange.f | 212 ------ blas_fix/slangt.f | 209 ------ blas_fix/slanhs.f | 206 ------ blas_fix/slansb.f | 259 ------- blas_fix/slansf.f | 965 ------------------------- blas_fix/slansp.f | 262 ------- blas_fix/slanst.f | 187 ----- blas_fix/slansy.f | 242 ------- blas_fix/slantb.f | 362 ---------- blas_fix/slantp.f | 356 ---------- blas_fix/slantr.f | 354 ---------- blas_fix/slapy2.f | 104 --- blas_fix/slapy3.f | 111 --- blas_fix/snrm2.f | 112 --- 45 files changed, 11173 deletions(-) delete mode 100644 blas_fix/Makefile delete mode 100644 blas_fix/Makefile.src delete mode 100644 blas_fix/README.txt delete mode 100644 blas_fix/cdotc.f delete mode 100644 blas_fix/cdotu.f delete mode 100644 blas_fix/cladiv.f delete mode 100644 blas_fix/clangb.f delete mode 100644 blas_fix/clange.f delete mode 100644 blas_fix/clangt.f delete mode 100644 blas_fix/clanhb.f delete mode 100644 blas_fix/clanhe.f delete mode 100644 blas_fix/clanhf.f delete mode 100644 blas_fix/clanhp.f delete mode 100644 blas_fix/clanhs.f delete mode 100644 blas_fix/clanht.f delete mode 100644 blas_fix/clansb.f delete mode 100644 blas_fix/clansp.f delete mode 100644 blas_fix/clansy.f delete mode 100644 blas_fix/clantb.f delete mode 100644 blas_fix/clantp.f delete mode 100644 blas_fix/clantr.f delete mode 100644 blas_fix/sasum.f delete mode 100644 blas_fix/scabs1.f delete mode 100644 blas_fix/scasum.f delete mode 100644 blas_fix/scnrm2.f delete mode 100644 blas_fix/scsum1.f delete mode 100644 blas_fix/sdot.f delete mode 100644 blas_fix/sdsdot.f delete mode 100644 blas_fix/sladiv.f delete mode 100644 blas_fix/slamch.f delete mode 100644 blas_fix/slangb.f delete mode 100644 blas_fix/slange.f delete mode 100644 blas_fix/slangt.f delete mode 100644 blas_fix/slanhs.f delete mode 100644 blas_fix/slansb.f delete mode 100644 blas_fix/slansf.f delete mode 100644 blas_fix/slansp.f delete mode 100644 blas_fix/slanst.f delete mode 100644 blas_fix/slansy.f delete mode 100644 blas_fix/slantb.f delete mode 100644 blas_fix/slantp.f delete mode 100644 blas_fix/slantr.f delete mode 100644 blas_fix/slapy2.f delete mode 100644 blas_fix/slapy3.f delete mode 100644 blas_fix/snrm2.f diff --git a/blas_fix/Makefile b/blas_fix/Makefile deleted file mode 100644 index 6e7f5693d..000000000 --- a/blas_fix/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# See Makefile.src for list of files in this directory. -# This makefile simply forwards commands to the top-level makefile. - -top := .. -pwd := $(shell pwd) -cdir := $(notdir $(pwd)) - -default: $(cdir) - -include $(top)/Makefile.subdir diff --git a/blas_fix/Makefile.src b/blas_fix/Makefile.src deleted file mode 100644 index 6a3a76c78..000000000 --- a/blas_fix/Makefile.src +++ /dev/null @@ -1,67 +0,0 @@ -#////////////////////////////////////////////////////////////////////////////// -# -- MAGMA (version 2.0) -- -# Univ. of Tennessee, Knoxville -# Univ. of California, Berkeley -# Univ. of Colorado, Denver -# @date -#////////////////////////////////////////////////////////////////////////////// - -# push previous directory -dir_stack := $(dir_stack) $(cdir) -cdir := blas_fix -# ---------------------------------------------------------------------- - - -# BLAS single & single-complex functions -libblas_fix_src := \ - $(cdir)/cdotc.f \ - $(cdir)/cdotu.f \ - $(cdir)/sasum.f \ - $(cdir)/scabs1.f \ - $(cdir)/scasum.f \ - $(cdir)/scnrm2.f \ - $(cdir)/sdot.f \ - $(cdir)/sdsdot.f \ - $(cdir)/snrm2.f \ - -# LAPACK single & single-complex functions -libblas_fix_src += \ - $(cdir)/cladiv.f \ - $(cdir)/clangb.f \ - $(cdir)/clange.f \ - $(cdir)/clangt.f \ - $(cdir)/clanhb.f \ - $(cdir)/clanhe.f \ - $(cdir)/clanhf.f \ - $(cdir)/clanhp.f \ - $(cdir)/clanhs.f \ - $(cdir)/clanht.f \ - $(cdir)/clansb.f \ - $(cdir)/clansp.f \ - $(cdir)/clansy.f \ - $(cdir)/clantb.f \ - $(cdir)/clantp.f \ - $(cdir)/clantr.f \ - $(cdir)/scsum1.f \ - $(cdir)/sladiv.f \ - $(cdir)/slangb.f \ - $(cdir)/slange.f \ - $(cdir)/slangt.f \ - $(cdir)/slanhs.f \ - $(cdir)/slansb.f \ - $(cdir)/slansf.f \ - $(cdir)/slansp.f \ - $(cdir)/slanst.f \ - $(cdir)/slansy.f \ - $(cdir)/slantb.f \ - $(cdir)/slantp.f \ - $(cdir)/slantr.f \ - $(cdir)/slapy2.f \ - $(cdir)/slapy3.f \ - $(cdir)/slamch.f \ - - -# ---------------------------------------------------------------------- -# pop first directory -cdir := $(firstword $(dir_stack)) -dir_stack := $(wordlist 2, $(words $(dir_stack)), $(dir_stack)) diff --git a/blas_fix/README.txt b/blas_fix/README.txt deleted file mode 100644 index a91934443..000000000 --- a/blas_fix/README.txt +++ /dev/null @@ -1,8 +0,0 @@ -MacOS veclib has a bug where some single precision functions return -a double precision result, for instance slange. -This is observed with -m64, but oddly not with -m32. -The easiest fix is to replace those routines with correct ones from LAPACK (3.5.0). -See BLAS_FIX in make.inc.macos - -Note that these are Level 1 and Level 2 BLAS and BLAS-like functions, -primarily norms and dot products, which are not generally performance critical. diff --git a/blas_fix/cdotc.f b/blas_fix/cdotc.f deleted file mode 100644 index 8e7d8b9d9..000000000 --- a/blas_fix/cdotc.f +++ /dev/null @@ -1,102 +0,0 @@ -*> \brief \b CDOTC -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY) -* -* .. Scalar Arguments .. -* INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. -* COMPLEX CX(*),CY(*) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> forms the dot product of two vectors, conjugating the first -*> vector. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup complex_blas_level1 -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> jack dongarra, linpack, 3/11/78. -*> modified 12/3/93, array(1) declarations changed to array(*) -*> \endverbatim -*> -* ===================================================================== - COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. - COMPLEX CX(*),CY(*) -* .. -* -* ===================================================================== -* -* .. Local Scalars .. - COMPLEX CTEMP - INTEGER I,IX,IY -* .. -* .. Intrinsic Functions .. - INTRINSIC CONJG -* .. - CTEMP = (0.0,0.0) - CDOTC = (0.0,0.0) - IF (N.LE.0) RETURN - IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN -* -* code for both increments equal to 1 -* - DO I = 1,N - CTEMP = CTEMP + CONJG(CX(I))*CY(I) - END DO - ELSE -* -* code for unequal increments or equal increments -* not equal to 1 -* - IX = 1 - IY = 1 - IF (INCX.LT.0) IX = (-N+1)*INCX + 1 - IF (INCY.LT.0) IY = (-N+1)*INCY + 1 - DO I = 1,N - CTEMP = CTEMP + CONJG(CX(IX))*CY(IY) - IX = IX + INCX - IY = IY + INCY - END DO - END IF - CDOTC = CTEMP - RETURN - END diff --git a/blas_fix/cdotu.f b/blas_fix/cdotu.f deleted file mode 100644 index 456a409f7..000000000 --- a/blas_fix/cdotu.f +++ /dev/null @@ -1,98 +0,0 @@ -*> \brief \b CDOTU -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY) -* -* .. Scalar Arguments .. -* INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. -* COMPLEX CX(*),CY(*) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CDOTU forms the dot product of two vectors. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup complex_blas_level1 -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> jack dongarra, linpack, 3/11/78. -*> modified 12/3/93, array(1) declarations changed to array(*) -*> \endverbatim -*> -* ===================================================================== - COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. - COMPLEX CX(*),CY(*) -* .. -* -* ===================================================================== -* -* .. Local Scalars .. - COMPLEX CTEMP - INTEGER I,IX,IY -* .. - CTEMP = (0.0,0.0) - CDOTU = (0.0,0.0) - IF (N.LE.0) RETURN - IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN -* -* code for both increments equal to 1 -* - DO I = 1,N - CTEMP = CTEMP + CX(I)*CY(I) - END DO - ELSE -* -* code for unequal increments or equal increments -* not equal to 1 -* - IX = 1 - IY = 1 - IF (INCX.LT.0) IX = (-N+1)*INCX + 1 - IF (INCY.LT.0) IY = (-N+1)*INCY + 1 - DO I = 1,N - CTEMP = CTEMP + CX(IX)*CY(IY) - IX = IX + INCX - IY = IY + INCY - END DO - END IF - CDOTU = CTEMP - RETURN - END diff --git a/blas_fix/cladiv.f b/blas_fix/cladiv.f deleted file mode 100644 index 0d0ec666f..000000000 --- a/blas_fix/cladiv.f +++ /dev/null @@ -1,97 +0,0 @@ -*> \brief \b CLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLADIV + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* COMPLEX FUNCTION CLADIV( X, Y ) -* -* .. Scalar Arguments .. -* COMPLEX X, Y -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLADIV := X / Y, where X and Y are complex. The computation of X / Y -*> will not overflow on an intermediary step unless the results -*> overflows. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] X -*> \verbatim -*> X is COMPLEX -*> \endverbatim -*> -*> \param[in] Y -*> \verbatim -*> Y is COMPLEX -*> The complex scalars X and Y. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - COMPLEX FUNCTION CLADIV( X, Y ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - COMPLEX X, Y -* .. -* -* ===================================================================== -* -* .. Local Scalars .. - REAL ZI, ZR -* .. -* .. External Subroutines .. - EXTERNAL SLADIV -* .. -* .. Intrinsic Functions .. - INTRINSIC AIMAG, CMPLX, REAL -* .. -* .. Executable Statements .. -* - CALL SLADIV( REAL( X ), AIMAG( X ), REAL( Y ), AIMAG( Y ), ZR, - $ ZI ) - CLADIV = CMPLX( ZR, ZI ) -* - RETURN -* -* End of CLADIV -* - END diff --git a/blas_fix/clangb.f b/blas_fix/clangb.f deleted file mode 100644 index cb6ef626e..000000000 --- a/blas_fix/clangb.f +++ /dev/null @@ -1,227 +0,0 @@ -*> \brief \b CLANGB returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of general band matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANGB + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANGB( NORM, N, KL, KU, AB, LDAB, -* WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER KL, KU, LDAB, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX AB( LDAB, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANGB returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of an -*> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. -*> \endverbatim -*> -*> \return CLANGB -*> \verbatim -*> -*> CLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANGB as described -*> above. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANGB is -*> set to zero. -*> \endverbatim -*> -*> \param[in] KL -*> \verbatim -*> KL is INTEGER -*> The number of sub-diagonals of the matrix A. KL >= 0. -*> \endverbatim -*> -*> \param[in] KU -*> \verbatim -*> KU is INTEGER -*> The number of super-diagonals of the matrix A. KU >= 0. -*> \endverbatim -*> -*> \param[in] AB -*> \verbatim -*> AB is COMPLEX array, dimension (LDAB,N) -*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th -*> column of A is stored in the j-th column of the array AB as -*> follows: -*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). -*> \endverbatim -*> -*> \param[in] LDAB -*> \verbatim -*> LDAB is INTEGER -*> The leading dimension of the array AB. LDAB >= KL+KU+1. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexGBauxiliary -* -* ===================================================================== - REAL FUNCTION CLANGB( NORM, N, KL, KU, AB, LDAB, - $ WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER KL, KU, LDAB, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX AB( LDAB, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, K, L - REAL SCALE, SUM, VALUE, TEMP -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - DO 20 J = 1, N - DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) - TEMP = ABS( AB( I, J ) ) - IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP - 10 CONTINUE - 20 CONTINUE - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - DO 40 J = 1, N - SUM = ZERO - DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) - SUM = SUM + ABS( AB( I, J ) ) - 30 CONTINUE - IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 40 CONTINUE - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - DO 50 I = 1, N - WORK( I ) = ZERO - 50 CONTINUE - DO 70 J = 1, N - K = KU + 1 - J - DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) - WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) - 60 CONTINUE - 70 CONTINUE - VALUE = ZERO - DO 80 I = 1, N - TEMP = WORK( I ) - IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP - 80 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - DO 90 J = 1, N - L = MAX( 1, J-KU ) - K = KU + 1 - J + L - CALL CLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) - 90 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANGB = VALUE - RETURN -* -* End of CLANGB -* - END diff --git a/blas_fix/clange.f b/blas_fix/clange.f deleted file mode 100644 index 8423b69c4..000000000 --- a/blas_fix/clange.f +++ /dev/null @@ -1,214 +0,0 @@ -*> \brief \b CLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general rectangular matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANGE + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANGE( NORM, M, N, A, LDA, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER LDA, M, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX A( LDA, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANGE returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> complex matrix A. -*> \endverbatim -*> -*> \return CLANGE -*> \verbatim -*> -*> CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANGE as described -*> above. -*> \endverbatim -*> -*> \param[in] M -*> \verbatim -*> M is INTEGER -*> The number of rows of the matrix A. M >= 0. When M = 0, -*> CLANGE is set to zero. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The number of columns of the matrix A. N >= 0. When N = 0, -*> CLANGE is set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is COMPLEX array, dimension (LDA,N) -*> The m by n matrix A. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(M,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexGEauxiliary -* -* ===================================================================== - REAL FUNCTION CLANGE( NORM, M, N, A, LDA, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER LDA, M, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX A( LDA, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J - REAL SCALE, SUM, VALUE, TEMP -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( MIN( M, N ).EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - DO 20 J = 1, N - DO 10 I = 1, M - TEMP = ABS( A( I, J ) ) - IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP - 10 CONTINUE - 20 CONTINUE - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - DO 40 J = 1, N - SUM = ZERO - DO 30 I = 1, M - SUM = SUM + ABS( A( I, J ) ) - 30 CONTINUE - IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 40 CONTINUE - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - DO 50 I = 1, M - WORK( I ) = ZERO - 50 CONTINUE - DO 70 J = 1, N - DO 60 I = 1, M - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 60 CONTINUE - 70 CONTINUE - VALUE = ZERO - DO 80 I = 1, M - TEMP = WORK( I ) - IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP - 80 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - DO 90 J = 1, N - CALL CLASSQ( M, A( 1, J ), 1, SCALE, SUM ) - 90 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANGE = VALUE - RETURN -* -* End of CLANGE -* - END diff --git a/blas_fix/clangt.f b/blas_fix/clangt.f deleted file mode 100644 index 6b15a3913..000000000 --- a/blas_fix/clangt.f +++ /dev/null @@ -1,209 +0,0 @@ -*> \brief \b CLANGT returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general tridiagonal matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANGT + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANGT( NORM, N, DL, D, DU ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER N -* .. -* .. Array Arguments .. -* COMPLEX D( * ), DL( * ), DU( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANGT returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> complex tridiagonal matrix A. -*> \endverbatim -*> -*> \return CLANGT -*> \verbatim -*> -*> CLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANGT as described -*> above. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANGT is -*> set to zero. -*> \endverbatim -*> -*> \param[in] DL -*> \verbatim -*> DL is COMPLEX array, dimension (N-1) -*> The (n-1) sub-diagonal elements of A. -*> \endverbatim -*> -*> \param[in] D -*> \verbatim -*> D is COMPLEX array, dimension (N) -*> The diagonal elements of A. -*> \endverbatim -*> -*> \param[in] DU -*> \verbatim -*> DU is COMPLEX array, dimension (N-1) -*> The (n-1) super-diagonal elements of A. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANGT( NORM, N, DL, D, DU ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER N -* .. -* .. Array Arguments .. - COMPLEX D( * ), DL( * ), DU( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I - REAL ANORM, SCALE, SUM, TEMP -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - ANORM = ZERO - IF( N.LE.0 ) THEN - ANORM = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - ANORM = ABS( D( N ) ) - DO 10 I = 1, N - 1 - IF( ANORM.LT.ABS( DL( I ) ) .OR. SISNAN( ABS( DL( I ) ) ) ) - $ ANORM = ABS(DL(I)) - IF( ANORM.LT.ABS( D( I ) ) .OR. SISNAN( ABS( D( I ) ) ) ) - $ ANORM = ABS(D(I)) - IF( ANORM.LT.ABS( DU( I ) ) .OR. SISNAN (ABS( DU( I ) ) ) ) - $ ANORM = ABS(DU(I)) - 10 CONTINUE - ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN -* -* Find norm1(A). -* - IF( N.EQ.1 ) THEN - ANORM = ABS( D( 1 ) ) - ELSE - ANORM = ABS( D( 1 ) )+ABS( DL( 1 ) ) - TEMP = ABS( D( N ) )+ABS( DU( N-1 ) ) - IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP - DO 20 I = 2, N - 1 - TEMP = ABS( D( I ) )+ABS( DL( I ) )+ABS( DU( I-1 ) ) - IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP - 20 CONTINUE - END IF - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - IF( N.EQ.1 ) THEN - ANORM = ABS( D( 1 ) ) - ELSE - ANORM = ABS( D( 1 ) )+ABS( DU( 1 ) ) - TEMP = ABS( D( N ) )+ABS( DL( N-1 ) ) - IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP - DO 30 I = 2, N - 1 - TEMP = ABS( D( I ) )+ABS( DU( I ) )+ABS( DL( I-1 ) ) - IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP - 30 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - CALL CLASSQ( N, D, 1, SCALE, SUM ) - IF( N.GT.1 ) THEN - CALL CLASSQ( N-1, DL, 1, SCALE, SUM ) - CALL CLASSQ( N-1, DU, 1, SCALE, SUM ) - END IF - ANORM = SCALE*SQRT( SUM ) - END IF -* - CLANGT = ANORM - RETURN -* -* End of CLANGT -* - END diff --git a/blas_fix/clanhb.f b/blas_fix/clanhb.f deleted file mode 100644 index e6e0d042c..000000000 --- a/blas_fix/clanhb.f +++ /dev/null @@ -1,277 +0,0 @@ -*> \brief \b CLANHB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hermitian band matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANHB + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANHB( NORM, UPLO, N, K, AB, LDAB, -* WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER K, LDAB, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX AB( LDAB, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANHB returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of an -*> n by n hermitian band matrix A, with k super-diagonals. -*> \endverbatim -*> -*> \return CLANHB -*> \verbatim -*> -*> CLANHB = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANHB as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> band matrix A is supplied. -*> = 'U': Upper triangular -*> = 'L': Lower triangular -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANHB is -*> set to zero. -*> \endverbatim -*> -*> \param[in] K -*> \verbatim -*> K is INTEGER -*> The number of super-diagonals or sub-diagonals of the -*> band matrix A. K >= 0. -*> \endverbatim -*> -*> \param[in] AB -*> \verbatim -*> AB is COMPLEX array, dimension (LDAB,N) -*> The upper or lower triangle of the hermitian band matrix A, -*> stored in the first K+1 rows of AB. The j-th column of A is -*> stored in the j-th column of the array AB as follows: -*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; -*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). -*> Note that the imaginary parts of the diagonal elements need -*> not be set and are assumed to be zero. -*> \endverbatim -*> -*> \param[in] LDAB -*> \verbatim -*> LDAB is INTEGER -*> The leading dimension of the array AB. LDAB >= K+1. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANHB( NORM, UPLO, N, K, AB, LDAB, - $ WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER K, LDAB, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX AB( LDAB, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, L - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, MIN, REAL, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = MAX( K+2-J, 1 ), K - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - SUM = ABS( REAL( AB( K+1, J ) ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 20 CONTINUE - ELSE - DO 40 J = 1, N - SUM = ABS( REAL( AB( 1, J ) ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - DO 30 I = 2, MIN( N+1-J, K+1 ) - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is hermitian). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - L = K + 1 - J - DO 50 I = MAX( 1, J-K ), J - 1 - ABSA = ABS( AB( L+I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 50 CONTINUE - WORK( J ) = SUM + ABS( REAL( AB( K+1, J ) ) ) - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( REAL( AB( 1, J ) ) ) - L = 1 - J - DO 90 I = J + 1, MIN( N, J+K ) - ABSA = ABS( AB( L+I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - IF( K.GT.0 ) THEN - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), - $ 1, SCALE, SUM ) - 110 CONTINUE - L = K + 1 - ELSE - DO 120 J = 1, N - 1 - CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, - $ SUM ) - 120 CONTINUE - L = 1 - END IF - SUM = 2*SUM - ELSE - L = 1 - END IF - DO 130 J = 1, N - IF( REAL( AB( L, J ) ).NE.ZERO ) THEN - ABSA = ABS( REAL( AB( L, J ) ) ) - IF( SCALE.LT.ABSA ) THEN - SUM = ONE + SUM*( SCALE / ABSA )**2 - SCALE = ABSA - ELSE - SUM = SUM + ( ABSA / SCALE )**2 - END IF - END IF - 130 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANHB = VALUE - RETURN -* -* End of CLANHB -* - END diff --git a/blas_fix/clanhe.f b/blas_fix/clanhe.f deleted file mode 100644 index fba278b11..000000000 --- a/blas_fix/clanhe.f +++ /dev/null @@ -1,259 +0,0 @@ -*> \brief \b CLANHE returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANHE + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER LDA, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX A( LDA, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANHE returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> complex hermitian matrix A. -*> \endverbatim -*> -*> \return CLANHE -*> \verbatim -*> -*> CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANHE as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> hermitian matrix A is to be referenced. -*> = 'U': Upper triangular part of A is referenced -*> = 'L': Lower triangular part of A is referenced -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANHE is -*> set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is COMPLEX array, dimension (LDA,N) -*> The hermitian matrix A. If UPLO = 'U', the leading n by n -*> upper triangular part of A contains the upper triangular part -*> of the matrix A, and the strictly lower triangular part of A -*> is not referenced. If UPLO = 'L', the leading n by n lower -*> triangular part of A contains the lower triangular part of -*> the matrix A, and the strictly upper triangular part of A is -*> not referenced. Note that the imaginary parts of the diagonal -*> elements need not be set and are assumed to be zero. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(N,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexHEauxiliary -* -* ===================================================================== - REAL FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER LDA, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX A( LDA, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, REAL, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = 1, J - 1 - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - SUM = ABS( REAL( A( J, J ) ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 20 CONTINUE - ELSE - DO 40 J = 1, N - SUM = ABS( REAL( A( J, J ) ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - DO 30 I = J + 1, N - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is hermitian). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - DO 50 I = 1, J - 1 - ABSA = ABS( A( I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 50 CONTINUE - WORK( J ) = SUM + ABS( REAL( A( J, J ) ) ) - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( REAL( A( J, J ) ) ) - DO 90 I = J + 1, N - ABSA = ABS( A( I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) - 110 CONTINUE - ELSE - DO 120 J = 1, N - 1 - CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) - 120 CONTINUE - END IF - SUM = 2*SUM - DO 130 I = 1, N - IF( REAL( A( I, I ) ).NE.ZERO ) THEN - ABSA = ABS( REAL( A( I, I ) ) ) - IF( SCALE.LT.ABSA ) THEN - SUM = ONE + SUM*( SCALE / ABSA )**2 - SCALE = ABSA - ELSE - SUM = SUM + ( ABSA / SCALE )**2 - END IF - END IF - 130 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANHE = VALUE - RETURN -* -* End of CLANHE -* - END diff --git a/blas_fix/clanhf.f b/blas_fix/clanhf.f deleted file mode 100644 index f7bd4fb55..000000000 --- a/blas_fix/clanhf.f +++ /dev/null @@ -1,1573 +0,0 @@ -*> \brief \b CLANHF returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hermitian matrix in RFP format. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANHF + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANHF( NORM, TRANSR, UPLO, N, A, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, TRANSR, UPLO -* INTEGER N -* .. -* .. Array Arguments .. -* REAL WORK( 0: * ) -* COMPLEX A( 0: * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANHF returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> complex Hermitian matrix A in RFP format. -*> \endverbatim -*> -*> \return CLANHF -*> \verbatim -*> -*> CLANHF = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER -*> Specifies the value to be returned in CLANHF as described -*> above. -*> \endverbatim -*> -*> \param[in] TRANSR -*> \verbatim -*> TRANSR is CHARACTER -*> Specifies whether the RFP format of A is normal or -*> conjugate-transposed format. -*> = 'N': RFP format is Normal -*> = 'C': RFP format is Conjugate-transposed -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER -*> On entry, UPLO specifies whether the RFP matrix A came from -*> an upper or lower triangular matrix as follows: -*> -*> UPLO = 'U' or 'u' RFP A came from an upper triangular -*> matrix -*> -*> UPLO = 'L' or 'l' RFP A came from a lower triangular -*> matrix -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANHF is -*> set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is COMPLEX array, dimension ( N*(N+1)/2 ); -*> On entry, the matrix A in RFP Format. -*> RFP Format is described by TRANSR, UPLO and N as follows: -*> If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; -*> K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If -*> TRANSR = 'C' then RFP is the Conjugate-transpose of RFP A -*> as defined when TRANSR = 'N'. The contents of RFP A are -*> defined by UPLO as follows: If UPLO = 'U' the RFP A -*> contains the ( N*(N+1)/2 ) elements of upper packed A -*> either in normal or conjugate-transpose Format. If -*> UPLO = 'L' the RFP A contains the ( N*(N+1) /2 ) elements -*> of lower packed A either in normal or conjugate-transpose -*> Format. The LDA of RFP A is (N+1)/2 when TRANSR = 'C'. When -*> TRANSR is 'N' the LDA is N+1 when N is even and is N when -*> is odd. See the Note below for more details. -*> Unchanged on exit. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (LWORK), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERcomputational -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> We first consider Standard Packed Format when N is even. -*> We give an example where N = 6. -*> -*> AP is Upper AP is Lower -*> -*> 00 01 02 03 04 05 00 -*> 11 12 13 14 15 10 11 -*> 22 23 24 25 20 21 22 -*> 33 34 35 30 31 32 33 -*> 44 45 40 41 42 43 44 -*> 55 50 51 52 53 54 55 -*> -*> -*> Let TRANSR = 'N'. RFP holds AP as follows: -*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last -*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of -*> conjugate-transpose of the first three columns of AP upper. -*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first -*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of -*> conjugate-transpose of the last three columns of AP lower. -*> To denote conjugate we place -- above the element. This covers the -*> case N even and TRANSR = 'N'. -*> -*> RFP A RFP A -*> -*> -- -- -- -*> 03 04 05 33 43 53 -*> -- -- -*> 13 14 15 00 44 54 -*> -- -*> 23 24 25 10 11 55 -*> -*> 33 34 35 20 21 22 -*> -- -*> 00 44 45 30 31 32 -*> -- -- -*> 01 11 55 40 41 42 -*> -- -- -- -*> 02 12 22 50 51 52 -*> -*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- -*> transpose of RFP A above. One therefore gets: -*> -*> -*> RFP A RFP A -*> -*> -- -- -- -- -- -- -- -- -- -- -*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 -*> -- -- -- -- -- -- -- -- -- -- -*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 -*> -- -- -- -- -- -- -- -- -- -- -*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 -*> -*> -*> We next consider Standard Packed Format when N is odd. -*> We give an example where N = 5. -*> -*> AP is Upper AP is Lower -*> -*> 00 01 02 03 04 00 -*> 11 12 13 14 10 11 -*> 22 23 24 20 21 22 -*> 33 34 30 31 32 33 -*> 44 40 41 42 43 44 -*> -*> -*> Let TRANSR = 'N'. RFP holds AP as follows: -*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last -*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of -*> conjugate-transpose of the first two columns of AP upper. -*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first -*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of -*> conjugate-transpose of the last two columns of AP lower. -*> To denote conjugate we place -- above the element. This covers the -*> case N odd and TRANSR = 'N'. -*> -*> RFP A RFP A -*> -*> -- -- -*> 02 03 04 00 33 43 -*> -- -*> 12 13 14 10 11 44 -*> -*> 22 23 24 20 21 22 -*> -- -*> 00 33 34 30 31 32 -*> -- -- -*> 01 11 44 40 41 42 -*> -*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- -*> transpose of RFP A above. One therefore gets: -*> -*> -*> RFP A RFP A -*> -*> -- -- -- -- -- -- -- -- -- -*> 02 12 22 00 01 00 10 20 30 40 50 -*> -- -- -- -- -- -- -- -- -- -*> 03 13 23 33 11 33 11 21 31 41 51 -*> -- -- -- -- -- -- -- -- -- -*> 04 14 24 34 44 43 44 22 32 42 52 -*> \endverbatim -*> -* ===================================================================== - REAL FUNCTION CLANHF( NORM, TRANSR, UPLO, N, A, WORK ) -* -* -- LAPACK computational routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, TRANSR, UPLO - INTEGER N -* .. -* .. Array Arguments .. - REAL WORK( 0: * ) - COMPLEX A( 0: * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, IFM, ILU, NOE, N1, K, L, LDA - REAL SCALE, S, VALUE, AA, TEMP -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, REAL, SQRT -* .. -* .. Executable Statements .. -* - IF( N.EQ.0 ) THEN - CLANHF = ZERO - RETURN - ELSE IF( N.EQ.1 ) THEN - CLANHF = ABS( A(0) ) - RETURN - END IF -* -* set noe = 1 if n is odd. if n is even set noe=0 -* - NOE = 1 - IF( MOD( N, 2 ).EQ.0 ) - $ NOE = 0 -* -* set ifm = 0 when form='C' or 'c' and 1 otherwise -* - IFM = 1 - IF( LSAME( TRANSR, 'C' ) ) - $ IFM = 0 -* -* set ilu = 0 when uplo='U or 'u' and 1 otherwise -* - ILU = 1 - IF( LSAME( UPLO, 'U' ) ) - $ ILU = 0 -* -* set lda = (n+1)/2 when ifm = 0 -* set lda = n when ifm = 1 and noe = 1 -* set lda = n+1 when ifm = 1 and noe = 0 -* - IF( IFM.EQ.1 ) THEN - IF( NOE.EQ.1 ) THEN - LDA = N - ELSE -* noe=0 - LDA = N + 1 - END IF - ELSE -* ifm=0 - LDA = ( N+1 ) / 2 - END IF -* - IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - K = ( N+1 ) / 2 - VALUE = ZERO - IF( NOE.EQ.1 ) THEN -* n is odd & n = k + k - 1 - IF( IFM.EQ.1 ) THEN -* A is n by k - IF( ILU.EQ.1 ) THEN -* uplo ='L' - J = 0 -* -> L(0,0) - TEMP = ABS( REAL( A( J+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = 1, N - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - DO J = 1, K - 1 - DO I = 0, J - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = J - 1 -* L(k+j,k+j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = J -* -> L(j,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = J + 1, N - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - ELSE -* uplo = 'U' - DO J = 0, K - 2 - DO I = 0, K + J - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = K + J - 1 -* -> U(i,i) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = I + 1 -* =k+j; i -> U(j,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = K + J + 1, N - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - DO I = 0, N - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP -* j=k-1 - END DO -* i=n-1 -> U(n-1,n-1) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END IF - ELSE -* xpose case; A is k by n - IF( ILU.EQ.1 ) THEN -* uplo ='L' - DO J = 0, K - 2 - DO I = 0, J - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = J -* L(i,i) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = J + 1 -* L(j+k,j+k) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = J + 2, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - J = K - 1 - DO I = 0, K - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = K - 1 -* -> L(i,i) is at A(i,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO J = K, N - 1 - DO I = 0, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - ELSE -* uplo = 'U' - DO J = 0, K - 2 - DO I = 0, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - J = K - 1 -* -> U(j,j) is at A(0,j) - TEMP = ABS( REAL( A( 0+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = 1, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - DO J = K, N - 1 - DO I = 0, J - K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = J - K -* -> U(i,i) at A(i,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = J - K + 1 -* U(j,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = J - K + 2, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - END IF - END IF - ELSE -* n is even & k = n/2 - IF( IFM.EQ.1 ) THEN -* A is n+1 by k - IF( ILU.EQ.1 ) THEN -* uplo ='L' - J = 0 -* -> L(k,k) & j=1 -> L(0,0) - TEMP = ABS( REAL( A( J+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - TEMP = ABS( REAL( A( J+1+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = 2, N - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - DO J = 1, K - 1 - DO I = 0, J - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = J -* L(k+j,k+j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = J + 1 -* -> L(j,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = J + 2, N - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - ELSE -* uplo = 'U' - DO J = 0, K - 2 - DO I = 0, K + J - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = K + J -* -> U(i,i) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = I + 1 -* =k+j+1; i -> U(j,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = K + J + 2, N - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - DO I = 0, N - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP -* j=k-1 - END DO -* i=n-1 -> U(n-1,n-1) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = N -* -> U(k-1,k-1) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END IF - ELSE -* xpose case; A is k by n+1 - IF( ILU.EQ.1 ) THEN -* uplo ='L' - J = 0 -* -> L(k,k) at A(0,0) - TEMP = ABS( REAL( A( J+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = 1, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - DO J = 1, K - 1 - DO I = 0, J - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = J - 1 -* L(i,i) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = J -* L(j+k,j+k) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = J + 1, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - J = K - DO I = 0, K - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = K - 1 -* -> L(i,i) is at A(i,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO J = K + 1, N - DO I = 0, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - ELSE -* uplo = 'U' - DO J = 0, K - 1 - DO I = 0, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - J = K -* -> U(j,j) is at A(0,j) - TEMP = ABS( REAL( A( 0+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = 1, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - DO J = K + 1, N - 1 - DO I = 0, J - K - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = J - K - 1 -* -> U(i,i) at A(i,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - I = J - K -* U(j,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - DO I = J - K + 1, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - J = N - DO I = 0, K - 2 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - I = K - 1 -* U(k,k) at A(i,j) - TEMP = ABS( REAL( A( I+J*LDA ) ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END IF - END IF - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is Hermitian). -* - IF( IFM.EQ.1 ) THEN -* A is 'N' - K = N / 2 - IF( NOE.EQ.1 ) THEN -* n is odd & A is n by (n+1)/2 - IF( ILU.EQ.0 ) THEN -* uplo = 'U' - DO I = 0, K - 1 - WORK( I ) = ZERO - END DO - DO J = 0, K - S = ZERO - DO I = 0, K + J - 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(i,j+k) - S = S + AA - WORK( I ) = WORK( I ) + AA - END DO - AA = ABS( REAL( A( I+J*LDA ) ) ) -* -> A(j+k,j+k) - WORK( J+K ) = S + AA - IF( I.EQ.K+K ) - $ GO TO 10 - I = I + 1 - AA = ABS( REAL( A( I+J*LDA ) ) ) -* -> A(j,j) - WORK( J ) = WORK( J ) + AA - S = ZERO - DO L = J + 1, K - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(l,j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - 10 CONTINUE - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - ELSE -* ilu = 1 & uplo = 'L' - K = K + 1 -* k=(n+1)/2 for n odd and ilu=1 - DO I = K, N - 1 - WORK( I ) = ZERO - END DO - DO J = K - 1, 0, -1 - S = ZERO - DO I = 0, J - 2 - AA = ABS( A( I+J*LDA ) ) -* -> A(j+k,i+k) - S = S + AA - WORK( I+K ) = WORK( I+K ) + AA - END DO - IF( J.GT.0 ) THEN - AA = ABS( REAL( A( I+J*LDA ) ) ) -* -> A(j+k,j+k) - S = S + AA - WORK( I+K ) = WORK( I+K ) + S -* i=j - I = I + 1 - END IF - AA = ABS( REAL( A( I+J*LDA ) ) ) -* -> A(j,j) - WORK( J ) = AA - S = ZERO - DO L = J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(l,j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END IF - ELSE -* n is even & A is n+1 by k = n/2 - IF( ILU.EQ.0 ) THEN -* uplo = 'U' - DO I = 0, K - 1 - WORK( I ) = ZERO - END DO - DO J = 0, K - 1 - S = ZERO - DO I = 0, K + J - 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(i,j+k) - S = S + AA - WORK( I ) = WORK( I ) + AA - END DO - AA = ABS( REAL( A( I+J*LDA ) ) ) -* -> A(j+k,j+k) - WORK( J+K ) = S + AA - I = I + 1 - AA = ABS( REAL( A( I+J*LDA ) ) ) -* -> A(j,j) - WORK( J ) = WORK( J ) + AA - S = ZERO - DO L = J + 1, K - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(l,j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - ELSE -* ilu = 1 & uplo = 'L' - DO I = K, N - 1 - WORK( I ) = ZERO - END DO - DO J = K - 1, 0, -1 - S = ZERO - DO I = 0, J - 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(j+k,i+k) - S = S + AA - WORK( I+K ) = WORK( I+K ) + AA - END DO - AA = ABS( REAL( A( I+J*LDA ) ) ) -* -> A(j+k,j+k) - S = S + AA - WORK( I+K ) = WORK( I+K ) + S -* i=j - I = I + 1 - AA = ABS( REAL( A( I+J*LDA ) ) ) -* -> A(j,j) - WORK( J ) = AA - S = ZERO - DO L = J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(l,j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END IF - END IF - ELSE -* ifm=0 - K = N / 2 - IF( NOE.EQ.1 ) THEN -* n is odd & A is (n+1)/2 by n - IF( ILU.EQ.0 ) THEN -* uplo = 'U' - N1 = K -* n/2 - K = K + 1 -* k is the row size and lda - DO I = N1, N - 1 - WORK( I ) = ZERO - END DO - DO J = 0, N1 - 1 - S = ZERO - DO I = 0, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,n1+i) - WORK( I+N1 ) = WORK( I+N1 ) + AA - S = S + AA - END DO - WORK( J ) = S - END DO -* j=n1=k-1 is special - S = ABS( REAL( A( 0+J*LDA ) ) ) -* A(k-1,k-1) - DO I = 1, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(k-1,i+n1) - WORK( I+N1 ) = WORK( I+N1 ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - DO J = K, N - 1 - S = ZERO - DO I = 0, J - K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(i,j-k) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=j-k - AA = ABS( REAL( A( I+J*LDA ) ) ) -* A(j-k,j-k) - S = S + AA - WORK( J-K ) = WORK( J-K ) + S - I = I + 1 - S = ABS( REAL( A( I+J*LDA ) ) ) -* A(j,j) - DO L = J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,l) - WORK( L ) = WORK( L ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - ELSE -* ilu=1 & uplo = 'L' - K = K + 1 -* k=(n+1)/2 for n odd and ilu=1 - DO I = K, N - 1 - WORK( I ) = ZERO - END DO - DO J = 0, K - 2 -* process - S = ZERO - DO I = 0, J - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO - AA = ABS( REAL( A( I+J*LDA ) ) ) -* i=j so process of A(j,j) - S = S + AA - WORK( J ) = S -* is initialised here - I = I + 1 -* i=j process A(j+k,j+k) - AA = ABS( REAL( A( I+J*LDA ) ) ) - S = AA - DO L = K + J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(l,k+j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( K+J ) = WORK( K+J ) + S - END DO -* j=k-1 is special :process col A(k-1,0:k-1) - S = ZERO - DO I = 0, K - 2 - AA = ABS( A( I+J*LDA ) ) -* A(k,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=k-1 - AA = ABS( REAL( A( I+J*LDA ) ) ) -* A(k-1,k-1) - S = S + AA - WORK( I ) = S -* done with col j=k+1 - DO J = K, N - 1 -* process col j of A = A(j,0:k-1) - S = ZERO - DO I = 0, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END IF - ELSE -* n is even & A is k=n/2 by n+1 - IF( ILU.EQ.0 ) THEN -* uplo = 'U' - DO I = K, N - 1 - WORK( I ) = ZERO - END DO - DO J = 0, K - 1 - S = ZERO - DO I = 0, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,i+k) - WORK( I+K ) = WORK( I+K ) + AA - S = S + AA - END DO - WORK( J ) = S - END DO -* j=k - AA = ABS( REAL( A( 0+J*LDA ) ) ) -* A(k,k) - S = AA - DO I = 1, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(k,k+i) - WORK( I+K ) = WORK( I+K ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - DO J = K + 1, N - 1 - S = ZERO - DO I = 0, J - 2 - K - AA = ABS( A( I+J*LDA ) ) -* A(i,j-k-1) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=j-1-k - AA = ABS( REAL( A( I+J*LDA ) ) ) -* A(j-k-1,j-k-1) - S = S + AA - WORK( J-K-1 ) = WORK( J-K-1 ) + S - I = I + 1 - AA = ABS( REAL( A( I+J*LDA ) ) ) -* A(j,j) - S = AA - DO L = J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,l) - WORK( L ) = WORK( L ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - END DO -* j=n - S = ZERO - DO I = 0, K - 2 - AA = ABS( A( I+J*LDA ) ) -* A(i,k-1) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=k-1 - AA = ABS( REAL( A( I+J*LDA ) ) ) -* A(k-1,k-1) - S = S + AA - WORK( I ) = WORK( I ) + S - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - ELSE -* ilu=1 & uplo = 'L' - DO I = K, N - 1 - WORK( I ) = ZERO - END DO -* j=0 is special :process col A(k:n-1,k) - S = ABS( REAL( A( 0 ) ) ) -* A(k,k) - DO I = 1, K - 1 - AA = ABS( A( I ) ) -* A(k+i,k) - WORK( I+K ) = WORK( I+K ) + AA - S = S + AA - END DO - WORK( K ) = WORK( K ) + S - DO J = 1, K - 1 -* process - S = ZERO - DO I = 0, J - 2 - AA = ABS( A( I+J*LDA ) ) -* A(j-1,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO - AA = ABS( REAL( A( I+J*LDA ) ) ) -* i=j-1 so process of A(j-1,j-1) - S = S + AA - WORK( J-1 ) = S -* is initialised here - I = I + 1 -* i=j process A(j+k,j+k) - AA = ABS( REAL( A( I+J*LDA ) ) ) - S = AA - DO L = K + J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(l,k+j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( K+J ) = WORK( K+J ) + S - END DO -* j=k is special :process col A(k,0:k-1) - S = ZERO - DO I = 0, K - 2 - AA = ABS( A( I+J*LDA ) ) -* A(k,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* -* i=k-1 - AA = ABS( REAL( A( I+J*LDA ) ) ) -* A(k-1,k-1) - S = S + AA - WORK( I ) = S -* done with col j=k+1 - DO J = K + 1, N -* -* process col j-1 of A = A(j-1,0:k-1) - S = ZERO - DO I = 0, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j-1,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO - WORK( J-1 ) = WORK( J-1 ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END IF - END IF - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - K = ( N+1 ) / 2 - SCALE = ZERO - S = ONE - IF( NOE.EQ.1 ) THEN -* n is odd - IF( IFM.EQ.1 ) THEN -* A is normal & A is n by k - IF( ILU.EQ.0 ) THEN -* A is upper - DO J = 0, K - 3 - CALL CLASSQ( K-J-2, A( K+J+1+J*LDA ), 1, SCALE, S ) -* L at A(k,0) - END DO - DO J = 0, K - 1 - CALL CLASSQ( K+J-1, A( 0+J*LDA ), 1, SCALE, S ) -* trap U at A(0,0) - END DO - S = S + S -* double s for the off diagonal elements - L = K - 1 -* -> U(k,k) at A(k-1,0) - DO I = 0, K - 2 - AA = REAL( A( L ) ) -* U(k+i,k+i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - AA = REAL( A( L+1 ) ) -* U(i,i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA + 1 - END DO - AA = REAL( A( L ) ) -* U(n-1,n-1) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - ELSE -* ilu=1 & A is lower - DO J = 0, K - 1 - CALL CLASSQ( N-J-1, A( J+1+J*LDA ), 1, SCALE, S ) -* trap L at A(0,0) - END DO - DO J = 1, K - 2 - CALL CLASSQ( J, A( 0+( 1+J )*LDA ), 1, SCALE, S ) -* U at A(0,1) - END DO - S = S + S -* double s for the off diagonal elements - AA = REAL( A( 0 ) ) -* L(0,0) at A(0,0) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = LDA -* -> L(k,k) at A(0,1) - DO I = 1, K - 1 - AA = REAL( A( L ) ) -* L(k-1+i,k-1+i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - AA = REAL( A( L+1 ) ) -* L(i,i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA + 1 - END DO - END IF - ELSE -* A is xpose & A is k by n - IF( ILU.EQ.0 ) THEN -* A**H is upper - DO J = 1, K - 2 - CALL CLASSQ( J, A( 0+( K+J )*LDA ), 1, SCALE, S ) -* U at A(0,k) - END DO - DO J = 0, K - 2 - CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) -* k by k-1 rect. at A(0,0) - END DO - DO J = 0, K - 2 - CALL CLASSQ( K-J-1, A( J+1+( J+K-1 )*LDA ), 1, - $ SCALE, S ) -* L at A(0,k-1) - END DO - S = S + S -* double s for the off diagonal elements - L = 0 + K*LDA - LDA -* -> U(k-1,k-1) at A(0,k-1) - AA = REAL( A( L ) ) -* U(k-1,k-1) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA -* -> U(0,0) at A(0,k) - DO J = K, N - 1 - AA = REAL( A( L ) ) -* -> U(j-k,j-k) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - AA = REAL( A( L+1 ) ) -* -> U(j,j) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA + 1 - END DO - ELSE -* A**H is lower - DO J = 1, K - 1 - CALL CLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) -* U at A(0,0) - END DO - DO J = K, N - 1 - CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) -* k by k-1 rect. at A(0,k) - END DO - DO J = 0, K - 3 - CALL CLASSQ( K-J-2, A( J+2+J*LDA ), 1, SCALE, S ) -* L at A(1,0) - END DO - S = S + S -* double s for the off diagonal elements - L = 0 -* -> L(0,0) at A(0,0) - DO I = 0, K - 2 - AA = REAL( A( L ) ) -* L(i,i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - AA = REAL( A( L+1 ) ) -* L(k+i,k+i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA + 1 - END DO -* L-> k-1 + (k-1)*lda or L(k-1,k-1) at A(k-1,k-1) - AA = REAL( A( L ) ) -* L(k-1,k-1) at A(k-1,k-1) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - END IF - END IF - ELSE -* n is even - IF( IFM.EQ.1 ) THEN -* A is normal - IF( ILU.EQ.0 ) THEN -* A is upper - DO J = 0, K - 2 - CALL CLASSQ( K-J-1, A( K+J+2+J*LDA ), 1, SCALE, S ) -* L at A(k+1,0) - END DO - DO J = 0, K - 1 - CALL CLASSQ( K+J, A( 0+J*LDA ), 1, SCALE, S ) -* trap U at A(0,0) - END DO - S = S + S -* double s for the off diagonal elements - L = K -* -> U(k,k) at A(k,0) - DO I = 0, K - 1 - AA = REAL( A( L ) ) -* U(k+i,k+i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - AA = REAL( A( L+1 ) ) -* U(i,i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA + 1 - END DO - ELSE -* ilu=1 & A is lower - DO J = 0, K - 1 - CALL CLASSQ( N-J-1, A( J+2+J*LDA ), 1, SCALE, S ) -* trap L at A(1,0) - END DO - DO J = 1, K - 1 - CALL CLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) -* U at A(0,0) - END DO - S = S + S -* double s for the off diagonal elements - L = 0 -* -> L(k,k) at A(0,0) - DO I = 0, K - 1 - AA = REAL( A( L ) ) -* L(k-1+i,k-1+i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - AA = REAL( A( L+1 ) ) -* L(i,i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA + 1 - END DO - END IF - ELSE -* A is xpose - IF( ILU.EQ.0 ) THEN -* A**H is upper - DO J = 1, K - 1 - CALL CLASSQ( J, A( 0+( K+1+J )*LDA ), 1, SCALE, S ) -* U at A(0,k+1) - END DO - DO J = 0, K - 1 - CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) -* k by k rect. at A(0,0) - END DO - DO J = 0, K - 2 - CALL CLASSQ( K-J-1, A( J+1+( J+K )*LDA ), 1, SCALE, - $ S ) -* L at A(0,k) - END DO - S = S + S -* double s for the off diagonal elements - L = 0 + K*LDA -* -> U(k,k) at A(0,k) - AA = REAL( A( L ) ) -* U(k,k) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA -* -> U(0,0) at A(0,k+1) - DO J = K + 1, N - 1 - AA = REAL( A( L ) ) -* -> U(j-k-1,j-k-1) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - AA = REAL( A( L+1 ) ) -* -> U(j,j) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA + 1 - END DO -* L=k-1+n*lda -* -> U(k-1,k-1) at A(k-1,n) - AA = REAL( A( L ) ) -* U(k,k) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - ELSE -* A**H is lower - DO J = 1, K - 1 - CALL CLASSQ( J, A( 0+( J+1 )*LDA ), 1, SCALE, S ) -* U at A(0,1) - END DO - DO J = K + 1, N - CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) -* k by k rect. at A(0,k+1) - END DO - DO J = 0, K - 2 - CALL CLASSQ( K-J-1, A( J+1+J*LDA ), 1, SCALE, S ) -* L at A(0,0) - END DO - S = S + S -* double s for the off diagonal elements - L = 0 -* -> L(k,k) at A(0,0) - AA = REAL( A( L ) ) -* L(k,k) at A(0,0) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = LDA -* -> L(0,0) at A(0,1) - DO I = 0, K - 2 - AA = REAL( A( L ) ) -* L(i,i) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - AA = REAL( A( L+1 ) ) -* L(k+i+1,k+i+1) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - L = L + LDA + 1 - END DO -* L-> k - 1 + k*lda or L(k-1,k-1) at A(k-1,k) - AA = REAL( A( L ) ) -* L(k-1,k-1) at A(k-1,k) - IF( AA.NE.ZERO ) THEN - IF( SCALE.LT.AA ) THEN - S = ONE + S*( SCALE / AA )**2 - SCALE = AA - ELSE - S = S + ( AA / SCALE )**2 - END IF - END IF - END IF - END IF - END IF - VALUE = SCALE*SQRT( S ) - END IF -* - CLANHF = VALUE - RETURN -* -* End of CLANHF -* - END diff --git a/blas_fix/clanhp.f b/blas_fix/clanhp.f deleted file mode 100644 index 2279701ce..000000000 --- a/blas_fix/clanhp.f +++ /dev/null @@ -1,270 +0,0 @@ -*> \brief \b CLANHP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix supplied in packed form. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANHP + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANHP( NORM, UPLO, N, AP, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX AP( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANHP returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> complex hermitian matrix A, supplied in packed form. -*> \endverbatim -*> -*> \return CLANHP -*> \verbatim -*> -*> CLANHP = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANHP as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> hermitian matrix A is supplied. -*> = 'U': Upper triangular part of A is supplied -*> = 'L': Lower triangular part of A is supplied -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANHP is -*> set to zero. -*> \endverbatim -*> -*> \param[in] AP -*> \verbatim -*> AP is COMPLEX array, dimension (N*(N+1)/2) -*> The upper or lower triangle of the hermitian matrix A, packed -*> columnwise in a linear array. The j-th column of A is stored -*> in the array AP as follows: -*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; -*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. -*> Note that the imaginary parts of the diagonal elements need -*> not be set and are assumed to be zero. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANHP( NORM, UPLO, N, AP, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX AP( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, K - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, REAL, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - K = 0 - DO 20 J = 1, N - DO 10 I = K + 1, K + J - 1 - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - K = K + J - SUM = ABS( REAL( AP( K ) ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 20 CONTINUE - ELSE - K = 1 - DO 40 J = 1, N - SUM = ABS( REAL( AP( K ) ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - DO 30 I = K + 1, K + N - J - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - K = K + N - J + 1 - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is hermitian). -* - VALUE = ZERO - K = 1 - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - DO 50 I = 1, J - 1 - ABSA = ABS( AP( K ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - K = K + 1 - 50 CONTINUE - WORK( J ) = SUM + ABS( REAL( AP( K ) ) ) - K = K + 1 - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( REAL( AP( K ) ) ) - K = K + 1 - DO 90 I = J + 1, N - ABSA = ABS( AP( K ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - K = K + 1 - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - K = 2 - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) - K = K + J - 110 CONTINUE - ELSE - DO 120 J = 1, N - 1 - CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) - K = K + N - J + 1 - 120 CONTINUE - END IF - SUM = 2*SUM - K = 1 - DO 130 I = 1, N - IF( REAL( AP( K ) ).NE.ZERO ) THEN - ABSA = ABS( REAL( AP( K ) ) ) - IF( SCALE.LT.ABSA ) THEN - SUM = ONE + SUM*( SCALE / ABSA )**2 - SCALE = ABSA - ELSE - SUM = SUM + ( ABSA / SCALE )**2 - END IF - END IF - IF( LSAME( UPLO, 'U' ) ) THEN - K = K + I + 1 - ELSE - K = K + N - I + 1 - END IF - 130 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANHP = VALUE - RETURN -* -* End of CLANHP -* - END diff --git a/blas_fix/clanhs.f b/blas_fix/clanhs.f deleted file mode 100644 index f0be01791..000000000 --- a/blas_fix/clanhs.f +++ /dev/null @@ -1,208 +0,0 @@ -*> \brief \b CLANHS returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of an upper Hessenberg matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANHS + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANHS( NORM, N, A, LDA, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER LDA, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX A( LDA, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANHS returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> Hessenberg matrix A. -*> \endverbatim -*> -*> \return CLANHS -*> \verbatim -*> -*> CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANHS as described -*> above. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANHS is -*> set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is COMPLEX array, dimension (LDA,N) -*> The n by n upper Hessenberg matrix A; the part of A below the -*> first sub-diagonal is not referenced. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(N,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANHS( NORM, N, A, LDA, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER LDA, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX A( LDA, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J - REAL SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - DO 20 J = 1, N - DO 10 I = 1, MIN( N, J+1 ) - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - DO 40 J = 1, N - SUM = ZERO - DO 30 I = 1, MIN( N, J+1 ) - SUM = SUM + ABS( A( I, J ) ) - 30 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 40 CONTINUE - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - DO 50 I = 1, N - WORK( I ) = ZERO - 50 CONTINUE - DO 70 J = 1, N - DO 60 I = 1, MIN( N, J+1 ) - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 60 CONTINUE - 70 CONTINUE - VALUE = ZERO - DO 80 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 80 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - DO 90 J = 1, N - CALL CLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) - 90 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANHS = VALUE - RETURN -* -* End of CLANHS -* - END diff --git a/blas_fix/clanht.f b/blas_fix/clanht.f deleted file mode 100644 index f50d2c883..000000000 --- a/blas_fix/clanht.f +++ /dev/null @@ -1,189 +0,0 @@ -*> \brief \b CLANHT returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian tridiagonal matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANHT + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANHT( NORM, N, D, E ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER N -* .. -* .. Array Arguments .. -* REAL D( * ) -* COMPLEX E( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANHT returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> complex Hermitian tridiagonal matrix A. -*> \endverbatim -*> -*> \return CLANHT -*> \verbatim -*> -*> CLANHT = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANHT as described -*> above. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANHT is -*> set to zero. -*> \endverbatim -*> -*> \param[in] D -*> \verbatim -*> D is REAL array, dimension (N) -*> The diagonal elements of A. -*> \endverbatim -*> -*> \param[in] E -*> \verbatim -*> E is COMPLEX array, dimension (N-1) -*> The (n-1) sub-diagonal or super-diagonal elements of A. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANHT( NORM, N, D, E ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER N -* .. -* .. Array Arguments .. - REAL D( * ) - COMPLEX E( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I - REAL ANORM, SCALE, SUM -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ, SLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - ANORM = ZERO - IF( N.LE.0 ) THEN - ANORM = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - ANORM = ABS( D( N ) ) - DO 10 I = 1, N - 1 - SUM = ABS( D( I ) ) - IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM - SUM = ABS( E( I ) ) - IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM - 10 CONTINUE - ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. - $ LSAME( NORM, 'I' ) ) THEN -* -* Find norm1(A). -* - IF( N.EQ.1 ) THEN - ANORM = ABS( D( 1 ) ) - ELSE - ANORM = ABS( D( 1 ) )+ABS( E( 1 ) ) - SUM = ABS( E( N-1 ) )+ABS( D( N ) ) - IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM - DO 20 I = 2, N - 1 - SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) ) - IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM - 20 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - IF( N.GT.1 ) THEN - CALL CLASSQ( N-1, E, 1, SCALE, SUM ) - SUM = 2*SUM - END IF - CALL SLASSQ( N, D, 1, SCALE, SUM ) - ANORM = SCALE*SQRT( SUM ) - END IF -* - CLANHT = ANORM - RETURN -* -* End of CLANHT -* - END diff --git a/blas_fix/clansb.f b/blas_fix/clansb.f deleted file mode 100644 index d386d7c09..000000000 --- a/blas_fix/clansb.f +++ /dev/null @@ -1,261 +0,0 @@ -*> \brief \b CLANSB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric band matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANSB + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANSB( NORM, UPLO, N, K, AB, LDAB, -* WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER K, LDAB, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX AB( LDAB, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANSB returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of an -*> n by n symmetric band matrix A, with k super-diagonals. -*> \endverbatim -*> -*> \return CLANSB -*> \verbatim -*> -*> CLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANSB as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> band matrix A is supplied. -*> = 'U': Upper triangular part is supplied -*> = 'L': Lower triangular part is supplied -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANSB is -*> set to zero. -*> \endverbatim -*> -*> \param[in] K -*> \verbatim -*> K is INTEGER -*> The number of super-diagonals or sub-diagonals of the -*> band matrix A. K >= 0. -*> \endverbatim -*> -*> \param[in] AB -*> \verbatim -*> AB is COMPLEX array, dimension (LDAB,N) -*> The upper or lower triangle of the symmetric band matrix A, -*> stored in the first K+1 rows of AB. The j-th column of A is -*> stored in the j-th column of the array AB as follows: -*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; -*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). -*> \endverbatim -*> -*> \param[in] LDAB -*> \verbatim -*> LDAB is INTEGER -*> The leading dimension of the array AB. LDAB >= K+1. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANSB( NORM, UPLO, N, K, AB, LDAB, - $ WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER K, LDAB, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX AB( LDAB, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, L - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = MAX( K+2-J, 1 ), K + 1 - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = 1, MIN( N+1-J, K+1 ) - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is symmetric). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - L = K + 1 - J - DO 50 I = MAX( 1, J-K ), J - 1 - ABSA = ABS( AB( L+I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 50 CONTINUE - WORK( J ) = SUM + ABS( AB( K+1, J ) ) - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( AB( 1, J ) ) - L = 1 - J - DO 90 I = J + 1, MIN( N, J+K ) - ABSA = ABS( AB( L+I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - IF( K.GT.0 ) THEN - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), - $ 1, SCALE, SUM ) - 110 CONTINUE - L = K + 1 - ELSE - DO 120 J = 1, N - 1 - CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, - $ SUM ) - 120 CONTINUE - L = 1 - END IF - SUM = 2*SUM - ELSE - L = 1 - END IF - CALL CLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANSB = VALUE - RETURN -* -* End of CLANSB -* - END diff --git a/blas_fix/clansp.f b/blas_fix/clansp.f deleted file mode 100644 index 19b85d6ce..000000000 --- a/blas_fix/clansp.f +++ /dev/null @@ -1,273 +0,0 @@ -*> \brief \b CLANSP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix supplied in packed form. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANSP + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANSP( NORM, UPLO, N, AP, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX AP( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANSP returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> complex symmetric matrix A, supplied in packed form. -*> \endverbatim -*> -*> \return CLANSP -*> \verbatim -*> -*> CLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANSP as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> symmetric matrix A is supplied. -*> = 'U': Upper triangular part of A is supplied -*> = 'L': Lower triangular part of A is supplied -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANSP is -*> set to zero. -*> \endverbatim -*> -*> \param[in] AP -*> \verbatim -*> AP is COMPLEX array, dimension (N*(N+1)/2) -*> The upper or lower triangle of the symmetric matrix A, packed -*> columnwise in a linear array. The j-th column of A is stored -*> in the array AP as follows: -*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; -*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANSP( NORM, UPLO, N, AP, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX AP( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, K - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, AIMAG, REAL, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - K = 1 - DO 20 J = 1, N - DO 10 I = K, K + J - 1 - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - K = K + J - 20 CONTINUE - ELSE - K = 1 - DO 40 J = 1, N - DO 30 I = K, K + N - J - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - K = K + N - J + 1 - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is symmetric). -* - VALUE = ZERO - K = 1 - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - DO 50 I = 1, J - 1 - ABSA = ABS( AP( K ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - K = K + 1 - 50 CONTINUE - WORK( J ) = SUM + ABS( AP( K ) ) - K = K + 1 - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( AP( K ) ) - K = K + 1 - DO 90 I = J + 1, N - ABSA = ABS( AP( K ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - K = K + 1 - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - K = 2 - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) - K = K + J - 110 CONTINUE - ELSE - DO 120 J = 1, N - 1 - CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) - K = K + N - J + 1 - 120 CONTINUE - END IF - SUM = 2*SUM - K = 1 - DO 130 I = 1, N - IF( REAL( AP( K ) ).NE.ZERO ) THEN - ABSA = ABS( REAL( AP( K ) ) ) - IF( SCALE.LT.ABSA ) THEN - SUM = ONE + SUM*( SCALE / ABSA )**2 - SCALE = ABSA - ELSE - SUM = SUM + ( ABSA / SCALE )**2 - END IF - END IF - IF( AIMAG( AP( K ) ).NE.ZERO ) THEN - ABSA = ABS( AIMAG( AP( K ) ) ) - IF( SCALE.LT.ABSA ) THEN - SUM = ONE + SUM*( SCALE / ABSA )**2 - SCALE = ABSA - ELSE - SUM = SUM + ( ABSA / SCALE )**2 - END IF - END IF - IF( LSAME( UPLO, 'U' ) ) THEN - K = K + I + 1 - ELSE - K = K + N - I + 1 - END IF - 130 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANSP = VALUE - RETURN -* -* End of CLANSP -* - END diff --git a/blas_fix/clansy.f b/blas_fix/clansy.f deleted file mode 100644 index 18cb018ff..000000000 --- a/blas_fix/clansy.f +++ /dev/null @@ -1,244 +0,0 @@ -*> \brief \b CLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex symmetric matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANSY + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANSY( NORM, UPLO, N, A, LDA, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER LDA, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX A( LDA, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANSY returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> complex symmetric matrix A. -*> \endverbatim -*> -*> \return CLANSY -*> \verbatim -*> -*> CLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANSY as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> symmetric matrix A is to be referenced. -*> = 'U': Upper triangular part of A is referenced -*> = 'L': Lower triangular part of A is referenced -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANSY is -*> set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is COMPLEX array, dimension (LDA,N) -*> The symmetric matrix A. If UPLO = 'U', the leading n by n -*> upper triangular part of A contains the upper triangular part -*> of the matrix A, and the strictly lower triangular part of A -*> is not referenced. If UPLO = 'L', the leading n by n lower -*> triangular part of A contains the lower triangular part of -*> the matrix A, and the strictly upper triangular part of A is -*> not referenced. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(N,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexSYauxiliary -* -* ===================================================================== - REAL FUNCTION CLANSY( NORM, UPLO, N, A, LDA, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER LDA, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX A( LDA, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = 1, J - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = J, N - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is symmetric). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - DO 50 I = 1, J - 1 - ABSA = ABS( A( I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 50 CONTINUE - WORK( J ) = SUM + ABS( A( J, J ) ) - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( A( J, J ) ) - DO 90 I = J + 1, N - ABSA = ABS( A( I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) - 110 CONTINUE - ELSE - DO 120 J = 1, N - 1 - CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) - 120 CONTINUE - END IF - SUM = 2*SUM - CALL CLASSQ( N, A, LDA+1, SCALE, SUM ) - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANSY = VALUE - RETURN -* -* End of CLANSY -* - END diff --git a/blas_fix/clantb.f b/blas_fix/clantb.f deleted file mode 100644 index 6104f8ef7..000000000 --- a/blas_fix/clantb.f +++ /dev/null @@ -1,364 +0,0 @@ -*> \brief \b CLANTB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular band matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANTB + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANTB( NORM, UPLO, DIAG, N, K, AB, -* LDAB, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER DIAG, NORM, UPLO -* INTEGER K, LDAB, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX AB( LDAB, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANTB returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of an -*> n by n triangular band matrix A, with ( k + 1 ) diagonals. -*> \endverbatim -*> -*> \return CLANTB -*> \verbatim -*> -*> CLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANTB as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the matrix A is upper or lower triangular. -*> = 'U': Upper triangular -*> = 'L': Lower triangular -*> \endverbatim -*> -*> \param[in] DIAG -*> \verbatim -*> DIAG is CHARACTER*1 -*> Specifies whether or not the matrix A is unit triangular. -*> = 'N': Non-unit triangular -*> = 'U': Unit triangular -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANTB is -*> set to zero. -*> \endverbatim -*> -*> \param[in] K -*> \verbatim -*> K is INTEGER -*> The number of super-diagonals of the matrix A if UPLO = 'U', -*> or the number of sub-diagonals of the matrix A if UPLO = 'L'. -*> K >= 0. -*> \endverbatim -*> -*> \param[in] AB -*> \verbatim -*> AB is COMPLEX array, dimension (LDAB,N) -*> The upper or lower triangular band matrix A, stored in the -*> first k+1 rows of AB. The j-th column of A is stored -*> in the j-th column of the array AB as follows: -*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; -*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). -*> Note that when DIAG = 'U', the elements of the array AB -*> corresponding to the diagonal elements of the matrix A are -*> not referenced, but are assumed to be one. -*> \endverbatim -*> -*> \param[in] LDAB -*> \verbatim -*> LDAB is INTEGER -*> The leading dimension of the array AB. LDAB >= K+1. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANTB( NORM, UPLO, DIAG, N, K, AB, - $ LDAB, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER DIAG, NORM, UPLO - INTEGER K, LDAB, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX AB( LDAB, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - LOGICAL UDIAG - INTEGER I, J, L - REAL SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - IF( LSAME( DIAG, 'U' ) ) THEN - VALUE = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = MAX( K+2-J, 1 ), K - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = 2, MIN( N+1-J, K+1 ) - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - DO 50 I = MAX( K+2-J, 1 ), K + 1 - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 50 CONTINUE - 60 CONTINUE - ELSE - DO 80 J = 1, N - DO 70 I = 1, MIN( N+1-J, K+1 ) - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - 80 CONTINUE - END IF - END IF - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - UDIAG = LSAME( DIAG, 'U' ) - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 90 I = MAX( K+2-J, 1 ), K - SUM = SUM + ABS( AB( I, J ) ) - 90 CONTINUE - ELSE - SUM = ZERO - DO 100 I = MAX( K+2-J, 1 ), K + 1 - SUM = SUM + ABS( AB( I, J ) ) - 100 CONTINUE - END IF - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 110 CONTINUE - ELSE - DO 140 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 120 I = 2, MIN( N+1-J, K+1 ) - SUM = SUM + ABS( AB( I, J ) ) - 120 CONTINUE - ELSE - SUM = ZERO - DO 130 I = 1, MIN( N+1-J, K+1 ) - SUM = SUM + ABS( AB( I, J ) ) - 130 CONTINUE - END IF - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 140 CONTINUE - END IF - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - DO 150 I = 1, N - WORK( I ) = ONE - 150 CONTINUE - DO 170 J = 1, N - L = K + 1 - J - DO 160 I = MAX( 1, J-K ), J - 1 - WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) - 160 CONTINUE - 170 CONTINUE - ELSE - DO 180 I = 1, N - WORK( I ) = ZERO - 180 CONTINUE - DO 200 J = 1, N - L = K + 1 - J - DO 190 I = MAX( 1, J-K ), J - WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) - 190 CONTINUE - 200 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - DO 210 I = 1, N - WORK( I ) = ONE - 210 CONTINUE - DO 230 J = 1, N - L = 1 - J - DO 220 I = J + 1, MIN( N, J+K ) - WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) - 220 CONTINUE - 230 CONTINUE - ELSE - DO 240 I = 1, N - WORK( I ) = ZERO - 240 CONTINUE - DO 260 J = 1, N - L = 1 - J - DO 250 I = J, MIN( N, J+K ) - WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) - 250 CONTINUE - 260 CONTINUE - END IF - END IF - DO 270 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 270 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = N - IF( K.GT.0 ) THEN - DO 280 J = 2, N - CALL CLASSQ( MIN( J-1, K ), - $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, - $ SUM ) - 280 CONTINUE - END IF - ELSE - SCALE = ZERO - SUM = ONE - DO 290 J = 1, N - CALL CLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), - $ 1, SCALE, SUM ) - 290 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = N - IF( K.GT.0 ) THEN - DO 300 J = 1, N - 1 - CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, - $ SUM ) - 300 CONTINUE - END IF - ELSE - SCALE = ZERO - SUM = ONE - DO 310 J = 1, N - CALL CLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, - $ SUM ) - 310 CONTINUE - END IF - END IF - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANTB = VALUE - RETURN -* -* End of CLANTB -* - END diff --git a/blas_fix/clantp.f b/blas_fix/clantp.f deleted file mode 100644 index 7a67253b8..000000000 --- a/blas_fix/clantp.f +++ /dev/null @@ -1,358 +0,0 @@ -*> \brief \b CLANTP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular matrix supplied in packed form. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANTP + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANTP( NORM, UPLO, DIAG, N, AP, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER DIAG, NORM, UPLO -* INTEGER N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX AP( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANTP returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> triangular matrix A, supplied in packed form. -*> \endverbatim -*> -*> \return CLANTP -*> \verbatim -*> -*> CLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANTP as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the matrix A is upper or lower triangular. -*> = 'U': Upper triangular -*> = 'L': Lower triangular -*> \endverbatim -*> -*> \param[in] DIAG -*> \verbatim -*> DIAG is CHARACTER*1 -*> Specifies whether or not the matrix A is unit triangular. -*> = 'N': Non-unit triangular -*> = 'U': Unit triangular -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, CLANTP is -*> set to zero. -*> \endverbatim -*> -*> \param[in] AP -*> \verbatim -*> AP is COMPLEX array, dimension (N*(N+1)/2) -*> The upper or lower triangular matrix A, packed columnwise in -*> a linear array. The j-th column of A is stored in the array -*> AP as follows: -*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; -*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. -*> Note that when DIAG = 'U', the elements of the array AP -*> corresponding to the diagonal elements of the matrix A are -*> not referenced, but are assumed to be one. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANTP( NORM, UPLO, DIAG, N, AP, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER DIAG, NORM, UPLO - INTEGER N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX AP( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - LOGICAL UDIAG - INTEGER I, J, K - REAL SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - K = 1 - IF( LSAME( DIAG, 'U' ) ) THEN - VALUE = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = K, K + J - 2 - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - K = K + J - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = K + 1, K + N - J - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - K = K + N - J + 1 - 40 CONTINUE - END IF - ELSE - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - DO 50 I = K, K + J - 1 - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 50 CONTINUE - K = K + J - 60 CONTINUE - ELSE - DO 80 J = 1, N - DO 70 I = K, K + N - J - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - K = K + N - J + 1 - 80 CONTINUE - END IF - END IF - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - K = 1 - UDIAG = LSAME( DIAG, 'U' ) - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 90 I = K, K + J - 2 - SUM = SUM + ABS( AP( I ) ) - 90 CONTINUE - ELSE - SUM = ZERO - DO 100 I = K, K + J - 1 - SUM = SUM + ABS( AP( I ) ) - 100 CONTINUE - END IF - K = K + J - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 110 CONTINUE - ELSE - DO 140 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 120 I = K + 1, K + N - J - SUM = SUM + ABS( AP( I ) ) - 120 CONTINUE - ELSE - SUM = ZERO - DO 130 I = K, K + N - J - SUM = SUM + ABS( AP( I ) ) - 130 CONTINUE - END IF - K = K + N - J + 1 - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 140 CONTINUE - END IF - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - K = 1 - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - DO 150 I = 1, N - WORK( I ) = ONE - 150 CONTINUE - DO 170 J = 1, N - DO 160 I = 1, J - 1 - WORK( I ) = WORK( I ) + ABS( AP( K ) ) - K = K + 1 - 160 CONTINUE - K = K + 1 - 170 CONTINUE - ELSE - DO 180 I = 1, N - WORK( I ) = ZERO - 180 CONTINUE - DO 200 J = 1, N - DO 190 I = 1, J - WORK( I ) = WORK( I ) + ABS( AP( K ) ) - K = K + 1 - 190 CONTINUE - 200 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - DO 210 I = 1, N - WORK( I ) = ONE - 210 CONTINUE - DO 230 J = 1, N - K = K + 1 - DO 220 I = J + 1, N - WORK( I ) = WORK( I ) + ABS( AP( K ) ) - K = K + 1 - 220 CONTINUE - 230 CONTINUE - ELSE - DO 240 I = 1, N - WORK( I ) = ZERO - 240 CONTINUE - DO 260 J = 1, N - DO 250 I = J, N - WORK( I ) = WORK( I ) + ABS( AP( K ) ) - K = K + 1 - 250 CONTINUE - 260 CONTINUE - END IF - END IF - VALUE = ZERO - DO 270 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 270 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = N - K = 2 - DO 280 J = 2, N - CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) - K = K + J - 280 CONTINUE - ELSE - SCALE = ZERO - SUM = ONE - K = 1 - DO 290 J = 1, N - CALL CLASSQ( J, AP( K ), 1, SCALE, SUM ) - K = K + J - 290 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = N - K = 2 - DO 300 J = 1, N - 1 - CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) - K = K + N - J + 1 - 300 CONTINUE - ELSE - SCALE = ZERO - SUM = ONE - K = 1 - DO 310 J = 1, N - CALL CLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) - K = K + N - J + 1 - 310 CONTINUE - END IF - END IF - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANTP = VALUE - RETURN -* -* End of CLANTP -* - END diff --git a/blas_fix/clantr.f b/blas_fix/clantr.f deleted file mode 100644 index f5ad18ce3..000000000 --- a/blas_fix/clantr.f +++ /dev/null @@ -1,356 +0,0 @@ -*> \brief \b CLANTR returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CLANTR + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, -* WORK ) -* -* .. Scalar Arguments .. -* CHARACTER DIAG, NORM, UPLO -* INTEGER LDA, M, N -* .. -* .. Array Arguments .. -* REAL WORK( * ) -* COMPLEX A( LDA, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CLANTR returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> trapezoidal or triangular matrix A. -*> \endverbatim -*> -*> \return CLANTR -*> \verbatim -*> -*> CLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in CLANTR as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the matrix A is upper or lower trapezoidal. -*> = 'U': Upper trapezoidal -*> = 'L': Lower trapezoidal -*> Note that A is triangular instead of trapezoidal if M = N. -*> \endverbatim -*> -*> \param[in] DIAG -*> \verbatim -*> DIAG is CHARACTER*1 -*> Specifies whether or not the matrix A has unit diagonal. -*> = 'N': Non-unit diagonal -*> = 'U': Unit diagonal -*> \endverbatim -*> -*> \param[in] M -*> \verbatim -*> M is INTEGER -*> The number of rows of the matrix A. M >= 0, and if -*> UPLO = 'U', M <= N. When M = 0, CLANTR is set to zero. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The number of columns of the matrix A. N >= 0, and if -*> UPLO = 'L', N <= M. When N = 0, CLANTR is set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is COMPLEX array, dimension (LDA,N) -*> The trapezoidal matrix A (A is triangular if M = N). -*> If UPLO = 'U', the leading m by n upper trapezoidal part of -*> the array A contains the upper trapezoidal matrix, and the -*> strictly lower triangular part of A is not referenced. -*> If UPLO = 'L', the leading m by n lower trapezoidal part of -*> the array A contains the lower trapezoidal matrix, and the -*> strictly upper triangular part of A is not referenced. Note -*> that when DIAG = 'U', the diagonal elements of A are not -*> referenced and are assumed to be one. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(M,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, - $ WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER DIAG, NORM, UPLO - INTEGER LDA, M, N -* .. -* .. Array Arguments .. - REAL WORK( * ) - COMPLEX A( LDA, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - LOGICAL UDIAG - INTEGER I, J - REAL SCALE, SUM, VALUE -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL CLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( MIN( M, N ).EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - IF( LSAME( DIAG, 'U' ) ) THEN - VALUE = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = 1, MIN( M, J-1 ) - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = J + 1, M - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - DO 50 I = 1, MIN( M, J ) - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 50 CONTINUE - 60 CONTINUE - ELSE - DO 80 J = 1, N - DO 70 I = J, M - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - 80 CONTINUE - END IF - END IF - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - UDIAG = LSAME( DIAG, 'U' ) - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 1, N - IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN - SUM = ONE - DO 90 I = 1, J - 1 - SUM = SUM + ABS( A( I, J ) ) - 90 CONTINUE - ELSE - SUM = ZERO - DO 100 I = 1, MIN( M, J ) - SUM = SUM + ABS( A( I, J ) ) - 100 CONTINUE - END IF - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 110 CONTINUE - ELSE - DO 140 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 120 I = J + 1, M - SUM = SUM + ABS( A( I, J ) ) - 120 CONTINUE - ELSE - SUM = ZERO - DO 130 I = J, M - SUM = SUM + ABS( A( I, J ) ) - 130 CONTINUE - END IF - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 140 CONTINUE - END IF - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - DO 150 I = 1, M - WORK( I ) = ONE - 150 CONTINUE - DO 170 J = 1, N - DO 160 I = 1, MIN( M, J-1 ) - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 160 CONTINUE - 170 CONTINUE - ELSE - DO 180 I = 1, M - WORK( I ) = ZERO - 180 CONTINUE - DO 200 J = 1, N - DO 190 I = 1, MIN( M, J ) - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 190 CONTINUE - 200 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - DO 210 I = 1, N - WORK( I ) = ONE - 210 CONTINUE - DO 220 I = N + 1, M - WORK( I ) = ZERO - 220 CONTINUE - DO 240 J = 1, N - DO 230 I = J + 1, M - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 230 CONTINUE - 240 CONTINUE - ELSE - DO 250 I = 1, M - WORK( I ) = ZERO - 250 CONTINUE - DO 270 J = 1, N - DO 260 I = J, M - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 260 CONTINUE - 270 CONTINUE - END IF - END IF - VALUE = ZERO - DO 280 I = 1, M - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 280 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = MIN( M, N ) - DO 290 J = 2, N - CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) - 290 CONTINUE - ELSE - SCALE = ZERO - SUM = ONE - DO 300 J = 1, N - CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) - 300 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = MIN( M, N ) - DO 310 J = 1, N - CALL CLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, - $ SUM ) - 310 CONTINUE - ELSE - SCALE = ZERO - SUM = ONE - DO 320 J = 1, N - CALL CLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) - 320 CONTINUE - END IF - END IF - VALUE = SCALE*SQRT( SUM ) - END IF -* - CLANTR = VALUE - RETURN -* -* End of CLANTR -* - END diff --git a/blas_fix/sasum.f b/blas_fix/sasum.f deleted file mode 100644 index 46a4ecc1b..000000000 --- a/blas_fix/sasum.f +++ /dev/null @@ -1,112 +0,0 @@ -*> \brief \b SASUM -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* REAL FUNCTION SASUM(N,SX,INCX) -* -* .. Scalar Arguments .. -* INTEGER INCX,N -* .. -* .. Array Arguments .. -* REAL SX(*) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SASUM takes the sum of the absolute values. -*> uses unrolled loops for increment equal to one. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup single_blas_level1 -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> jack dongarra, linpack, 3/11/78. -*> modified 3/93 to return if incx .le. 0. -*> modified 12/3/93, array(1) declarations changed to array(*) -*> \endverbatim -*> -* ===================================================================== - REAL FUNCTION SASUM(N,SX,INCX) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - INTEGER INCX,N -* .. -* .. Array Arguments .. - REAL SX(*) -* .. -* -* ===================================================================== -* -* .. Local Scalars .. - REAL STEMP - INTEGER I,M,MP1,NINCX -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS,MOD -* .. - SASUM = 0.0e0 - STEMP = 0.0e0 - IF (N.LE.0 .OR. INCX.LE.0) RETURN - IF (INCX.EQ.1) THEN -* code for increment equal to 1 -* -* -* clean-up loop -* - M = MOD(N,6) - IF (M.NE.0) THEN - DO I = 1,M - STEMP = STEMP + ABS(SX(I)) - END DO - IF (N.LT.6) THEN - SASUM = STEMP - RETURN - END IF - END IF - MP1 = M + 1 - DO I = MP1,N,6 - STEMP = STEMP + ABS(SX(I)) + ABS(SX(I+1)) + - $ ABS(SX(I+2)) + ABS(SX(I+3)) + - $ ABS(SX(I+4)) + ABS(SX(I+5)) - END DO - ELSE -* -* code for increment not equal to 1 -* - NINCX = N*INCX - DO I = 1,NINCX,INCX - STEMP = STEMP + ABS(SX(I)) - END DO - END IF - SASUM = STEMP - RETURN - END diff --git a/blas_fix/scabs1.f b/blas_fix/scabs1.f deleted file mode 100644 index cdb5c0b9a..000000000 --- a/blas_fix/scabs1.f +++ /dev/null @@ -1,57 +0,0 @@ -*> \brief \b SCABS1 -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* REAL FUNCTION SCABS1(Z) -* -* .. Scalar Arguments .. -* COMPLEX Z -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SCABS1 computes absolute value of a complex number -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup single_blas_level1 -* -* ===================================================================== - REAL FUNCTION SCABS1(Z) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - COMPLEX Z -* .. -* -* ===================================================================== -* -* .. Intrinsic Functions .. - INTRINSIC ABS,AIMAG,REAL -* .. - SCABS1 = ABS(REAL(Z)) + ABS(AIMAG(Z)) - RETURN - END diff --git a/blas_fix/scasum.f b/blas_fix/scasum.f deleted file mode 100644 index 03154eb58..000000000 --- a/blas_fix/scasum.f +++ /dev/null @@ -1,97 +0,0 @@ -*> \brief \b SCASUM -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* REAL FUNCTION SCASUM(N,CX,INCX) -* -* .. Scalar Arguments .. -* INTEGER INCX,N -* .. -* .. Array Arguments .. -* COMPLEX CX(*) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SCASUM takes the sum of the absolute values of a complex vector and -*> returns a single precision result. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup single_blas_level1 -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> jack dongarra, linpack, 3/11/78. -*> modified 3/93 to return if incx .le. 0. -*> modified 12/3/93, array(1) declarations changed to array(*) -*> \endverbatim -*> -* ===================================================================== - REAL FUNCTION SCASUM(N,CX,INCX) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - INTEGER INCX,N -* .. -* .. Array Arguments .. - COMPLEX CX(*) -* .. -* -* ===================================================================== -* -* .. Local Scalars .. - REAL STEMP - INTEGER I,NINCX -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS,AIMAG,REAL -* .. - SCASUM = 0.0e0 - STEMP = 0.0e0 - IF (N.LE.0 .OR. INCX.LE.0) RETURN - IF (INCX.EQ.1) THEN -* -* code for increment equal to 1 -* - DO I = 1,N - STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I))) - END DO - ELSE -* -* code for increment not equal to 1 -* - NINCX = N*INCX - DO I = 1,NINCX,INCX - STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I))) - END DO - END IF - SCASUM = STEMP - RETURN - END diff --git a/blas_fix/scnrm2.f b/blas_fix/scnrm2.f deleted file mode 100644 index 4a581e8e1..000000000 --- a/blas_fix/scnrm2.f +++ /dev/null @@ -1,119 +0,0 @@ -*> \brief \b SCNRM2 -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* REAL FUNCTION SCNRM2(N,X,INCX) -* -* .. Scalar Arguments .. -* INTEGER INCX,N -* .. -* .. Array Arguments .. -* COMPLEX X(*) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SCNRM2 returns the euclidean norm of a vector via the function -*> name, so that -*> -*> SCNRM2 := sqrt( x**H*x ) -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup single_blas_level1 -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> -- This version written on 25-October-1982. -*> Modified on 14-October-1993 to inline the call to CLASSQ. -*> Sven Hammarling, Nag Ltd. -*> \endverbatim -*> -* ===================================================================== - REAL FUNCTION SCNRM2(N,X,INCX) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - INTEGER INCX,N -* .. -* .. Array Arguments .. - COMPLEX X(*) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE,ZERO - PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) -* .. -* .. Local Scalars .. - REAL NORM,SCALE,SSQ,TEMP - INTEGER IX -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS,AIMAG,REAL,SQRT -* .. - IF (N.LT.1 .OR. INCX.LT.1) THEN - NORM = ZERO - ELSE - SCALE = ZERO - SSQ = ONE -* The following loop is equivalent to this call to the LAPACK -* auxiliary routine: -* CALL CLASSQ( N, X, INCX, SCALE, SSQ ) -* - DO 10 IX = 1,1 + (N-1)*INCX,INCX - IF (REAL(X(IX)).NE.ZERO) THEN - TEMP = ABS(REAL(X(IX))) - IF (SCALE.LT.TEMP) THEN - SSQ = ONE + SSQ* (SCALE/TEMP)**2 - SCALE = TEMP - ELSE - SSQ = SSQ + (TEMP/SCALE)**2 - END IF - END IF - IF (AIMAG(X(IX)).NE.ZERO) THEN - TEMP = ABS(AIMAG(X(IX))) - IF (SCALE.LT.TEMP) THEN - SSQ = ONE + SSQ* (SCALE/TEMP)**2 - SCALE = TEMP - ELSE - SSQ = SSQ + (TEMP/SCALE)**2 - END IF - END IF - 10 CONTINUE - NORM = SCALE*SQRT(SSQ) - END IF -* - SCNRM2 = NORM - RETURN -* -* End of SCNRM2. -* - END diff --git a/blas_fix/scsum1.f b/blas_fix/scsum1.f deleted file mode 100644 index 2fbb911b9..000000000 --- a/blas_fix/scsum1.f +++ /dev/null @@ -1,140 +0,0 @@ -*> \brief \b SCSUM1 forms the 1-norm of the complex vector using the true absolute value. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SCSUM1 + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SCSUM1( N, CX, INCX ) -* -* .. Scalar Arguments .. -* INTEGER INCX, N -* .. -* .. Array Arguments .. -* COMPLEX CX( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SCSUM1 takes the sum of the absolute values of a complex -*> vector and returns a single precision result. -*> -*> Based on SCASUM from the Level 1 BLAS. -*> The change is to use the 'genuine' absolute value. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The number of elements in the vector CX. -*> \endverbatim -*> -*> \param[in] CX -*> \verbatim -*> CX is COMPLEX array, dimension (N) -*> The vector whose elements will be summed. -*> \endverbatim -*> -*> \param[in] INCX -*> \verbatim -*> INCX is INTEGER -*> The spacing between successive values of CX. INCX > 0. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup complexOTHERauxiliary -* -*> \par Contributors: -* ================== -*> -*> Nick Higham for use with CLACON. -* -* ===================================================================== - REAL FUNCTION SCSUM1( N, CX, INCX ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - INTEGER INCX, N -* .. -* .. Array Arguments .. - COMPLEX CX( * ) -* .. -* -* ===================================================================== -* -* .. Local Scalars .. - INTEGER I, NINCX - REAL STEMP -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS -* .. -* .. Executable Statements .. -* - SCSUM1 = 0.0E0 - STEMP = 0.0E0 - IF( N.LE.0 ) - $ RETURN - IF( INCX.EQ.1 ) - $ GO TO 20 -* -* CODE FOR INCREMENT NOT EQUAL TO 1 -* - NINCX = N*INCX - DO 10 I = 1, NINCX, INCX -* -* NEXT LINE MODIFIED. -* - STEMP = STEMP + ABS( CX( I ) ) - 10 CONTINUE - SCSUM1 = STEMP - RETURN -* -* CODE FOR INCREMENT EQUAL TO 1 -* - 20 CONTINUE - DO 30 I = 1, N -* -* NEXT LINE MODIFIED. -* - STEMP = STEMP + ABS( CX( I ) ) - 30 CONTINUE - SCSUM1 = STEMP - RETURN -* -* End of SCSUM1 -* - END diff --git a/blas_fix/sdot.f b/blas_fix/sdot.f deleted file mode 100644 index 68555aad8..000000000 --- a/blas_fix/sdot.f +++ /dev/null @@ -1,117 +0,0 @@ -*> \brief \b SDOT -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* REAL FUNCTION SDOT(N,SX,INCX,SY,INCY) -* -* .. Scalar Arguments .. -* INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. -* REAL SX(*),SY(*) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SDOT forms the dot product of two vectors. -*> uses unrolled loops for increments equal to one. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup single_blas_level1 -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> jack dongarra, linpack, 3/11/78. -*> modified 12/3/93, array(1) declarations changed to array(*) -*> \endverbatim -*> -* ===================================================================== - REAL FUNCTION SDOT(N,SX,INCX,SY,INCY) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. - REAL SX(*),SY(*) -* .. -* -* ===================================================================== -* -* .. Local Scalars .. - REAL STEMP - INTEGER I,IX,IY,M,MP1 -* .. -* .. Intrinsic Functions .. - INTRINSIC MOD -* .. - STEMP = 0.0e0 - SDOT = 0.0e0 - IF (N.LE.0) RETURN - IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN -* -* code for both increments equal to 1 -* -* -* clean-up loop -* - M = MOD(N,5) - IF (M.NE.0) THEN - DO I = 1,M - STEMP = STEMP + SX(I)*SY(I) - END DO - IF (N.LT.5) THEN - SDOT=STEMP - RETURN - END IF - END IF - MP1 = M + 1 - DO I = MP1,N,5 - STEMP = STEMP + SX(I)*SY(I) + SX(I+1)*SY(I+1) + - $ SX(I+2)*SY(I+2) + SX(I+3)*SY(I+3) + SX(I+4)*SY(I+4) - END DO - ELSE -* -* code for unequal increments or equal increments -* not equal to 1 -* - IX = 1 - IY = 1 - IF (INCX.LT.0) IX = (-N+1)*INCX + 1 - IF (INCY.LT.0) IY = (-N+1)*INCY + 1 - DO I = 1,N - STEMP = STEMP + SX(IX)*SY(IY) - IX = IX + INCX - IY = IY + INCY - END DO - END IF - SDOT = STEMP - RETURN - END diff --git a/blas_fix/sdsdot.f b/blas_fix/sdsdot.f deleted file mode 100644 index 736ba0a0f..000000000 --- a/blas_fix/sdsdot.f +++ /dev/null @@ -1,255 +0,0 @@ -*> \brief \b SDSDOT -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY) -* -* .. Scalar Arguments .. -* REAL SB -* INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. -* REAL SX(*),SY(*) -* .. -* -* PURPOSE -* ======= -* -* Compute the inner product of two vectors with extended -* precision accumulation. -* -* Returns S.P. result with dot product accumulated in D.P. -* SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), -* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is -* defined in a similar way using INCY. -* -* AUTHOR -* ====== -* Lawson, C. L., (JPL), Hanson, R. J., (SNLA), -* Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) -* -* ARGUMENTS -* ========= -* -* N (input) INTEGER -* number of elements in input vector(s) -* -* SB (input) REAL -* single precision scalar to be added to inner product -* -* SX (input) REAL array, dimension (N) -* single precision vector with N elements -* -* INCX (input) INTEGER -* storage spacing between elements of SX -* -* SY (input) REAL array, dimension (N) -* single precision vector with N elements -* -* INCY (input) INTEGER -* storage spacing between elements of SY -* -* SDSDOT (output) REAL -* single precision dot product (SB if N .LE. 0) -* -* Further Details -* =============== -* -* REFERENCES -* -* C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. -* Krogh, Basic linear algebra subprograms for Fortran -* usage, Algorithm No. 539, Transactions on Mathematical -* Software 5, 3 (September 1979), pp. 308-323. -* -* REVISION HISTORY (YYMMDD) -* -* 791001 DATE WRITTEN -* 890531 Changed all specific intrinsics to generic. (WRB) -* 890831 Modified array declarations. (WRB) -* 890831 REVISION DATE from Version 3.2 -* 891214 Prologue converted to Version 4.0 format. (BAB) -* 920310 Corrected definition of LX in DESCRIPTION. (WRB) -* 920501 Reformatted the REFERENCES section. (WRB) -* 070118 Reformat to LAPACK coding style -* -* ===================================================================== -* -* .. Local Scalars .. -* DOUBLE PRECISION DSDOT -* INTEGER I,KX,KY,NS -* .. -* .. Intrinsic Functions .. -* INTRINSIC DBLE -* .. -* DSDOT = SB -* IF (N.LE.0) THEN -* SDSDOT = DSDOT -* RETURN -* END IF -* IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN -* -* Code for equal and positive increments. -* -* NS = N*INCX -* DO I = 1,NS,INCX -* DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I)) -* END DO -* ELSE -* -* Code for unequal or nonpositive increments. -* -* KX = 1 -* KY = 1 -* IF (INCX.LT.0) KX = 1 + (1-N)*INCX -* IF (INCY.LT.0) KY = 1 + (1-N)*INCY -* DO I = 1,N -* DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY)) -* KX = KX + INCX -* KY = KY + INCY -* END DO -* END IF -* SDSDOT = DSDOT -* RETURN -* END -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup single_blas_level1 -* -* ===================================================================== - REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - REAL SB - INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. - REAL SX(*),SY(*) -* .. -* -* PURPOSE -* ======= -* -* Compute the inner product of two vectors with extended -* precision accumulation. -* -* Returns S.P. result with dot product accumulated in D.P. -* SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), -* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is -* defined in a similar way using INCY. -* -* AUTHOR -* ====== -* Lawson, C. L., (JPL), Hanson, R. J., (SNLA), -* Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) -* -* ARGUMENTS -* ========= -* -* N (input) INTEGER -* number of elements in input vector(s) -* -* SB (input) REAL -* single precision scalar to be added to inner product -* -* SX (input) REAL array, dimension (N) -* single precision vector with N elements -* -* INCX (input) INTEGER -* storage spacing between elements of SX -* -* SY (input) REAL array, dimension (N) -* single precision vector with N elements -* -* INCY (input) INTEGER -* storage spacing between elements of SY -* -* SDSDOT (output) REAL -* single precision dot product (SB if N .LE. 0) -* -* Further Details -* =============== -* -* REFERENCES -* -* C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. -* Krogh, Basic linear algebra subprograms for Fortran -* usage, Algorithm No. 539, Transactions on Mathematical -* Software 5, 3 (September 1979), pp. 308-323. -* -* REVISION HISTORY (YYMMDD) -* -* 791001 DATE WRITTEN -* 890531 Changed all specific intrinsics to generic. (WRB) -* 890831 Modified array declarations. (WRB) -* 890831 REVISION DATE from Version 3.2 -* 891214 Prologue converted to Version 4.0 format. (BAB) -* 920310 Corrected definition of LX in DESCRIPTION. (WRB) -* 920501 Reformatted the REFERENCES section. (WRB) -* 070118 Reformat to LAPACK coding style -* -* ===================================================================== -* -* .. Local Scalars .. - DOUBLE PRECISION DSDOT - INTEGER I,KX,KY,NS -* .. -* .. Intrinsic Functions .. - INTRINSIC DBLE -* .. - DSDOT = SB - IF (N.LE.0) THEN - SDSDOT = REAL(DSDOT) - RETURN - END IF - IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN -* -* Code for equal and positive increments. -* - NS = N*INCX - DO I = 1,NS,INCX - DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I)) - END DO - ELSE -* -* Code for unequal or nonpositive increments. -* - KX = 1 - KY = 1 - IF (INCX.LT.0) KX = 1 + (1-N)*INCX - IF (INCY.LT.0) KY = 1 + (1-N)*INCY - DO I = 1,N - DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY)) - KX = KX + INCX - KY = KY + INCY - END DO - END IF - SDSDOT = REAL(DSDOT) - RETURN - END diff --git a/blas_fix/sladiv.f b/blas_fix/sladiv.f deleted file mode 100644 index 6d26da20c..000000000 --- a/blas_fix/sladiv.f +++ /dev/null @@ -1,253 +0,0 @@ -*> \brief \b SLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLADIV + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* SUBROUTINE SLADIV( A, B, C, D, P, Q ) -* -* .. Scalar Arguments .. -* REAL A, B, C, D, P, Q -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLADIV performs complex division in real arithmetic -*> -*> a + i*b -*> p + i*q = --------- -*> c + i*d -*> -*> The algorithm is due to Michael Baudin and Robert L. Smith -*> and can be found in the paper -*> "A Robust Complex Division in Scilab" -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] A -*> \verbatim -*> A is REAL -*> \endverbatim -*> -*> \param[in] B -*> \verbatim -*> B is REAL -*> \endverbatim -*> -*> \param[in] C -*> \verbatim -*> C is REAL -*> \endverbatim -*> -*> \param[in] D -*> \verbatim -*> D is REAL -*> The scalars a, b, c, and d in the above expression. -*> \endverbatim -*> -*> \param[out] P -*> \verbatim -*> P is REAL -*> \endverbatim -*> -*> \param[out] Q -*> \verbatim -*> Q is REAL -*> The scalars p and q in the above expression. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date January 2013 -* -*> \ingroup auxOTHERauxiliary -* -* ===================================================================== - SUBROUTINE SLADIV( A, B, C, D, P, Q ) -* -* -- LAPACK auxiliary routine (version 3.5.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* January 2013 -* -* .. Scalar Arguments .. - REAL A, B, C, D, P, Q -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL BS - PARAMETER ( BS = 2.0E0 ) - REAL HALF - PARAMETER ( HALF = 0.5E0 ) - REAL TWO - PARAMETER ( TWO = 2.0E0 ) -* -* .. Local Scalars .. - REAL AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS -* .. -* .. External Functions .. - REAL SLAMCH - EXTERNAL SLAMCH -* .. -* .. External Subroutines .. - EXTERNAL SLADIV1 -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX -* .. -* .. Executable Statements .. -* - AA = A - BB = B - CC = C - DD = D - AB = MAX( ABS(A), ABS(B) ) - CD = MAX( ABS(C), ABS(D) ) - S = 1.0E0 - - OV = SLAMCH( 'Overflow threshold' ) - UN = SLAMCH( 'Safe minimum' ) - EPS = SLAMCH( 'Epsilon' ) - BE = BS / (EPS*EPS) - - IF( AB >= HALF*OV ) THEN - AA = HALF * AA - BB = HALF * BB - S = TWO * S - END IF - IF( CD >= HALF*OV ) THEN - CC = HALF * CC - DD = HALF * DD - S = HALF * S - END IF - IF( AB <= UN*BS/EPS ) THEN - AA = AA * BE - BB = BB * BE - S = S / BE - END IF - IF( CD <= UN*BS/EPS ) THEN - CC = CC * BE - DD = DD * BE - S = S * BE - END IF - IF( ABS( D ).LE.ABS( C ) ) THEN - CALL SLADIV1(AA, BB, CC, DD, P, Q) - ELSE - CALL SLADIV1(BB, AA, DD, CC, P, Q) - Q = -Q - END IF - P = P * S - Q = Q * S -* - RETURN -* -* End of SLADIV -* - END - - - - SUBROUTINE SLADIV1( A, B, C, D, P, Q ) -* -* -- LAPACK auxiliary routine (version 3.5.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* January 2013 -* -* .. Scalar Arguments .. - REAL A, B, C, D, P, Q -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E0 ) -* -* .. Local Scalars .. - REAL R, T -* .. -* .. External Functions .. - REAL SLADIV2 - EXTERNAL SLADIV2 -* .. -* .. Executable Statements .. -* - R = D / C - T = ONE / (C + D * R) - P = SLADIV2(A, B, C, D, R, T) - A = -A - Q = SLADIV2(B, A, C, D, R, T) -* - RETURN -* -* End of SLADIV1 -* - END - - REAL FUNCTION SLADIV2( A, B, C, D, R, T ) -* -* -- LAPACK auxiliary routine (version 3.5.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* January 2013 -* -* .. Scalar Arguments .. - REAL A, B, C, D, R, T -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ZERO - PARAMETER ( ZERO = 0.0E0 ) -* -* .. Local Scalars .. - REAL BR -* .. -* .. Executable Statements .. -* - IF( R.NE.ZERO ) THEN - BR = B * R - if( BR.NE.ZERO ) THEN - SLADIV2 = (A + BR) * T - ELSE - SLADIV2 = A * T + (B * T) * R - END IF - ELSE - SLADIV2 = (A + D * (B / C)) * T - END IF -* - RETURN -* -* End of SLADIV -* - END diff --git a/blas_fix/slamch.f b/blas_fix/slamch.f deleted file mode 100644 index 8653c66e7..000000000 --- a/blas_fix/slamch.f +++ /dev/null @@ -1,192 +0,0 @@ -*> \brief \b SLAMCH -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* REAL FUNCTION SLAMCH( CMACH ) -* -* .. Scalar Arguments .. -* CHARACTER CMACH -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLAMCH determines single precision machine parameters. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] CMACH -*> \verbatim -*> Specifies the value to be returned by SLAMCH: -*> = 'E' or 'e', SLAMCH := eps -*> = 'S' or 's , SLAMCH := sfmin -*> = 'B' or 'b', SLAMCH := base -*> = 'P' or 'p', SLAMCH := eps*base -*> = 'N' or 'n', SLAMCH := t -*> = 'R' or 'r', SLAMCH := rnd -*> = 'M' or 'm', SLAMCH := emin -*> = 'U' or 'u', SLAMCH := rmin -*> = 'L' or 'l', SLAMCH := emax -*> = 'O' or 'o', SLAMCH := rmax -*> where -*> eps = relative machine precision -*> sfmin = safe minimum, such that 1/sfmin does not overflow -*> base = base of the machine -*> prec = eps*base -*> t = number of (base) digits in the mantissa -*> rnd = 1.0 when rounding occurs in addition, 0.0 otherwise -*> emin = minimum exponent before (gradual) underflow -*> rmin = underflow threshold - base**(emin-1) -*> emax = largest exponent before overflow -*> rmax = overflow threshold - (base**emax)*(1-eps) -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup auxOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLAMCH( CMACH ) -* -* -- LAPACK auxiliary routine (version 3.4.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - CHARACTER CMACH -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - REAL RND, EPS, SFMIN, SMALL, RMACH -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. Intrinsic Functions .. - INTRINSIC DIGITS, EPSILON, HUGE, MAXEXPONENT, - $ MINEXPONENT, RADIX, TINY -* .. -* .. Executable Statements .. -* -* -* Assume rounding, not chopping. Always. -* - RND = ONE -* - IF( ONE.EQ.RND ) THEN - EPS = EPSILON(ZERO) * 0.5 - ELSE - EPS = EPSILON(ZERO) - END IF -* - IF( LSAME( CMACH, 'E' ) ) THEN - RMACH = EPS - ELSE IF( LSAME( CMACH, 'S' ) ) THEN - SFMIN = TINY(ZERO) - SMALL = ONE / HUGE(ZERO) - IF( SMALL.GE.SFMIN ) THEN -* -* Use SMALL plus a bit, to avoid the possibility of rounding -* causing overflow when computing 1/sfmin. -* - SFMIN = SMALL*( ONE+EPS ) - END IF - RMACH = SFMIN - ELSE IF( LSAME( CMACH, 'B' ) ) THEN - RMACH = RADIX(ZERO) - ELSE IF( LSAME( CMACH, 'P' ) ) THEN - RMACH = EPS * RADIX(ZERO) - ELSE IF( LSAME( CMACH, 'N' ) ) THEN - RMACH = DIGITS(ZERO) - ELSE IF( LSAME( CMACH, 'R' ) ) THEN - RMACH = RND - ELSE IF( LSAME( CMACH, 'M' ) ) THEN - RMACH = MINEXPONENT(ZERO) - ELSE IF( LSAME( CMACH, 'U' ) ) THEN - RMACH = tiny(zero) - ELSE IF( LSAME( CMACH, 'L' ) ) THEN - RMACH = MAXEXPONENT(ZERO) - ELSE IF( LSAME( CMACH, 'O' ) ) THEN - RMACH = HUGE(ZERO) - ELSE - RMACH = ZERO - END IF -* - SLAMCH = RMACH - RETURN -* -* End of SLAMCH -* - END -* ===================================================================== -*> \brief \b SLAMC3 -*> \details -*> \b Purpose: -*> \verbatim -*> SLAMC3 is intended to force A and B to be stored prior to doing -*> the addition of A and B , for use in situations where optimizers -*> might hold one of these in a register. -*> \endverbatim -*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. -*> \date November 2011 -*> \ingroup auxOTHERauxiliary -*> -*> \param[in] A -*> \verbatim -*> \endverbatim -*> -*> \param[in] B -*> \verbatim -*> The values A and B. -*> \endverbatim -*> -* - REAL FUNCTION SLAMC3( A, B ) -* -* -- LAPACK auxiliary routine (version 3.4.0) -- -* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. -* November 2010 -* -* .. Scalar Arguments .. - REAL A, B -* .. -* ===================================================================== -* -* .. Executable Statements .. -* - SLAMC3 = A + B -* - RETURN -* -* End of SLAMC3 -* - END -* -* ===================================================================== diff --git a/blas_fix/slangb.f b/blas_fix/slangb.f deleted file mode 100644 index c543372ef..000000000 --- a/blas_fix/slangb.f +++ /dev/null @@ -1,226 +0,0 @@ -*> \brief \b SLANGB returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of general band matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANGB + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANGB( NORM, N, KL, KU, AB, LDAB, -* WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER KL, KU, LDAB, N -* .. -* .. Array Arguments .. -* REAL AB( LDAB, * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANGB returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of an -*> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. -*> \endverbatim -*> -*> \return SLANGB -*> \verbatim -*> -*> SLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANGB as described -*> above. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANGB is -*> set to zero. -*> \endverbatim -*> -*> \param[in] KL -*> \verbatim -*> KL is INTEGER -*> The number of sub-diagonals of the matrix A. KL >= 0. -*> \endverbatim -*> -*> \param[in] KU -*> \verbatim -*> KU is INTEGER -*> The number of super-diagonals of the matrix A. KU >= 0. -*> \endverbatim -*> -*> \param[in] AB -*> \verbatim -*> AB is REAL array, dimension (LDAB,N) -*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th -*> column of A is stored in the j-th column of the array AB as -*> follows: -*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). -*> \endverbatim -*> -*> \param[in] LDAB -*> \verbatim -*> LDAB is INTEGER -*> The leading dimension of the array AB. LDAB >= KL+KU+1. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realGBauxiliary -* -* ===================================================================== - REAL FUNCTION SLANGB( NORM, N, KL, KU, AB, LDAB, - $ WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER KL, KU, LDAB, N -* .. -* .. Array Arguments .. - REAL AB( LDAB, * ), WORK( * ) -* .. -* -* ===================================================================== -* -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, K, L - REAL SCALE, SUM, VALUE, TEMP -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - DO 20 J = 1, N - DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) - TEMP = ABS( AB( I, J ) ) - IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP - 10 CONTINUE - 20 CONTINUE - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - DO 40 J = 1, N - SUM = ZERO - DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) - SUM = SUM + ABS( AB( I, J ) ) - 30 CONTINUE - IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 40 CONTINUE - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - DO 50 I = 1, N - WORK( I ) = ZERO - 50 CONTINUE - DO 70 J = 1, N - K = KU + 1 - J - DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) - WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) - 60 CONTINUE - 70 CONTINUE - VALUE = ZERO - DO 80 I = 1, N - TEMP = WORK( I ) - IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP - 80 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - DO 90 J = 1, N - L = MAX( 1, J-KU ) - K = KU + 1 - J + L - CALL SLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) - 90 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANGB = VALUE - RETURN -* -* End of SLANGB -* - END diff --git a/blas_fix/slange.f b/blas_fix/slange.f deleted file mode 100644 index 2ba10ac5f..000000000 --- a/blas_fix/slange.f +++ /dev/null @@ -1,212 +0,0 @@ -*> \brief \b SLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general rectangular matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANGE + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANGE( NORM, M, N, A, LDA, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER LDA, M, N -* .. -* .. Array Arguments .. -* REAL A( LDA, * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANGE returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> real matrix A. -*> \endverbatim -*> -*> \return SLANGE -*> \verbatim -*> -*> SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANGE as described -*> above. -*> \endverbatim -*> -*> \param[in] M -*> \verbatim -*> M is INTEGER -*> The number of rows of the matrix A. M >= 0. When M = 0, -*> SLANGE is set to zero. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The number of columns of the matrix A. N >= 0. When N = 0, -*> SLANGE is set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is REAL array, dimension (LDA,N) -*> The m by n matrix A. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(M,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realGEauxiliary -* -* ===================================================================== - REAL FUNCTION SLANGE( NORM, M, N, A, LDA, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER LDA, M, N -* .. -* .. Array Arguments .. - REAL A( LDA, * ), WORK( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J - REAL SCALE, SUM, VALUE, TEMP -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( MIN( M, N ).EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - DO 20 J = 1, N - DO 10 I = 1, M - TEMP = ABS( A( I, J ) ) - IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP - 10 CONTINUE - 20 CONTINUE - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - DO 40 J = 1, N - SUM = ZERO - DO 30 I = 1, M - SUM = SUM + ABS( A( I, J ) ) - 30 CONTINUE - IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 40 CONTINUE - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - DO 50 I = 1, M - WORK( I ) = ZERO - 50 CONTINUE - DO 70 J = 1, N - DO 60 I = 1, M - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 60 CONTINUE - 70 CONTINUE - VALUE = ZERO - DO 80 I = 1, M - TEMP = WORK( I ) - IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP - 80 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - DO 90 J = 1, N - CALL SLASSQ( M, A( 1, J ), 1, SCALE, SUM ) - 90 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANGE = VALUE - RETURN -* -* End of SLANGE -* - END diff --git a/blas_fix/slangt.f b/blas_fix/slangt.f deleted file mode 100644 index 3a9310680..000000000 --- a/blas_fix/slangt.f +++ /dev/null @@ -1,209 +0,0 @@ -*> \brief \b SLANGT returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general tridiagonal matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANGT + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANGT( NORM, N, DL, D, DU ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER N -* .. -* .. Array Arguments .. -* REAL D( * ), DL( * ), DU( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANGT returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> real tridiagonal matrix A. -*> \endverbatim -*> -*> \return SLANGT -*> \verbatim -*> -*> SLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANGT as described -*> above. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANGT is -*> set to zero. -*> \endverbatim -*> -*> \param[in] DL -*> \verbatim -*> DL is REAL array, dimension (N-1) -*> The (n-1) sub-diagonal elements of A. -*> \endverbatim -*> -*> \param[in] D -*> \verbatim -*> D is REAL array, dimension (N) -*> The diagonal elements of A. -*> \endverbatim -*> -*> \param[in] DU -*> \verbatim -*> DU is REAL array, dimension (N-1) -*> The (n-1) super-diagonal elements of A. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLANGT( NORM, N, DL, D, DU ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER N -* .. -* .. Array Arguments .. - REAL D( * ), DL( * ), DU( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I - REAL ANORM, SCALE, SUM, TEMP -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - ANORM = ZERO - IF( N.LE.0 ) THEN - ANORM = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - ANORM = ABS( D( N ) ) - DO 10 I = 1, N - 1 - IF( ANORM.LT.ABS( DL( I ) ) .OR. SISNAN( ABS( DL( I ) ) ) ) - $ ANORM = ABS(DL(I)) - IF( ANORM.LT.ABS( D( I ) ) .OR. SISNAN( ABS( D( I ) ) ) ) - $ ANORM = ABS(D(I)) - IF( ANORM.LT.ABS( DU( I ) ) .OR. SISNAN (ABS( DU( I ) ) ) ) - $ ANORM = ABS(DU(I)) - 10 CONTINUE - ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN -* -* Find norm1(A). -* - IF( N.EQ.1 ) THEN - ANORM = ABS( D( 1 ) ) - ELSE - ANORM = ABS( D( 1 ) )+ABS( DL( 1 ) ) - TEMP = ABS( D( N ) )+ABS( DU( N-1 ) ) - IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP - DO 20 I = 2, N - 1 - TEMP = ABS( D( I ) )+ABS( DL( I ) )+ABS( DU( I-1 ) ) - IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP - 20 CONTINUE - END IF - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - IF( N.EQ.1 ) THEN - ANORM = ABS( D( 1 ) ) - ELSE - ANORM = ABS( D( 1 ) )+ABS( DU( 1 ) ) - TEMP = ABS( D( N ) )+ABS( DL( N-1 ) ) - IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP - DO 30 I = 2, N - 1 - TEMP = ABS( D( I ) )+ABS( DU( I ) )+ABS( DL( I-1 ) ) - IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP - 30 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - CALL SLASSQ( N, D, 1, SCALE, SUM ) - IF( N.GT.1 ) THEN - CALL SLASSQ( N-1, DL, 1, SCALE, SUM ) - CALL SLASSQ( N-1, DU, 1, SCALE, SUM ) - END IF - ANORM = SCALE*SQRT( SUM ) - END IF -* - SLANGT = ANORM - RETURN -* -* End of SLANGT -* - END diff --git a/blas_fix/slanhs.f b/blas_fix/slanhs.f deleted file mode 100644 index fbf4d993c..000000000 --- a/blas_fix/slanhs.f +++ /dev/null @@ -1,206 +0,0 @@ -*> \brief \b SLANHS returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of an upper Hessenberg matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANHS + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANHS( NORM, N, A, LDA, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER LDA, N -* .. -* .. Array Arguments .. -* REAL A( LDA, * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANHS returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> Hessenberg matrix A. -*> \endverbatim -*> -*> \return SLANHS -*> \verbatim -*> -*> SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANHS as described -*> above. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANHS is -*> set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is REAL array, dimension (LDA,N) -*> The n by n upper Hessenberg matrix A; the part of A below the -*> first sub-diagonal is not referenced. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(N,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLANHS( NORM, N, A, LDA, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER LDA, N -* .. -* .. Array Arguments .. - REAL A( LDA, * ), WORK( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J - REAL SCALE, SUM, VALUE -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - DO 20 J = 1, N - DO 10 I = 1, MIN( N, J+1 ) - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - DO 40 J = 1, N - SUM = ZERO - DO 30 I = 1, MIN( N, J+1 ) - SUM = SUM + ABS( A( I, J ) ) - 30 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 40 CONTINUE - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - DO 50 I = 1, N - WORK( I ) = ZERO - 50 CONTINUE - DO 70 J = 1, N - DO 60 I = 1, MIN( N, J+1 ) - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 60 CONTINUE - 70 CONTINUE - VALUE = ZERO - DO 80 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 80 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - DO 90 J = 1, N - CALL SLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) - 90 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANHS = VALUE - RETURN -* -* End of SLANHS -* - END diff --git a/blas_fix/slansb.f b/blas_fix/slansb.f deleted file mode 100644 index 9ac026464..000000000 --- a/blas_fix/slansb.f +++ /dev/null @@ -1,259 +0,0 @@ -*> \brief \b SLANSB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric band matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANSB + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANSB( NORM, UPLO, N, K, AB, LDAB, -* WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER K, LDAB, N -* .. -* .. Array Arguments .. -* REAL AB( LDAB, * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANSB returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of an -*> n by n symmetric band matrix A, with k super-diagonals. -*> \endverbatim -*> -*> \return SLANSB -*> \verbatim -*> -*> SLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANSB as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> band matrix A is supplied. -*> = 'U': Upper triangular part is supplied -*> = 'L': Lower triangular part is supplied -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANSB is -*> set to zero. -*> \endverbatim -*> -*> \param[in] K -*> \verbatim -*> K is INTEGER -*> The number of super-diagonals or sub-diagonals of the -*> band matrix A. K >= 0. -*> \endverbatim -*> -*> \param[in] AB -*> \verbatim -*> AB is REAL array, dimension (LDAB,N) -*> The upper or lower triangle of the symmetric band matrix A, -*> stored in the first K+1 rows of AB. The j-th column of A is -*> stored in the j-th column of the array AB as follows: -*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; -*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). -*> \endverbatim -*> -*> \param[in] LDAB -*> \verbatim -*> LDAB is INTEGER -*> The leading dimension of the array AB. LDAB >= K+1. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLANSB( NORM, UPLO, N, K, AB, LDAB, - $ WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER K, LDAB, N -* .. -* .. Array Arguments .. - REAL AB( LDAB, * ), WORK( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, L - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = MAX( K+2-J, 1 ), K + 1 - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = 1, MIN( N+1-J, K+1 ) - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is symmetric). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - L = K + 1 - J - DO 50 I = MAX( 1, J-K ), J - 1 - ABSA = ABS( AB( L+I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 50 CONTINUE - WORK( J ) = SUM + ABS( AB( K+1, J ) ) - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( AB( 1, J ) ) - L = 1 - J - DO 90 I = J + 1, MIN( N, J+K ) - ABSA = ABS( AB( L+I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - IF( K.GT.0 ) THEN - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL SLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), - $ 1, SCALE, SUM ) - 110 CONTINUE - L = K + 1 - ELSE - DO 120 J = 1, N - 1 - CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, - $ SUM ) - 120 CONTINUE - L = 1 - END IF - SUM = 2*SUM - ELSE - L = 1 - END IF - CALL SLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANSB = VALUE - RETURN -* -* End of SLANSB -* - END diff --git a/blas_fix/slansf.f b/blas_fix/slansf.f deleted file mode 100644 index a9f6c58a1..000000000 --- a/blas_fix/slansf.f +++ /dev/null @@ -1,965 +0,0 @@ -*> \brief \b SLANSF -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANSF + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANSF( NORM, TRANSR, UPLO, N, A, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, TRANSR, UPLO -* INTEGER N -* .. -* .. Array Arguments .. -* REAL A( 0: * ), WORK( 0: * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANSF returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> real symmetric matrix A in RFP format. -*> \endverbatim -*> -*> \return SLANSF -*> \verbatim -*> -*> SLANSF = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANSF as described -*> above. -*> \endverbatim -*> -*> \param[in] TRANSR -*> \verbatim -*> TRANSR is CHARACTER*1 -*> Specifies whether the RFP format of A is normal or -*> transposed format. -*> = 'N': RFP format is Normal; -*> = 'T': RFP format is Transpose. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> On entry, UPLO specifies whether the RFP matrix A came from -*> an upper or lower triangular matrix as follows: -*> = 'U': RFP A came from an upper triangular matrix; -*> = 'L': RFP A came from a lower triangular matrix. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANSF is -*> set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is REAL array, dimension ( N*(N+1)/2 ); -*> On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L') -*> part of the symmetric matrix A stored in RFP format. See the -*> "Notes" below for more details. -*> Unchanged on exit. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realOTHERcomputational -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> We first consider Rectangular Full Packed (RFP) Format when N is -*> even. We give an example where N = 6. -*> -*> AP is Upper AP is Lower -*> -*> 00 01 02 03 04 05 00 -*> 11 12 13 14 15 10 11 -*> 22 23 24 25 20 21 22 -*> 33 34 35 30 31 32 33 -*> 44 45 40 41 42 43 44 -*> 55 50 51 52 53 54 55 -*> -*> -*> Let TRANSR = 'N'. RFP holds AP as follows: -*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last -*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of -*> the transpose of the first three columns of AP upper. -*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first -*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of -*> the transpose of the last three columns of AP lower. -*> This covers the case N even and TRANSR = 'N'. -*> -*> RFP A RFP A -*> -*> 03 04 05 33 43 53 -*> 13 14 15 00 44 54 -*> 23 24 25 10 11 55 -*> 33 34 35 20 21 22 -*> 00 44 45 30 31 32 -*> 01 11 55 40 41 42 -*> 02 12 22 50 51 52 -*> -*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the -*> transpose of RFP A above. One therefore gets: -*> -*> -*> RFP A RFP A -*> -*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 -*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 -*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 -*> -*> -*> We then consider Rectangular Full Packed (RFP) Format when N is -*> odd. We give an example where N = 5. -*> -*> AP is Upper AP is Lower -*> -*> 00 01 02 03 04 00 -*> 11 12 13 14 10 11 -*> 22 23 24 20 21 22 -*> 33 34 30 31 32 33 -*> 44 40 41 42 43 44 -*> -*> -*> Let TRANSR = 'N'. RFP holds AP as follows: -*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last -*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of -*> the transpose of the first two columns of AP upper. -*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first -*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of -*> the transpose of the last two columns of AP lower. -*> This covers the case N odd and TRANSR = 'N'. -*> -*> RFP A RFP A -*> -*> 02 03 04 00 33 43 -*> 12 13 14 10 11 44 -*> 22 23 24 20 21 22 -*> 00 33 34 30 31 32 -*> 01 11 44 40 41 42 -*> -*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the -*> transpose of RFP A above. One therefore gets: -*> -*> RFP A RFP A -*> -*> 02 12 22 00 01 00 10 20 30 40 50 -*> 03 13 23 33 11 33 11 21 31 41 51 -*> 04 14 24 34 44 43 44 22 32 42 52 -*> \endverbatim -* -* ===================================================================== - REAL FUNCTION SLANSF( NORM, TRANSR, UPLO, N, A, WORK ) -* -* -- LAPACK computational routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, TRANSR, UPLO - INTEGER N -* .. -* .. Array Arguments .. - REAL A( 0: * ), WORK( 0: * ) -* .. -* -* ===================================================================== -* -* .. -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, IFM, ILU, NOE, N1, K, L, LDA - REAL SCALE, S, VALUE, AA, TEMP -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - SLANSF = ZERO - RETURN - ELSE IF( N.EQ.1 ) THEN - SLANSF = ABS( A(0) ) - RETURN - END IF -* -* set noe = 1 if n is odd. if n is even set noe=0 -* - NOE = 1 - IF( MOD( N, 2 ).EQ.0 ) - $ NOE = 0 -* -* set ifm = 0 when form='T or 't' and 1 otherwise -* - IFM = 1 - IF( LSAME( TRANSR, 'T' ) ) - $ IFM = 0 -* -* set ilu = 0 when uplo='U or 'u' and 1 otherwise -* - ILU = 1 - IF( LSAME( UPLO, 'U' ) ) - $ ILU = 0 -* -* set lda = (n+1)/2 when ifm = 0 -* set lda = n when ifm = 1 and noe = 1 -* set lda = n+1 when ifm = 1 and noe = 0 -* - IF( IFM.EQ.1 ) THEN - IF( NOE.EQ.1 ) THEN - LDA = N - ELSE -* noe=0 - LDA = N + 1 - END IF - ELSE -* ifm=0 - LDA = ( N+1 ) / 2 - END IF -* - IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - K = ( N+1 ) / 2 - VALUE = ZERO - IF( NOE.EQ.1 ) THEN -* n is odd - IF( IFM.EQ.1 ) THEN -* A is n by k - DO J = 0, K - 1 - DO I = 0, N - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - ELSE -* xpose case; A is k by n - DO J = 0, N - 1 - DO I = 0, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - END IF - ELSE -* n is even - IF( IFM.EQ.1 ) THEN -* A is n+1 by k - DO J = 0, K - 1 - DO I = 0, N - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - ELSE -* xpose case; A is k by n+1 - DO J = 0, N - DO I = 0, K - 1 - TEMP = ABS( A( I+J*LDA ) ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END DO - END IF - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is symmetric). -* - IF( IFM.EQ.1 ) THEN - K = N / 2 - IF( NOE.EQ.1 ) THEN -* n is odd - IF( ILU.EQ.0 ) THEN - DO I = 0, K - 1 - WORK( I ) = ZERO - END DO - DO J = 0, K - S = ZERO - DO I = 0, K + J - 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(i,j+k) - S = S + AA - WORK( I ) = WORK( I ) + AA - END DO - AA = ABS( A( I+J*LDA ) ) -* -> A(j+k,j+k) - WORK( J+K ) = S + AA - IF( I.EQ.K+K ) - $ GO TO 10 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(j,j) - WORK( J ) = WORK( J ) + AA - S = ZERO - DO L = J + 1, K - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(l,j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - 10 CONTINUE - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - ELSE -* ilu = 1 - K = K + 1 -* k=(n+1)/2 for n odd and ilu=1 - DO I = K, N - 1 - WORK( I ) = ZERO - END DO - DO J = K - 1, 0, -1 - S = ZERO - DO I = 0, J - 2 - AA = ABS( A( I+J*LDA ) ) -* -> A(j+k,i+k) - S = S + AA - WORK( I+K ) = WORK( I+K ) + AA - END DO - IF( J.GT.0 ) THEN - AA = ABS( A( I+J*LDA ) ) -* -> A(j+k,j+k) - S = S + AA - WORK( I+K ) = WORK( I+K ) + S -* i=j - I = I + 1 - END IF - AA = ABS( A( I+J*LDA ) ) -* -> A(j,j) - WORK( J ) = AA - S = ZERO - DO L = J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(l,j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END IF - ELSE -* n is even - IF( ILU.EQ.0 ) THEN - DO I = 0, K - 1 - WORK( I ) = ZERO - END DO - DO J = 0, K - 1 - S = ZERO - DO I = 0, K + J - 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(i,j+k) - S = S + AA - WORK( I ) = WORK( I ) + AA - END DO - AA = ABS( A( I+J*LDA ) ) -* -> A(j+k,j+k) - WORK( J+K ) = S + AA - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(j,j) - WORK( J ) = WORK( J ) + AA - S = ZERO - DO L = J + 1, K - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(l,j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - ELSE -* ilu = 1 - DO I = K, N - 1 - WORK( I ) = ZERO - END DO - DO J = K - 1, 0, -1 - S = ZERO - DO I = 0, J - 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(j+k,i+k) - S = S + AA - WORK( I+K ) = WORK( I+K ) + AA - END DO - AA = ABS( A( I+J*LDA ) ) -* -> A(j+k,j+k) - S = S + AA - WORK( I+K ) = WORK( I+K ) + S -* i=j - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(j,j) - WORK( J ) = AA - S = ZERO - DO L = J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* -> A(l,j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END IF - END IF - ELSE -* ifm=0 - K = N / 2 - IF( NOE.EQ.1 ) THEN -* n is odd - IF( ILU.EQ.0 ) THEN - N1 = K -* n/2 - K = K + 1 -* k is the row size and lda - DO I = N1, N - 1 - WORK( I ) = ZERO - END DO - DO J = 0, N1 - 1 - S = ZERO - DO I = 0, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,n1+i) - WORK( I+N1 ) = WORK( I+N1 ) + AA - S = S + AA - END DO - WORK( J ) = S - END DO -* j=n1=k-1 is special - S = ABS( A( 0+J*LDA ) ) -* A(k-1,k-1) - DO I = 1, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(k-1,i+n1) - WORK( I+N1 ) = WORK( I+N1 ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - DO J = K, N - 1 - S = ZERO - DO I = 0, J - K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(i,j-k) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=j-k - AA = ABS( A( I+J*LDA ) ) -* A(j-k,j-k) - S = S + AA - WORK( J-K ) = WORK( J-K ) + S - I = I + 1 - S = ABS( A( I+J*LDA ) ) -* A(j,j) - DO L = J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,l) - WORK( L ) = WORK( L ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - ELSE -* ilu=1 - K = K + 1 -* k=(n+1)/2 for n odd and ilu=1 - DO I = K, N - 1 - WORK( I ) = ZERO - END DO - DO J = 0, K - 2 -* process - S = ZERO - DO I = 0, J - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO - AA = ABS( A( I+J*LDA ) ) -* i=j so process of A(j,j) - S = S + AA - WORK( J ) = S -* is initialised here - I = I + 1 -* i=j process A(j+k,j+k) - AA = ABS( A( I+J*LDA ) ) - S = AA - DO L = K + J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(l,k+j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( K+J ) = WORK( K+J ) + S - END DO -* j=k-1 is special :process col A(k-1,0:k-1) - S = ZERO - DO I = 0, K - 2 - AA = ABS( A( I+J*LDA ) ) -* A(k,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=k-1 - AA = ABS( A( I+J*LDA ) ) -* A(k-1,k-1) - S = S + AA - WORK( I ) = S -* done with col j=k+1 - DO J = K, N - 1 -* process col j of A = A(j,0:k-1) - S = ZERO - DO I = 0, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END IF - ELSE -* n is even - IF( ILU.EQ.0 ) THEN - DO I = K, N - 1 - WORK( I ) = ZERO - END DO - DO J = 0, K - 1 - S = ZERO - DO I = 0, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,i+k) - WORK( I+K ) = WORK( I+K ) + AA - S = S + AA - END DO - WORK( J ) = S - END DO -* j=k - AA = ABS( A( 0+J*LDA ) ) -* A(k,k) - S = AA - DO I = 1, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(k,k+i) - WORK( I+K ) = WORK( I+K ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - DO J = K + 1, N - 1 - S = ZERO - DO I = 0, J - 2 - K - AA = ABS( A( I+J*LDA ) ) -* A(i,j-k-1) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=j-1-k - AA = ABS( A( I+J*LDA ) ) -* A(j-k-1,j-k-1) - S = S + AA - WORK( J-K-1 ) = WORK( J-K-1 ) + S - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,j) - S = AA - DO L = J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(j,l) - WORK( L ) = WORK( L ) + AA - S = S + AA - END DO - WORK( J ) = WORK( J ) + S - END DO -* j=n - S = ZERO - DO I = 0, K - 2 - AA = ABS( A( I+J*LDA ) ) -* A(i,k-1) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=k-1 - AA = ABS( A( I+J*LDA ) ) -* A(k-1,k-1) - S = S + AA - WORK( I ) = WORK( I ) + S - VALUE = WORK ( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - ELSE -* ilu=1 - DO I = K, N - 1 - WORK( I ) = ZERO - END DO -* j=0 is special :process col A(k:n-1,k) - S = ABS( A( 0 ) ) -* A(k,k) - DO I = 1, K - 1 - AA = ABS( A( I ) ) -* A(k+i,k) - WORK( I+K ) = WORK( I+K ) + AA - S = S + AA - END DO - WORK( K ) = WORK( K ) + S - DO J = 1, K - 1 -* process - S = ZERO - DO I = 0, J - 2 - AA = ABS( A( I+J*LDA ) ) -* A(j-1,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO - AA = ABS( A( I+J*LDA ) ) -* i=j-1 so process of A(j-1,j-1) - S = S + AA - WORK( J-1 ) = S -* is initialised here - I = I + 1 -* i=j process A(j+k,j+k) - AA = ABS( A( I+J*LDA ) ) - S = AA - DO L = K + J + 1, N - 1 - I = I + 1 - AA = ABS( A( I+J*LDA ) ) -* A(l,k+j) - S = S + AA - WORK( L ) = WORK( L ) + AA - END DO - WORK( K+J ) = WORK( K+J ) + S - END DO -* j=k is special :process col A(k,0:k-1) - S = ZERO - DO I = 0, K - 2 - AA = ABS( A( I+J*LDA ) ) -* A(k,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO -* i=k-1 - AA = ABS( A( I+J*LDA ) ) -* A(k-1,k-1) - S = S + AA - WORK( I ) = S -* done with col j=k+1 - DO J = K + 1, N -* process col j-1 of A = A(j-1,0:k-1) - S = ZERO - DO I = 0, K - 1 - AA = ABS( A( I+J*LDA ) ) -* A(j-1,i) - WORK( I ) = WORK( I ) + AA - S = S + AA - END DO - WORK( J-1 ) = WORK( J-1 ) + S - END DO - VALUE = WORK( 0 ) - DO I = 1, N-1 - TEMP = WORK( I ) - IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) - $ VALUE = TEMP - END DO - END IF - END IF - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - K = ( N+1 ) / 2 - SCALE = ZERO - S = ONE - IF( NOE.EQ.1 ) THEN -* n is odd - IF( IFM.EQ.1 ) THEN -* A is normal - IF( ILU.EQ.0 ) THEN -* A is upper - DO J = 0, K - 3 - CALL SLASSQ( K-J-2, A( K+J+1+J*LDA ), 1, SCALE, S ) -* L at A(k,0) - END DO - DO J = 0, K - 1 - CALL SLASSQ( K+J-1, A( 0+J*LDA ), 1, SCALE, S ) -* trap U at A(0,0) - END DO - S = S + S -* double s for the off diagonal elements - CALL SLASSQ( K-1, A( K ), LDA+1, SCALE, S ) -* tri L at A(k,0) - CALL SLASSQ( K, A( K-1 ), LDA+1, SCALE, S ) -* tri U at A(k-1,0) - ELSE -* ilu=1 & A is lower - DO J = 0, K - 1 - CALL SLASSQ( N-J-1, A( J+1+J*LDA ), 1, SCALE, S ) -* trap L at A(0,0) - END DO - DO J = 0, K - 2 - CALL SLASSQ( J, A( 0+( 1+J )*LDA ), 1, SCALE, S ) -* U at A(0,1) - END DO - S = S + S -* double s for the off diagonal elements - CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S ) -* tri L at A(0,0) - CALL SLASSQ( K-1, A( 0+LDA ), LDA+1, SCALE, S ) -* tri U at A(0,1) - END IF - ELSE -* A is xpose - IF( ILU.EQ.0 ) THEN -* A**T is upper - DO J = 1, K - 2 - CALL SLASSQ( J, A( 0+( K+J )*LDA ), 1, SCALE, S ) -* U at A(0,k) - END DO - DO J = 0, K - 2 - CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) -* k by k-1 rect. at A(0,0) - END DO - DO J = 0, K - 2 - CALL SLASSQ( K-J-1, A( J+1+( J+K-1 )*LDA ), 1, - $ SCALE, S ) -* L at A(0,k-1) - END DO - S = S + S -* double s for the off diagonal elements - CALL SLASSQ( K-1, A( 0+K*LDA ), LDA+1, SCALE, S ) -* tri U at A(0,k) - CALL SLASSQ( K, A( 0+( K-1 )*LDA ), LDA+1, SCALE, S ) -* tri L at A(0,k-1) - ELSE -* A**T is lower - DO J = 1, K - 1 - CALL SLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) -* U at A(0,0) - END DO - DO J = K, N - 1 - CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) -* k by k-1 rect. at A(0,k) - END DO - DO J = 0, K - 3 - CALL SLASSQ( K-J-2, A( J+2+J*LDA ), 1, SCALE, S ) -* L at A(1,0) - END DO - S = S + S -* double s for the off diagonal elements - CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S ) -* tri U at A(0,0) - CALL SLASSQ( K-1, A( 1 ), LDA+1, SCALE, S ) -* tri L at A(1,0) - END IF - END IF - ELSE -* n is even - IF( IFM.EQ.1 ) THEN -* A is normal - IF( ILU.EQ.0 ) THEN -* A is upper - DO J = 0, K - 2 - CALL SLASSQ( K-J-1, A( K+J+2+J*LDA ), 1, SCALE, S ) -* L at A(k+1,0) - END DO - DO J = 0, K - 1 - CALL SLASSQ( K+J, A( 0+J*LDA ), 1, SCALE, S ) -* trap U at A(0,0) - END DO - S = S + S -* double s for the off diagonal elements - CALL SLASSQ( K, A( K+1 ), LDA+1, SCALE, S ) -* tri L at A(k+1,0) - CALL SLASSQ( K, A( K ), LDA+1, SCALE, S ) -* tri U at A(k,0) - ELSE -* ilu=1 & A is lower - DO J = 0, K - 1 - CALL SLASSQ( N-J-1, A( J+2+J*LDA ), 1, SCALE, S ) -* trap L at A(1,0) - END DO - DO J = 1, K - 1 - CALL SLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) -* U at A(0,0) - END DO - S = S + S -* double s for the off diagonal elements - CALL SLASSQ( K, A( 1 ), LDA+1, SCALE, S ) -* tri L at A(1,0) - CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S ) -* tri U at A(0,0) - END IF - ELSE -* A is xpose - IF( ILU.EQ.0 ) THEN -* A**T is upper - DO J = 1, K - 1 - CALL SLASSQ( J, A( 0+( K+1+J )*LDA ), 1, SCALE, S ) -* U at A(0,k+1) - END DO - DO J = 0, K - 1 - CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) -* k by k rect. at A(0,0) - END DO - DO J = 0, K - 2 - CALL SLASSQ( K-J-1, A( J+1+( J+K )*LDA ), 1, SCALE, - $ S ) -* L at A(0,k) - END DO - S = S + S -* double s for the off diagonal elements - CALL SLASSQ( K, A( 0+( K+1 )*LDA ), LDA+1, SCALE, S ) -* tri U at A(0,k+1) - CALL SLASSQ( K, A( 0+K*LDA ), LDA+1, SCALE, S ) -* tri L at A(0,k) - ELSE -* A**T is lower - DO J = 1, K - 1 - CALL SLASSQ( J, A( 0+( J+1 )*LDA ), 1, SCALE, S ) -* U at A(0,1) - END DO - DO J = K + 1, N - CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) -* k by k rect. at A(0,k+1) - END DO - DO J = 0, K - 2 - CALL SLASSQ( K-J-1, A( J+1+J*LDA ), 1, SCALE, S ) -* L at A(0,0) - END DO - S = S + S -* double s for the off diagonal elements - CALL SLASSQ( K, A( LDA ), LDA+1, SCALE, S ) -* tri L at A(0,1) - CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S ) -* tri U at A(0,0) - END IF - END IF - END IF - VALUE = SCALE*SQRT( S ) - END IF -* - SLANSF = VALUE - RETURN -* -* End of SLANSF -* - END diff --git a/blas_fix/slansp.f b/blas_fix/slansp.f deleted file mode 100644 index 28b0e70c8..000000000 --- a/blas_fix/slansp.f +++ /dev/null @@ -1,262 +0,0 @@ -*> \brief \b SLANSP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix supplied in packed form. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANSP + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANSP( NORM, UPLO, N, AP, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER N -* .. -* .. Array Arguments .. -* REAL AP( * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANSP returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> real symmetric matrix A, supplied in packed form. -*> \endverbatim -*> -*> \return SLANSP -*> \verbatim -*> -*> SLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANSP as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> symmetric matrix A is supplied. -*> = 'U': Upper triangular part of A is supplied -*> = 'L': Lower triangular part of A is supplied -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANSP is -*> set to zero. -*> \endverbatim -*> -*> \param[in] AP -*> \verbatim -*> AP is REAL array, dimension (N*(N+1)/2) -*> The upper or lower triangle of the symmetric matrix A, packed -*> columnwise in a linear array. The j-th column of A is stored -*> in the array AP as follows: -*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; -*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLANSP( NORM, UPLO, N, AP, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER N -* .. -* .. Array Arguments .. - REAL AP( * ), WORK( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J, K - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - K = 1 - DO 20 J = 1, N - DO 10 I = K, K + J - 1 - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - K = K + J - 20 CONTINUE - ELSE - K = 1 - DO 40 J = 1, N - DO 30 I = K, K + N - J - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - K = K + N - J + 1 - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is symmetric). -* - VALUE = ZERO - K = 1 - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - DO 50 I = 1, J - 1 - ABSA = ABS( AP( K ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - K = K + 1 - 50 CONTINUE - WORK( J ) = SUM + ABS( AP( K ) ) - K = K + 1 - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( AP( K ) ) - K = K + 1 - DO 90 I = J + 1, N - ABSA = ABS( AP( K ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - K = K + 1 - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - K = 2 - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM ) - K = K + J - 110 CONTINUE - ELSE - DO 120 J = 1, N - 1 - CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM ) - K = K + N - J + 1 - 120 CONTINUE - END IF - SUM = 2*SUM - K = 1 - DO 130 I = 1, N - IF( AP( K ).NE.ZERO ) THEN - ABSA = ABS( AP( K ) ) - IF( SCALE.LT.ABSA ) THEN - SUM = ONE + SUM*( SCALE / ABSA )**2 - SCALE = ABSA - ELSE - SUM = SUM + ( ABSA / SCALE )**2 - END IF - END IF - IF( LSAME( UPLO, 'U' ) ) THEN - K = K + I + 1 - ELSE - K = K + N - I + 1 - END IF - 130 CONTINUE - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANSP = VALUE - RETURN -* -* End of SLANSP -* - END diff --git a/blas_fix/slanst.f b/blas_fix/slanst.f deleted file mode 100644 index 92c9227b7..000000000 --- a/blas_fix/slanst.f +++ /dev/null @@ -1,187 +0,0 @@ -*> \brief \b SLANST returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric tridiagonal matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANST + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANST( NORM, N, D, E ) -* -* .. Scalar Arguments .. -* CHARACTER NORM -* INTEGER N -* .. -* .. Array Arguments .. -* REAL D( * ), E( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANST returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> real symmetric tridiagonal matrix A. -*> \endverbatim -*> -*> \return SLANST -*> \verbatim -*> -*> SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANST as described -*> above. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANST is -*> set to zero. -*> \endverbatim -*> -*> \param[in] D -*> \verbatim -*> D is REAL array, dimension (N) -*> The diagonal elements of A. -*> \endverbatim -*> -*> \param[in] E -*> \verbatim -*> E is REAL array, dimension (N-1) -*> The (n-1) sub-diagonal or super-diagonal elements of A. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup auxOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLANST( NORM, N, D, E ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM - INTEGER N -* .. -* .. Array Arguments .. - REAL D( * ), E( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I - REAL ANORM, SCALE, SUM -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - ANORM = ZERO - IF( N.LE.0 ) THEN - ANORM = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - ANORM = ABS( D( N ) ) - DO 10 I = 1, N - 1 - SUM = ABS( D( I ) ) - IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM - SUM = ABS( E( I ) ) - IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM - 10 CONTINUE - ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. - $ LSAME( NORM, 'I' ) ) THEN -* -* Find norm1(A). -* - IF( N.EQ.1 ) THEN - ANORM = ABS( D( 1 ) ) - ELSE - ANORM = ABS( D( 1 ) )+ABS( E( 1 ) ) - SUM = ABS( E( N-1 ) )+ABS( D( N ) ) - IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM - DO 20 I = 2, N - 1 - SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) ) - IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM - 20 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - IF( N.GT.1 ) THEN - CALL SLASSQ( N-1, E, 1, SCALE, SUM ) - SUM = 2*SUM - END IF - CALL SLASSQ( N, D, 1, SCALE, SUM ) - ANORM = SCALE*SQRT( SUM ) - END IF -* - SLANST = ANORM - RETURN -* -* End of SLANST -* - END diff --git a/blas_fix/slansy.f b/blas_fix/slansy.f deleted file mode 100644 index 54c28a25a..000000000 --- a/blas_fix/slansy.f +++ /dev/null @@ -1,242 +0,0 @@ -*> \brief \b SLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANSY + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER NORM, UPLO -* INTEGER LDA, N -* .. -* .. Array Arguments .. -* REAL A( LDA, * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANSY returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> real symmetric matrix A. -*> \endverbatim -*> -*> \return SLANSY -*> \verbatim -*> -*> SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANSY as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the upper or lower triangular part of the -*> symmetric matrix A is to be referenced. -*> = 'U': Upper triangular part of A is referenced -*> = 'L': Lower triangular part of A is referenced -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANSY is -*> set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is REAL array, dimension (LDA,N) -*> The symmetric matrix A. If UPLO = 'U', the leading n by n -*> upper triangular part of A contains the upper triangular part -*> of the matrix A, and the strictly lower triangular part of A -*> is not referenced. If UPLO = 'L', the leading n by n lower -*> triangular part of A contains the lower triangular part of -*> the matrix A, and the strictly upper triangular part of A is -*> not referenced. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(N,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, -*> WORK is not referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realSYauxiliary -* -* ===================================================================== - REAL FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER NORM, UPLO - INTEGER LDA, N -* .. -* .. Array Arguments .. - REAL A( LDA, * ), WORK( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - INTEGER I, J - REAL ABSA, SCALE, SUM, VALUE -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = 1, J - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = J, N - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. - $ ( NORM.EQ.'1' ) ) THEN -* -* Find normI(A) ( = norm1(A), since A is symmetric). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - SUM = ZERO - DO 50 I = 1, J - 1 - ABSA = ABS( A( I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 50 CONTINUE - WORK( J ) = SUM + ABS( A( J, J ) ) - 60 CONTINUE - DO 70 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - ELSE - DO 80 I = 1, N - WORK( I ) = ZERO - 80 CONTINUE - DO 100 J = 1, N - SUM = WORK( J ) + ABS( A( J, J ) ) - DO 90 I = J + 1, N - ABSA = ABS( A( I, J ) ) - SUM = SUM + ABSA - WORK( I ) = WORK( I ) + ABSA - 90 CONTINUE - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 100 CONTINUE - END IF - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - SCALE = ZERO - SUM = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 2, N - CALL SLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) - 110 CONTINUE - ELSE - DO 120 J = 1, N - 1 - CALL SLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) - 120 CONTINUE - END IF - SUM = 2*SUM - CALL SLASSQ( N, A, LDA+1, SCALE, SUM ) - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANSY = VALUE - RETURN -* -* End of SLANSY -* - END diff --git a/blas_fix/slantb.f b/blas_fix/slantb.f deleted file mode 100644 index 5135bd625..000000000 --- a/blas_fix/slantb.f +++ /dev/null @@ -1,362 +0,0 @@ -*> \brief \b SLANTB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular band matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANTB + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANTB( NORM, UPLO, DIAG, N, K, AB, -* LDAB, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER DIAG, NORM, UPLO -* INTEGER K, LDAB, N -* .. -* .. Array Arguments .. -* REAL AB( LDAB, * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANTB returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of an -*> n by n triangular band matrix A, with ( k + 1 ) diagonals. -*> \endverbatim -*> -*> \return SLANTB -*> \verbatim -*> -*> SLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANTB as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the matrix A is upper or lower triangular. -*> = 'U': Upper triangular -*> = 'L': Lower triangular -*> \endverbatim -*> -*> \param[in] DIAG -*> \verbatim -*> DIAG is CHARACTER*1 -*> Specifies whether or not the matrix A is unit triangular. -*> = 'N': Non-unit triangular -*> = 'U': Unit triangular -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANTB is -*> set to zero. -*> \endverbatim -*> -*> \param[in] K -*> \verbatim -*> K is INTEGER -*> The number of super-diagonals of the matrix A if UPLO = 'U', -*> or the number of sub-diagonals of the matrix A if UPLO = 'L'. -*> K >= 0. -*> \endverbatim -*> -*> \param[in] AB -*> \verbatim -*> AB is REAL array, dimension (LDAB,N) -*> The upper or lower triangular band matrix A, stored in the -*> first k+1 rows of AB. The j-th column of A is stored -*> in the j-th column of the array AB as follows: -*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; -*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). -*> Note that when DIAG = 'U', the elements of the array AB -*> corresponding to the diagonal elements of the matrix A are -*> not referenced, but are assumed to be one. -*> \endverbatim -*> -*> \param[in] LDAB -*> \verbatim -*> LDAB is INTEGER -*> The leading dimension of the array AB. LDAB >= K+1. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLANTB( NORM, UPLO, DIAG, N, K, AB, - $ LDAB, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER DIAG, NORM, UPLO - INTEGER K, LDAB, N -* .. -* .. Array Arguments .. - REAL AB( LDAB, * ), WORK( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - LOGICAL UDIAG - INTEGER I, J, L - REAL SCALE, SUM, VALUE -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - IF( LSAME( DIAG, 'U' ) ) THEN - VALUE = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = MAX( K+2-J, 1 ), K - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = 2, MIN( N+1-J, K+1 ) - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - DO 50 I = MAX( K+2-J, 1 ), K + 1 - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 50 CONTINUE - 60 CONTINUE - ELSE - DO 80 J = 1, N - DO 70 I = 1, MIN( N+1-J, K+1 ) - SUM = ABS( AB( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - 80 CONTINUE - END IF - END IF - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - UDIAG = LSAME( DIAG, 'U' ) - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 90 I = MAX( K+2-J, 1 ), K - SUM = SUM + ABS( AB( I, J ) ) - 90 CONTINUE - ELSE - SUM = ZERO - DO 100 I = MAX( K+2-J, 1 ), K + 1 - SUM = SUM + ABS( AB( I, J ) ) - 100 CONTINUE - END IF - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 110 CONTINUE - ELSE - DO 140 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 120 I = 2, MIN( N+1-J, K+1 ) - SUM = SUM + ABS( AB( I, J ) ) - 120 CONTINUE - ELSE - SUM = ZERO - DO 130 I = 1, MIN( N+1-J, K+1 ) - SUM = SUM + ABS( AB( I, J ) ) - 130 CONTINUE - END IF - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 140 CONTINUE - END IF - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - DO 150 I = 1, N - WORK( I ) = ONE - 150 CONTINUE - DO 170 J = 1, N - L = K + 1 - J - DO 160 I = MAX( 1, J-K ), J - 1 - WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) - 160 CONTINUE - 170 CONTINUE - ELSE - DO 180 I = 1, N - WORK( I ) = ZERO - 180 CONTINUE - DO 200 J = 1, N - L = K + 1 - J - DO 190 I = MAX( 1, J-K ), J - WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) - 190 CONTINUE - 200 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - DO 210 I = 1, N - WORK( I ) = ONE - 210 CONTINUE - DO 230 J = 1, N - L = 1 - J - DO 220 I = J + 1, MIN( N, J+K ) - WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) - 220 CONTINUE - 230 CONTINUE - ELSE - DO 240 I = 1, N - WORK( I ) = ZERO - 240 CONTINUE - DO 260 J = 1, N - L = 1 - J - DO 250 I = J, MIN( N, J+K ) - WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) - 250 CONTINUE - 260 CONTINUE - END IF - END IF - DO 270 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 270 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = N - IF( K.GT.0 ) THEN - DO 280 J = 2, N - CALL SLASSQ( MIN( J-1, K ), - $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, - $ SUM ) - 280 CONTINUE - END IF - ELSE - SCALE = ZERO - SUM = ONE - DO 290 J = 1, N - CALL SLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), - $ 1, SCALE, SUM ) - 290 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = N - IF( K.GT.0 ) THEN - DO 300 J = 1, N - 1 - CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, - $ SUM ) - 300 CONTINUE - END IF - ELSE - SCALE = ZERO - SUM = ONE - DO 310 J = 1, N - CALL SLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, - $ SUM ) - 310 CONTINUE - END IF - END IF - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANTB = VALUE - RETURN -* -* End of SLANTB -* - END diff --git a/blas_fix/slantp.f b/blas_fix/slantp.f deleted file mode 100644 index e27d62c0e..000000000 --- a/blas_fix/slantp.f +++ /dev/null @@ -1,356 +0,0 @@ -*> \brief \b SLANTP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular matrix supplied in packed form. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANTP + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANTP( NORM, UPLO, DIAG, N, AP, WORK ) -* -* .. Scalar Arguments .. -* CHARACTER DIAG, NORM, UPLO -* INTEGER N -* .. -* .. Array Arguments .. -* REAL AP( * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANTP returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> triangular matrix A, supplied in packed form. -*> \endverbatim -*> -*> \return SLANTP -*> \verbatim -*> -*> SLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANTP as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the matrix A is upper or lower triangular. -*> = 'U': Upper triangular -*> = 'L': Lower triangular -*> \endverbatim -*> -*> \param[in] DIAG -*> \verbatim -*> DIAG is CHARACTER*1 -*> Specifies whether or not the matrix A is unit triangular. -*> = 'N': Non-unit triangular -*> = 'U': Unit triangular -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. When N = 0, SLANTP is -*> set to zero. -*> \endverbatim -*> -*> \param[in] AP -*> \verbatim -*> AP is REAL array, dimension (N*(N+1)/2) -*> The upper or lower triangular matrix A, packed columnwise in -*> a linear array. The j-th column of A is stored in the array -*> AP as follows: -*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; -*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. -*> Note that when DIAG = 'U', the elements of the array AP -*> corresponding to the diagonal elements of the matrix A are -*> not referenced, but are assumed to be one. -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLANTP( NORM, UPLO, DIAG, N, AP, WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER DIAG, NORM, UPLO - INTEGER N -* .. -* .. Array Arguments .. - REAL AP( * ), WORK( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - LOGICAL UDIAG - INTEGER I, J, K - REAL SCALE, SUM, VALUE -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( N.EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - K = 1 - IF( LSAME( DIAG, 'U' ) ) THEN - VALUE = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = K, K + J - 2 - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - K = K + J - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = K + 1, K + N - J - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - K = K + N - J + 1 - 40 CONTINUE - END IF - ELSE - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - DO 50 I = K, K + J - 1 - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 50 CONTINUE - K = K + J - 60 CONTINUE - ELSE - DO 80 J = 1, N - DO 70 I = K, K + N - J - SUM = ABS( AP( I ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - K = K + N - J + 1 - 80 CONTINUE - END IF - END IF - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - K = 1 - UDIAG = LSAME( DIAG, 'U' ) - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 90 I = K, K + J - 2 - SUM = SUM + ABS( AP( I ) ) - 90 CONTINUE - ELSE - SUM = ZERO - DO 100 I = K, K + J - 1 - SUM = SUM + ABS( AP( I ) ) - 100 CONTINUE - END IF - K = K + J - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 110 CONTINUE - ELSE - DO 140 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 120 I = K + 1, K + N - J - SUM = SUM + ABS( AP( I ) ) - 120 CONTINUE - ELSE - SUM = ZERO - DO 130 I = K, K + N - J - SUM = SUM + ABS( AP( I ) ) - 130 CONTINUE - END IF - K = K + N - J + 1 - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 140 CONTINUE - END IF - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - K = 1 - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - DO 150 I = 1, N - WORK( I ) = ONE - 150 CONTINUE - DO 170 J = 1, N - DO 160 I = 1, J - 1 - WORK( I ) = WORK( I ) + ABS( AP( K ) ) - K = K + 1 - 160 CONTINUE - K = K + 1 - 170 CONTINUE - ELSE - DO 180 I = 1, N - WORK( I ) = ZERO - 180 CONTINUE - DO 200 J = 1, N - DO 190 I = 1, J - WORK( I ) = WORK( I ) + ABS( AP( K ) ) - K = K + 1 - 190 CONTINUE - 200 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - DO 210 I = 1, N - WORK( I ) = ONE - 210 CONTINUE - DO 230 J = 1, N - K = K + 1 - DO 220 I = J + 1, N - WORK( I ) = WORK( I ) + ABS( AP( K ) ) - K = K + 1 - 220 CONTINUE - 230 CONTINUE - ELSE - DO 240 I = 1, N - WORK( I ) = ZERO - 240 CONTINUE - DO 260 J = 1, N - DO 250 I = J, N - WORK( I ) = WORK( I ) + ABS( AP( K ) ) - K = K + 1 - 250 CONTINUE - 260 CONTINUE - END IF - END IF - VALUE = ZERO - DO 270 I = 1, N - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 270 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = N - K = 2 - DO 280 J = 2, N - CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM ) - K = K + J - 280 CONTINUE - ELSE - SCALE = ZERO - SUM = ONE - K = 1 - DO 290 J = 1, N - CALL SLASSQ( J, AP( K ), 1, SCALE, SUM ) - K = K + J - 290 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = N - K = 2 - DO 300 J = 1, N - 1 - CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM ) - K = K + N - J + 1 - 300 CONTINUE - ELSE - SCALE = ZERO - SUM = ONE - K = 1 - DO 310 J = 1, N - CALL SLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) - K = K + N - J + 1 - 310 CONTINUE - END IF - END IF - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANTP = VALUE - RETURN -* -* End of SLANTP -* - END diff --git a/blas_fix/slantr.f b/blas_fix/slantr.f deleted file mode 100644 index 8ce04ea0f..000000000 --- a/blas_fix/slantr.f +++ /dev/null @@ -1,354 +0,0 @@ -*> \brief \b SLANTR returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix. -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLANTR + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, -* WORK ) -* -* .. Scalar Arguments .. -* CHARACTER DIAG, NORM, UPLO -* INTEGER LDA, M, N -* .. -* .. Array Arguments .. -* REAL A( LDA, * ), WORK( * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLANTR returns the value of the one norm, or the Frobenius norm, or -*> the infinity norm, or the element of largest absolute value of a -*> trapezoidal or triangular matrix A. -*> \endverbatim -*> -*> \return SLANTR -*> \verbatim -*> -*> SLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' -*> ( -*> ( norm1(A), NORM = '1', 'O' or 'o' -*> ( -*> ( normI(A), NORM = 'I' or 'i' -*> ( -*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' -*> -*> where norm1 denotes the one norm of a matrix (maximum column sum), -*> normI denotes the infinity norm of a matrix (maximum row sum) and -*> normF denotes the Frobenius norm of a matrix (square root of sum of -*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] NORM -*> \verbatim -*> NORM is CHARACTER*1 -*> Specifies the value to be returned in SLANTR as described -*> above. -*> \endverbatim -*> -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> Specifies whether the matrix A is upper or lower trapezoidal. -*> = 'U': Upper trapezoidal -*> = 'L': Lower trapezoidal -*> Note that A is triangular instead of trapezoidal if M = N. -*> \endverbatim -*> -*> \param[in] DIAG -*> \verbatim -*> DIAG is CHARACTER*1 -*> Specifies whether or not the matrix A has unit diagonal. -*> = 'N': Non-unit diagonal -*> = 'U': Unit diagonal -*> \endverbatim -*> -*> \param[in] M -*> \verbatim -*> M is INTEGER -*> The number of rows of the matrix A. M >= 0, and if -*> UPLO = 'U', M <= N. When M = 0, SLANTR is set to zero. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The number of columns of the matrix A. N >= 0, and if -*> UPLO = 'L', N <= M. When N = 0, SLANTR is set to zero. -*> \endverbatim -*> -*> \param[in] A -*> \verbatim -*> A is REAL array, dimension (LDA,N) -*> The trapezoidal matrix A (A is triangular if M = N). -*> If UPLO = 'U', the leading m by n upper trapezoidal part of -*> the array A contains the upper trapezoidal matrix, and the -*> strictly lower triangular part of A is not referenced. -*> If UPLO = 'L', the leading m by n lower trapezoidal part of -*> the array A contains the lower trapezoidal matrix, and the -*> strictly upper triangular part of A is not referenced. Note -*> that when DIAG = 'U', the diagonal elements of A are not -*> referenced and are assumed to be one. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(M,1). -*> \endverbatim -*> -*> \param[out] WORK -*> \verbatim -*> WORK is REAL array, dimension (MAX(1,LWORK)), -*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not -*> referenced. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup realOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, - $ WORK ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - CHARACTER DIAG, NORM, UPLO - INTEGER LDA, M, N -* .. -* .. Array Arguments .. - REAL A( LDA, * ), WORK( * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. -* .. Local Scalars .. - LOGICAL UDIAG - INTEGER I, J - REAL SCALE, SUM, VALUE -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ -* .. -* .. External Functions .. - LOGICAL LSAME, SISNAN - EXTERNAL LSAME, SISNAN -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MIN, SQRT -* .. -* .. Executable Statements .. -* - VALUE = ZERO - IF( MIN( M, N ).EQ.0 ) THEN - VALUE = ZERO - ELSE IF( LSAME( NORM, 'M' ) ) THEN -* -* Find max(abs(A(i,j))). -* - IF( LSAME( DIAG, 'U' ) ) THEN - VALUE = ONE - IF( LSAME( UPLO, 'U' ) ) THEN - DO 20 J = 1, N - DO 10 I = 1, MIN( M, J-1 ) - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40 J = 1, N - DO 30 I = J + 1, M - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 30 CONTINUE - 40 CONTINUE - END IF - ELSE - VALUE = ZERO - IF( LSAME( UPLO, 'U' ) ) THEN - DO 60 J = 1, N - DO 50 I = 1, MIN( M, J ) - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 50 CONTINUE - 60 CONTINUE - ELSE - DO 80 J = 1, N - DO 70 I = J, M - SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 70 CONTINUE - 80 CONTINUE - END IF - END IF - ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN -* -* Find norm1(A). -* - VALUE = ZERO - UDIAG = LSAME( DIAG, 'U' ) - IF( LSAME( UPLO, 'U' ) ) THEN - DO 110 J = 1, N - IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN - SUM = ONE - DO 90 I = 1, J - 1 - SUM = SUM + ABS( A( I, J ) ) - 90 CONTINUE - ELSE - SUM = ZERO - DO 100 I = 1, MIN( M, J ) - SUM = SUM + ABS( A( I, J ) ) - 100 CONTINUE - END IF - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 110 CONTINUE - ELSE - DO 140 J = 1, N - IF( UDIAG ) THEN - SUM = ONE - DO 120 I = J + 1, M - SUM = SUM + ABS( A( I, J ) ) - 120 CONTINUE - ELSE - SUM = ZERO - DO 130 I = J, M - SUM = SUM + ABS( A( I, J ) ) - 130 CONTINUE - END IF - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 140 CONTINUE - END IF - ELSE IF( LSAME( NORM, 'I' ) ) THEN -* -* Find normI(A). -* - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - DO 150 I = 1, M - WORK( I ) = ONE - 150 CONTINUE - DO 170 J = 1, N - DO 160 I = 1, MIN( M, J-1 ) - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 160 CONTINUE - 170 CONTINUE - ELSE - DO 180 I = 1, M - WORK( I ) = ZERO - 180 CONTINUE - DO 200 J = 1, N - DO 190 I = 1, MIN( M, J ) - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 190 CONTINUE - 200 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - DO 210 I = 1, N - WORK( I ) = ONE - 210 CONTINUE - DO 220 I = N + 1, M - WORK( I ) = ZERO - 220 CONTINUE - DO 240 J = 1, N - DO 230 I = J + 1, M - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 230 CONTINUE - 240 CONTINUE - ELSE - DO 250 I = 1, M - WORK( I ) = ZERO - 250 CONTINUE - DO 270 J = 1, N - DO 260 I = J, M - WORK( I ) = WORK( I ) + ABS( A( I, J ) ) - 260 CONTINUE - 270 CONTINUE - END IF - END IF - VALUE = ZERO - DO 280 I = 1, M - SUM = WORK( I ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM - 280 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN -* -* Find normF(A). -* - IF( LSAME( UPLO, 'U' ) ) THEN - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = MIN( M, N ) - DO 290 J = 2, N - CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) - 290 CONTINUE - ELSE - SCALE = ZERO - SUM = ONE - DO 300 J = 1, N - CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) - 300 CONTINUE - END IF - ELSE - IF( LSAME( DIAG, 'U' ) ) THEN - SCALE = ONE - SUM = MIN( M, N ) - DO 310 J = 1, N - CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, - $ SUM ) - 310 CONTINUE - ELSE - SCALE = ZERO - SUM = ONE - DO 320 J = 1, N - CALL SLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) - 320 CONTINUE - END IF - END IF - VALUE = SCALE*SQRT( SUM ) - END IF -* - SLANTR = VALUE - RETURN -* -* End of SLANTR -* - END diff --git a/blas_fix/slapy2.f b/blas_fix/slapy2.f deleted file mode 100644 index 85f502bd1..000000000 --- a/blas_fix/slapy2.f +++ /dev/null @@ -1,104 +0,0 @@ -*> \brief \b SLAPY2 returns sqrt(x2+y2). -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLAPY2 + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLAPY2( X, Y ) -* -* .. Scalar Arguments .. -* REAL X, Y -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary -*> overflow. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] X -*> \verbatim -*> X is REAL -*> \endverbatim -*> -*> \param[in] Y -*> \verbatim -*> Y is REAL -*> X and Y specify the values x and y. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup auxOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLAPY2( X, Y ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - REAL X, Y -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ZERO - PARAMETER ( ZERO = 0.0E0 ) - REAL ONE - PARAMETER ( ONE = 1.0E0 ) -* .. -* .. Local Scalars .. - REAL W, XABS, YABS, Z -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, MIN, SQRT -* .. -* .. Executable Statements .. -* - XABS = ABS( X ) - YABS = ABS( Y ) - W = MAX( XABS, YABS ) - Z = MIN( XABS, YABS ) - IF( Z.EQ.ZERO ) THEN - SLAPY2 = W - ELSE - SLAPY2 = W*SQRT( ONE+( Z / W )**2 ) - END IF - RETURN -* -* End of SLAPY2 -* - END diff --git a/blas_fix/slapy3.f b/blas_fix/slapy3.f deleted file mode 100644 index 34bf5e15e..000000000 --- a/blas_fix/slapy3.f +++ /dev/null @@ -1,111 +0,0 @@ -*> \brief \b SLAPY3 returns sqrt(x2+y2+z2). -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download SLAPY3 + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* REAL FUNCTION SLAPY3( X, Y, Z ) -* -* .. Scalar Arguments .. -* REAL X, Y, Z -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause -*> unnecessary overflow. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] X -*> \verbatim -*> X is REAL -*> \endverbatim -*> -*> \param[in] Y -*> \verbatim -*> Y is REAL -*> \endverbatim -*> -*> \param[in] Z -*> \verbatim -*> Z is REAL -*> X, Y and Z specify the values x, y and z. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date September 2012 -* -*> \ingroup auxOTHERauxiliary -* -* ===================================================================== - REAL FUNCTION SLAPY3( X, Y, Z ) -* -* -- LAPACK auxiliary routine (version 3.4.2) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* September 2012 -* -* .. Scalar Arguments .. - REAL X, Y, Z -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ZERO - PARAMETER ( ZERO = 0.0E0 ) -* .. -* .. Local Scalars .. - REAL W, XABS, YABS, ZABS -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS, MAX, SQRT -* .. -* .. Executable Statements .. -* - XABS = ABS( X ) - YABS = ABS( Y ) - ZABS = ABS( Z ) - W = MAX( XABS, YABS, ZABS ) - IF( W.EQ.ZERO ) THEN -* W can be zero for max(0,nan,0) -* adding all three entries together will make sure -* NaN will not disappear. - SLAPY3 = XABS + YABS + ZABS - ELSE - SLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ - $ ( ZABS / W )**2 ) - END IF - RETURN -* -* End of SLAPY3 -* - END diff --git a/blas_fix/snrm2.f b/blas_fix/snrm2.f deleted file mode 100644 index a3674a6d7..000000000 --- a/blas_fix/snrm2.f +++ /dev/null @@ -1,112 +0,0 @@ -*> \brief \b SNRM2 -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* REAL FUNCTION SNRM2(N,X,INCX) -* -* .. Scalar Arguments .. -* INTEGER INCX,N -* .. -* .. Array Arguments .. -* REAL X(*) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SNRM2 returns the euclidean norm of a vector via the function -*> name, so that -*> -*> SNRM2 := sqrt( x'*x ). -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup single_blas_level1 -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> -- This version written on 25-October-1982. -*> Modified on 14-October-1993 to inline the call to SLASSQ. -*> Sven Hammarling, Nag Ltd. -*> \endverbatim -*> -* ===================================================================== - REAL FUNCTION SNRM2(N,X,INCX) -* -* -- Reference BLAS level1 routine (version 3.4.0) -- -* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - INTEGER INCX,N -* .. -* .. Array Arguments .. - REAL X(*) -* .. -* -* ===================================================================== -* -* .. Parameters .. - REAL ONE,ZERO - PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) -* .. -* .. Local Scalars .. - REAL ABSXI,NORM,SCALE,SSQ - INTEGER IX -* .. -* .. Intrinsic Functions .. - INTRINSIC ABS,SQRT -* .. - IF (N.LT.1 .OR. INCX.LT.1) THEN - NORM = ZERO - ELSE IF (N.EQ.1) THEN - NORM = ABS(X(1)) - ELSE - SCALE = ZERO - SSQ = ONE -* The following loop is equivalent to this call to the LAPACK -* auxiliary routine: -* CALL SLASSQ( N, X, INCX, SCALE, SSQ ) -* - DO 10 IX = 1,1 + (N-1)*INCX,INCX - IF (X(IX).NE.ZERO) THEN - ABSXI = ABS(X(IX)) - IF (SCALE.LT.ABSXI) THEN - SSQ = ONE + SSQ* (SCALE/ABSXI)**2 - SCALE = ABSXI - ELSE - SSQ = SSQ + (ABSXI/SCALE)**2 - END IF - END IF - 10 CONTINUE - NORM = SCALE*SQRT(SSQ) - END IF -* - SNRM2 = NORM - RETURN -* -* End of SNRM2. -* - END From 1b5739dc70ebf6793fad0a29d926d4c0aee44c4f Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Fri, 14 Feb 2025 13:35:46 +0100 Subject: [PATCH 06/27] remove macOS support --- CMakeLists.txt | 53 +------- Makefile | 47 +------ Makefile.internal | 3 - Makefile.subdir | 1 - README | 12 +- ReleaseChecklist | 7 +- control/magma_internal.h | 8 +- docs/contributors-guide.txt | 39 +++--- docs/documentation.txt | 13 +- fortran/Makefile | 3 +- testing/Makefile.src | 2 - testing/testing_veclib.cpp | 241 ------------------------------------ tools/build.sh | 2 +- tools/checklist.csh | 2 +- 14 files changed, 44 insertions(+), 389 deletions(-) delete mode 100644 testing/testing_veclib.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9324be3eb..3e051965b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -327,13 +327,13 @@ set(BLA_VENDOR "" CACHE STRING # List from CMake 3.17, minus some obsolete ones: # PhiPACK, Compaq CXML, DEC Alpha DXML, SunPerf, SGI SCSL, SGIMATH, -# Intel, NAS (Apple veclib). +# Intel, NAS. +# macOS Accelerate is not supported, since Apple dropped CUDA. # FLAME is BLIS. set_property(CACHE BLA_VENDOR PROPERTY STRINGS "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL" "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq" "ACML" "ACML_MP" "ACML_GPU" - "Apple" "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp" "Generic") @@ -407,38 +407,6 @@ else() set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function" ) endif() -if (CMAKE_HOST_APPLE) - # Use rpaths, which is on by default in CMake 3. - set( CMAKE_MACOSX_RPATH 1 ) - - # 64-bit veclib (Accelerate) has issues; substitute correct functions from LAPACK. - # (The issue is single precision functions that return doubles; - # if a consistent prototype is used, the problem goes away in C, - # but this is not feasible in Fortran.) - if (LAPACK_LIBRARIES MATCHES "Accelerate") - if (USE_FORTRAN) - message( STATUS "MacOS X: adding blas_fix library" ) - add_library( blas_fix ${libblas_fix_src} ) - target_link_libraries( blas_fix - ${LAPACK_LIBRARIES} - ) - set( blas_fix blas_fix ) - set( blas_fix_lib -lblas_fix ) - else() - message( WARNING "\n Warning: cannot compile blas_fix library for MacOS X without Fortran compiler.\n" ) - endif() - endif() - - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMAGMA_NOAFFINITY" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMAGMA_NOAFFINITY" ) - - # previously, just compile as 32-bit, but CUDA 6.5 no longer has 32-bit FAT libraries - ## set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32" ) - ## set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) - ## set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -m32" ) - ## set( CUDA_64_BIT_DEVICE_CODE OFF ) -endif() - include_directories( "${CMAKE_BINARY_DIR}/include" ) include_directories( include ) @@ -506,7 +474,6 @@ else() #message(FATAL_ERROR "${libmagma_all}") add_library( magma ${libmagma_all} ) target_link_libraries( magma - ${blas_fix} ${LAPACK_LIBRARIES} CUDA::cudart CUDA::cublas @@ -525,7 +492,6 @@ else() add_library( magma ${libmagma_all} ) target_link_libraries( magma hip::host - ${blas_fix} ${LAPACK_LIBRARIES} hip::device roc::hipblas @@ -568,7 +534,6 @@ else() add_library( lapacktest ${liblapacktest_all_cpp} ) endif() target_link_libraries( lapacktest - ${blas_fix} ${LAPACK_LIBRARIES} ) @@ -579,7 +544,6 @@ add_library( tester ${libtest_all} ) target_link_libraries( tester magma lapacktest - ${blas_fix} ${LAPACK_LIBRARIES} ) @@ -602,7 +566,6 @@ if (MAGMA_ENABLE_CUDA) set_property(TARGET magma_sparse PROPERTY CUDA_STANDARD 14) target_link_libraries( magma_sparse magma - ${blas_fix} ${LAPACK_LIBRARIES} CUDA::cudart CUDA::cublas @@ -613,7 +576,6 @@ else() add_library( magma_sparse ${libsparse_all} ) target_link_libraries( magma_sparse magma - ${blas_fix} ${LAPACK_LIBRARIES} hip::device roc::hipblas @@ -673,7 +635,7 @@ add_custom_target( sparse-testing DEPENDS ${sparse-testing} ) # ---------------------------------------- # what to install -install( TARGETS magma magma_sparse ${blas_fix} +install( TARGETS magma magma_sparse RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) @@ -701,15 +663,13 @@ message( STATUS "pkgconfig ${pkgconfig}" ) set( INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" ) set( CFLAGS "${CMAKE_C_FLAGS}" ) set( CXXFLAGS "${CMAKE_CXX_FLAGS}" ) -# CMake finds the Accelerate directory; we want -framework Accelerate for linking. -string( REPLACE "/System/Library/Frameworks/Accelerate.framework" "-framework Accelerate" LAPACK_LIBS "${LAPACK_LIBRARIES}" ) if (MAGMA_ENABLE_CUDA) string( REPLACE ";" " " LIBS - "${blas_fix_lib} ${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse") + "${LAPACK_LIBS} -L${CUDAToolkit_LIBRARY_DIR} -lcudart -lcublas -lcusparse") else() string( REPLACE ";" " " LIBS - "${blas_fix_lib} ${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}" ) -# "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" ) + "${LAPACK_LIBS} ${HIP} ${rocBLAS} ${rocSPARSE}" ) +# "${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" ) endif() set( MAGMA_REQUIRED "" ) configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY ) @@ -730,7 +690,6 @@ else() endif() message( STATUS " FFLAGS: ${CMAKE_Fortran_FLAGS}" ) message( STATUS " LIBS: ${LIBS}" ) -message( STATUS " blas_fix: ${blas_fix} (MacOS Accelerate only)" ) message( STATUS " LAPACK_LIBRARIES: ${LAPACK_LIBRARIES}" ) message( STATUS " INCLUDE_DIRECTORIES: ${MAGMA_INCLUDE}" ) if (MAGMA_ENABLE_CUDA) diff --git a/Makefile b/Makefile index 1fabf54ca..df8238d96 100644 --- a/Makefile +++ b/Makefile @@ -106,11 +106,6 @@ prefix ?= /usr/local/magma # ------------------------------------------------------------------------------ # MAGMA-specific programs & flags -ifeq ($(blas_fix),1) - # prepend -lblas_fix to LIB (it must come before LAPACK library/framework) - LIB := -lblas_fix $(LIB) -endif - LIBS = $(LIBDIR) $(LIB) # preprocessor flags. See below for MAGMA_INC @@ -323,7 +318,6 @@ libsparse_dynamic_src:= sparse_testing_src := subdirs := \ - blas_fix \ control \ include \ src \ @@ -384,7 +378,6 @@ ifeq ($(FORT),) endif libmagma_obj := $(addsuffix .$(o_ext), $(basename $(libmagma_all))) -libblas_fix_obj := $(addsuffix .$(o_ext), $(basename $(libblas_fix_src))) libtest_obj := $(addsuffix .$(o_ext), $(basename $(libtest_all))) liblapacktest_obj := $(addsuffix .$(o_ext), $(basename $(liblapacktest_all2))) testing_obj := $(addsuffix .$(o_ext), $(basename $(testing_all))) @@ -418,7 +411,6 @@ endif deps := deps += $(addsuffix .d, $(basename $(libmagma_all))) -deps += $(addsuffix .d, $(basename $(libblas_fix_src))) deps += $(addsuffix .d, $(basename $(libtest_all))) deps += $(addsuffix .d, $(basename $(lapacktest_all2))) deps += $(addsuffix .d, $(basename $(testing_all))) @@ -441,7 +433,6 @@ $(sparse_testing_obj): testing/testings.h # this allows "make force=force" to force re-compiling $(libmagma_obj): $(force) -$(libblas_fix_obj): $(force) $(libtest_obj): $(force) $(liblapacktest_obj): $(force) $(testing_obj): $(force) @@ -520,7 +511,6 @@ test_headers: $(header_gch) # ----- libraries libmagma_a := lib/libmagma.a libmagma_so := lib/libmagma.so -libblas_fix_a := lib/libblas_fix.a libtest_a := testing/libtest.a liblapacktest_a := testing/lin/liblapacktest.a libsparse_a := lib/libmagma_sparse.a @@ -532,7 +522,6 @@ libs_a := \ $(libmagma_a) \ $(libtest_a) \ $(liblapacktest_a) \ - $(libblas_fix_a) \ $(libsparse_a) \ # shared libraries @@ -545,7 +534,6 @@ libs_so := \ # add objects to libraries $(libmagma_a): $(libmagma_obj) $(libmagma_so): $(libmagma_obj) -$(libblas_fix_a): $(libblas_fix_obj) $(libtest_a): $(libtest_obj) $(liblapacktest_a): $(liblapacktest_obj) $(libsparse_a): $(libsparse_obj) @@ -571,26 +559,6 @@ $(testers): $(libtest_a) $(liblapacktest_a) $(testers_f): $(libtest_a) $(liblapacktest_a) $(sparse_testers): $(libtest_a) # doesn't use liblapacktest -# ----- blas_fix -# if using blas_fix (e.g., on MacOS), libmagma requires libblas_fix -ifeq ($(blas_fix),1) - $(libmagma_a): | $(libblas_fix_a) - $(libmagma_so): | $(libblas_fix_a) - $(testers): | $(libblas_fix_a) - $(testers_f): | $(libblas_fix_a) - $(sparse_testers): | $(libblas_fix_a) -endif - - -# ------------------------------------------------------------------------------ -# MacOS (darwin) needs shared library's path set -# $OSTYPE may not be exported from the shell, so echo it -ostype = ${shell echo $${OSTYPE}} -ifneq ($(findstring darwin, ${ostype}),) - $(libmagma_so): LDFLAGS += -install_name @rpath/$(notdir $(libmagma_so)) - $(libsparse_so): LDFLAGS += -install_name @rpath/$(notdir $(libsparse_so)) -endif - # ------------------------------------------------------------------------------ # targets @@ -683,10 +651,6 @@ else endif # -------------------- -ifeq ($(blas_fix),1) - libs += $(libblas_fix_a) -endif - # ------------------------------------------------------------------------------ # static libraries @@ -721,8 +685,6 @@ endif # sub-directory builds include: $(header_all) -blas_fix: $(libblas_fix_a) - control: $(control_obj) @@ -757,9 +719,6 @@ run_test: test include/clean: -rm -f $(shdr) $(dhdr) $(chdr) -blas_fix/clean: - -rm -f $(libblas_fix_a) $(libblas_fix_obj) - control/clean: -rm -f $(control_obj) include/*.mod control/*.mod @@ -794,7 +753,7 @@ testing/lin/clean: -rm -f $(liblapacktest_a) $(liblapacktest_obj) # hmm... what should lib/clean do? just the libraries, not objects? -lib/clean: blas_fix/clean sparse/clean +lib/clean: sparse/clean -rm -f $(libmagma_a) $(libmagma_so) $(libmagma_obj) sparse/clean: sparse/testing/clean @@ -1066,10 +1025,6 @@ echo: @echo "libsparse_a $(libsparse_a)" @echo "libsparse_so $(libsparse_so)" @echo "=====" - @echo "blas_fix $(blas_fix)" - @echo "libblas_fix_src $(libblas_fix_src)" - @echo "libblas_fix_a $(libblas_fix_a)" - @echo "=====" @echo "libtest_src $(libtest_src)\n" @echo "libtest_all $(libtest_all)\n" @echo "libtest_obj $(libtest_obj)\n" diff --git a/Makefile.internal b/Makefile.internal index 55e12bfa0..afd5d25f6 100644 --- a/Makefile.internal +++ b/Makefile.internal @@ -27,9 +27,6 @@ $(CMAKESRC): Makefile.gen.$(BACKEND) echo "set( libmagma_all" >> $@ echo "$(libmagma_all) )" | $(newlines) >> $@ echo >> $@ - echo "set( libblas_fix_src" >> $@ - echo "$(libblas_fix_src) )" | $(newlines) >> $@ - echo >> $@ echo "set( libtest_all" >> $@ echo "$(libtest_all) )" | $(newlines) >> $@ echo >> $@ diff --git a/Makefile.subdir b/Makefile.subdir index 05d30bd2b..419e72dcb 100644 --- a/Makefile.subdir +++ b/Makefile.subdir @@ -13,7 +13,6 @@ # alphabetic order targets := \ all \ - blas_fix \ cleanall \ cleandep \ cleangen \ diff --git a/README b/README index ff1720593..c9f72b77e 100644 --- a/README +++ b/README @@ -22,14 +22,14 @@ MAGMA README FILE make install prefix=/usr/local/magma MAGMA uses HIP code generation to compile for AMD GPUs. MAGMA HIP sources will be - generated from interface_cuda, magmablas, and sparse into interface_hip, magmablas_hip, + generated from interface_cuda, magmablas, and sparse into interface_hip, magmablas_hip, and sparse_hip, respectively. * Quick start (CMake) There is also a CMake option to configure and build MAGMA. For more Windows-specific instructions, see README-Windows. - On Unix & MacOS: + On Unix: Step 0: setup If you downloaded an official release (e.g., magma-2.6.0.tar.gz), you can @@ -57,8 +57,8 @@ MAGMA README FILE make install Options include: - -DMAGMA_ENABLE_CUDA=ON to install MAGMA for CUDA. This is default. - -DMAGMA_ENABLE_HIP=ON to install MAGMA for HIP. This option requires + -DMAGMA_ENABLE_CUDA=ON to install MAGMA for CUDA. This is default. + -DMAGMA_ENABLE_HIP=ON to install MAGMA for HIP. This option requires to specify the hipcc compiler, e.g., cmake -DMAGMA_ENABLE_HIP=ON -DCMAKE_CXX_COMPILER=hipcc .. @@ -69,11 +69,11 @@ MAGMA README FILE Kepler, Maxwell, Pascal, Volta, Turing, Ampere or valid sm_[0-9][0-9] for NVIDIA GPUs. For AMD GPUs include one or more valid GPU gfx numbers - (https://llvm.org/docs/AMDGPUUsage.html#target-triples). + (https://llvm.org/docs/AMDGPUUsage.html#target-triples). -DBLA_VENDOR=vendor, where vendor is one of: Intel10_64lp, Intel10_64lp_seq, Intel10_64ilp, Intel10_64ilp_seq, - Intel10_32, OpenBLAS, FLAME, ACML, Apple, NAS, Generic. + Intel10_32, OpenBLAS, FLAME, Generic, etc. See https://cmake.org/cmake/help/latest/module/FindLAPACK.html https://cmake.org/cmake/help/latest/module/FindBLAS.html diff --git a/ReleaseChecklist b/ReleaseChecklist index 09c652347..c8f6d58f5 100644 --- a/ReleaseChecklist +++ b/ReleaseChecklist @@ -34,7 +34,7 @@ Compilation checks Documentation cd docs && ./groups.sh -- any groups unused or undefined? (currently QUARK undefined; geqrf_tile unused) - + cd docs && make check errors in output_err open docs/html/index.html and browse through Modules to see that things look right @@ -52,15 +52,14 @@ Compile & run on: MKL ILP64 ACML 5 or 6 OpenBLAS - MacOS using gcc & veclib Windows using CMake & MKL - + CUDA 5.0 CUDA 5.5 CUDA 6.0 CUDA 6.5 CUDA 7.0 - + pgi compilers? Any compiler warnings? diff --git a/control/magma_internal.h b/control/magma_internal.h index e4e533403..0097b23d3 100644 --- a/control/magma_internal.h +++ b/control/magma_internal.h @@ -52,12 +52,12 @@ #include // our magma_winthread doesn't have pthread_key; - // assume other platforms (Linux, MacOS, etc.) do. + // assume other platforms (Linux, etc.) do. #define HAVE_PTHREAD_KEY #endif -// provide our own support for pthread_barrier on MacOS and Windows +// provide our own support for pthread_barrier on Windows #include "pthread_barrier.h" #include "magma_v2.h" @@ -118,9 +118,9 @@ struct magma_queue dCarray__ = dBarray__ + maxbatch__; } } - + #ifdef MAGMA_HAVE_HIP - + hipStream_t hip_stream() { return stream__; }; hipblasHandle_t hipblas_handle() { return hipblas__; }; diff --git a/docs/contributors-guide.txt b/docs/contributors-guide.txt index 8e4ba2171..a42dfa378 100644 --- a/docs/contributors-guide.txt +++ b/docs/contributors-guide.txt @@ -24,9 +24,10 @@ This guide is based on the PLASMA coding style guide, though slightly different. gcc -std=c99 -Wall -pedantic -c magma.c This can be done by setting CFLAGS and CXXFLAGS appropriately in make.inc. - MAGMA code needs to be portable and work on Unix, MacOS, and Windows. Most - MAGMA code is compiled as C++, but uses essentially C style conventions. For - example, all externally visible functions are declared `extern "C"` and + MAGMA code needs to be portable and work on Unix and Windows. + (Since Apple dropped CUDA, macOS support has been dropped.) + Most MAGMA code is compiled as C++, but uses essentially C style conventions. + For example, all externally visible functions are declared `extern "C"` and C-style malloc routines are used, instead of the C++ `new` operator. - **No Trailing Whitespace:** @@ -80,49 +81,49 @@ This guide is based on the PLASMA coding style guide, though slightly different. { if (m <= 0 || n <= 0 || k <= 0) return; // Avoid code_not_inside_if(); - + if (m <= 0 || n <= 0 || k <= 0) // Okay return; - + if (m <= 0 || n <= 0 || k <= 0) { // Better return; } - + if (condition) // Avoid single statement, multi-line if and loops! magma_zherk( uplo, trans, n, k, alpha, dA(0,0), ldda, beta, dB(0,0), lddb ); - + if (condition) { // Better magma_zherk( uplo, trans, n, k, alpha, dA(0,0), ldda, beta, dB(0,0), lddb ); } - + if (condition) // Avoid single block, multi-statement if and loops! for (i=0; i < n; ++i) { A(i,i) = 0; cnt += 1; } - + if (condition) { // Avoid inconsistent indentation! Always use 4 spaces. for (i=0; i < n; ++i) { A(i,i) = 0; cnt += 1; } } - + if (condition) { // Better for (i=0; i < n; ++i) { A(i,i) = 0; cnt += 1; } } - + if (condition) { // Avoid excessive newlines - + norm = magma_dnrm2( n, dx(0), incx ); - + } } \endcode @@ -139,7 +140,7 @@ This guide is based on the PLASMA coding style guide, though slightly different. \code // host pointers are the same for clBLAS, CUDA, and Xeon Phi. #define A(i_,j_) (A + (i_) + (j_)*lda) - + // for clBLAS, return cl_mem object and offset (2 values); // for others, return (pointer + offset) (1 value). #ifdef MAGMA_HAVE_OPENCL @@ -147,12 +148,12 @@ This guide is based on the PLASMA coding style guide, though slightly different. #else #define dA(i_,j_) (dA + (i_) + ((size_t) j_)*lda) #endif - + // for host pointers, A == A(0,0). blasf77_zherk( lapack_uplo_const(uplo), lapack_trans_const(trans), &n, &k, &alpha, A, &lda, &beta, B(i,j), &ldb ); - + // for device pointers, use dA(0,0) instead of dA, to aid porting to OpenCL. magma_zherk( uplo, trans, n, k, alpha, dA(0,0), ldda, @@ -211,17 +212,17 @@ This guide is based on the PLASMA coding style guide, though slightly different. fine. Clarity is paramount. For multi-line function calls it is recommended that new lines start either after the left paranthesis or indented 4 spaces, always aligned with the first argument. - + \code magma_zherk( // Okay; preferred style for prototypes uplo, trans, n, k, alpha, dA(0,0), ldda, beta, dB(i,j), lddb ); - + magma_zherk( uplo, trans, n, k, // Okay alpha, dA(0,0), ldda, beta, dB(i,j), lddb ); - + magma_zherk( uplo, trans, n, k, // Avoid alpha, dA(0,0), ldda, beta, dB(i,j), lddb ); diff --git a/docs/documentation.txt b/docs/documentation.txt index 200198361..68172dc78 100644 --- a/docs/documentation.txt +++ b/docs/documentation.txt @@ -140,16 +140,6 @@ For csh/tcsh, put in ~/.cshrc: Some bugs exist with OpenBLAS 0.2.19; see BUGS.txt. -#### MacOS Accelerate (previously Veclib) - -Unfortunately, the MacOS Accelerate framework uses an old ABI for BLAS and -LAPACK, where single precision functions -- such as `sdot`, `cdot`, `slange`, -and `clange` -- return a double precision result. This makes them incompatibile -with our C/C++ headers and with the Fortran code used in our testers. The fix is -to substitute reference implementations of these functions, found in -`magma/blas_fix`. Setting `blas_fix = 1` in `make.inc` will compile these into -`magma/lib/libblas_fix.a`, with which your application should link. - Linking to BLAS -------------------------------------------------------------------------------- @@ -221,10 +211,9 @@ For multi-GPU functions, set `\$MAGMA_NUM_GPUS` to the number of GPUs to use. - `\$OMP_NUM_THREADS` - `\$MKL_NUM_THREADS` -- `\$VECLIB_MAXIMUM_THREADS` For multi-core BLAS libraries, set `\$OMP_NUM_THREADS` or `\$MKL_NUM_THREADS` - or `\$VECLIB_MAXIMUM_THREADS` to the number of CPU threads, depending on your + to the number of CPU threads, depending on your BLAS library. See the documentation for your BLAS and LAPACK libraries. diff --git a/fortran/Makefile b/fortran/Makefile index 2017e4e42..018408495 100644 --- a/fortran/Makefile +++ b/fortran/Makefile @@ -17,8 +17,7 @@ LDFLAGS = LIBS = -L$(MAGMADIR)/lib -lmagma -Wl,-rpath,$(MAGMADIR)/lib # BLAS and LAPACK libraries -# framework Accelerate is for MacOS -LIBS += -framework Accelerate +LIBS += -lopenblas obj = \ test.o \ diff --git a/testing/Makefile.src b/testing/Makefile.src index 6ce84e7d9..e6bdbd283 100644 --- a/testing/Makefile.src +++ b/testing/Makefile.src @@ -63,8 +63,6 @@ testing_src += \ $(cdir)/testing_parse_opts.cpp \ $(cdir)/testing_zgenerate.cpp \ - #$(cdir)/testing_veclib.cpp \ - # ---------- # Cholesky, GPU interface testing_src += \ diff --git a/testing/testing_veclib.cpp b/testing/testing_veclib.cpp deleted file mode 100644 index a908dc910..000000000 --- a/testing/testing_veclib.cpp +++ /dev/null @@ -1,241 +0,0 @@ -/* - -- MAGMA (version 2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date - - @author Mark Gates -*/ - -// Checks vector and matrix norms, for -m32 and -m64, with return as float or as double. -// In MacOS, -m64 must return double for both single and double precision -// functions, e.g., {s,d}dot, {s,d}nrm2, {s,d}lange, {s,d}lansy. -// Oddly, with -m32 both return float and double for single precision functions works. -// This is essentially a bug from an old f2c version of lapack (clapack). -// -// We work around this bug by putting replacement routines in the magma/lib/libblas_fix.a library. -// These correctly return float for the single precision functions that had issues. - -#include -#include -#include - -// hack to NOT include magma_lapack.h, by pre-defining MAGMA_LAPACK_H. -// we re-define the lapack prototypes below, so can't include that header. -#define MAGMA_LAPACK_H -#include "testings.h" -#include "magma_mangling.h" - -// ------------------------------------------------------------ -//#define LAPACK_RETURN_DOUBLE - -#ifdef LAPACK_RETURN_DOUBLE -typedef double RETURN_FLOAT; -#else -typedef float RETURN_FLOAT; -#endif - - -// ------------------------------------------------------------ -#ifdef __cplusplus -extern "C" { -#endif - -#define blasf77_sdot FORTRAN_NAME( sdot, SDOT ) -#define blasf77_snrm2 FORTRAN_NAME( snrm2, SNRM2 ) -#define lapackf77_slange FORTRAN_NAME( slange, SLANGE ) -#define lapackf77_slansy FORTRAN_NAME( slansy, SLANSY ) - -#define blasf77_ddot FORTRAN_NAME( ddot, DDOT ) -#define blasf77_dnrm2 FORTRAN_NAME( dnrm2, DNRM2 ) -#define lapackf77_dlange FORTRAN_NAME( dlange, DLANGE ) -#define lapackf77_dlansy FORTRAN_NAME( dlansy, DLANSY ) - -RETURN_FLOAT -blasf77_sdot( const magma_int_t *n, - const float *x, const magma_int_t *incx, - const float *y, const magma_int_t *incy ); - -RETURN_FLOAT -blasf77_snrm2( const magma_int_t *n, - const float *x, const magma_int_t *incx ); - -RETURN_FLOAT -lapackf77_slange( const char *norm, - const magma_int_t *m, const magma_int_t *n, - const float *A, const magma_int_t *lda, - float *work ); - -RETURN_FLOAT -lapackf77_slansy( const char *norm, const char* uplo, - const magma_int_t *n, - const float *A, const magma_int_t *lda, - float *work ); - -double -blasf77_ddot( const magma_int_t *n, - const double *x, const magma_int_t *incx, - const double *y, const magma_int_t *incy ); - -double -blasf77_dnrm2( const magma_int_t *n, - const double *x, const magma_int_t *incx ); - -double -lapackf77_dlange( const char *norm, - const magma_int_t *m, const magma_int_t *n, - const double *A, const magma_int_t *lda, - double *work ); - -double -lapackf77_dlansy( const char *norm, const char* uplo, - const magma_int_t *n, - const double *A, const magma_int_t *lda, - double *work ); - -#ifdef __cplusplus -} -#endif - -// ------------------------------------------------------------ -// call matrix norms {s,d}lan{ge,sy}. -// return value, to check that the call stack isn't messed up. -float test( magma_int_t m, magma_int_t n ) -{ -#define sA(i,j) (sA + (i) + (j)*lda) -#define dA(i,j) (dA + (i) + (j)*lda) - - float *sA, *swork; - float snorm_one, snorm_inf, snorm_fro, snorm_max; - - double *dA, *dwork; - double dnorm_one, dnorm_inf, dnorm_fro, dnorm_max; - - const magma_int_t ione = 1; - magma_int_t lda = max(m,n); - - TESTING_CHECK( magma_smalloc_cpu( &&sA, lda*n )); - TESTING_CHECK( magma_dmalloc_cpu( &&dA, lda*n )); - TESTING_CHECK( magma_smalloc_cpu( &&swork, m )); - TESTING_CHECK( magma_dmalloc_cpu( &&dwork, m )); - - for( magma_int_t j = 0; j < n; ++j ) { - for( magma_int_t i = 0; i < lda; ++i ) { - double tmp = rand() / (double)(RAND_MAX); - *sA(i,j) = tmp; - *dA(i,j) = tmp; - } - } - - double error; - int status = 0; - - // can repeat multiple times, but shows same results every time - status = 0; - for( magma_int_t i=0; i < 1; ++i ) { - snorm_one = blasf77_sdot( &m, sA, &ione, sA, &ione ); - dnorm_one = blasf77_ddot( &m, dA, &ione, dA, &ione ); - snorm_fro = blasf77_snrm2( &m, sA, &ione ); - dnorm_fro = blasf77_dnrm2( &m, dA, &ione ); - printf( "m %lld, sdot %12.8f, snrm2 %12.8f\n", (long long) m, snorm_one, snorm_fro ); - printf( "m %lld, ddot %12.8f, dnrm2 %12.8f\n", (long long) m, dnorm_one, dnorm_fro ); - error = fabs(snorm_one - dnorm_one) / dnorm_one; - status |= ! (error < 1e-6); - } - if ( status ) { - printf( "**** failed ****\n" ); - } - else { - printf( "ok\n" ); - } - printf( "\n" ); - - status = 0; - for( magma_int_t i=0; i < 1; ++i ) { - snorm_one = lapackf77_slange( "one", &m, &n, sA, &lda, swork ); - snorm_inf = lapackf77_slange( "inf", &m, &n, sA, &lda, swork ); - snorm_max = lapackf77_slange( "max", &m, &n, sA, &lda, swork ); - snorm_fro = lapackf77_slange( "fro", &m, &n, sA, &lda, swork ); - - dnorm_one = lapackf77_dlange( "one", &m, &n, dA, &lda, dwork ); - dnorm_inf = lapackf77_dlange( "inf", &m, &n, dA, &lda, dwork ); - dnorm_max = lapackf77_dlange( "max", &m, &n, dA, &lda, dwork ); - dnorm_fro = lapackf77_dlange( "fro", &m, &n, dA, &lda, dwork ); - - printf( "m %lld, n %lld, slange norm one %12.8f, inf %12.8f, max %12.8f, fro %12.8f\n", - (long long) m, (long long) n, snorm_one, snorm_inf, snorm_max, snorm_fro ); - - printf( "m %lld, n %lld, dlange norm one %12.8f, inf %12.8f, max %12.8f, fro %12.8f\n", - (long long) m, (long long) n, dnorm_one, dnorm_inf, dnorm_max, dnorm_fro ); - error = fabs(snorm_one - dnorm_one) / dnorm_one; - status |= ! (error < 1e-6); - } - if ( status ) { - printf( "**** failed ****\n" ); - } - else { - printf( "ok\n" ); - } - printf( "\n" ); - - status = 0; - for( magma_int_t i=0; i < 1; ++i ) { - snorm_one = lapackf77_slansy( "one", "up", &n, sA, &lda, swork ); - snorm_inf = lapackf77_slansy( "inf", "up", &n, sA, &lda, swork ); - snorm_max = lapackf77_slansy( "max", "up", &n, sA, &lda, swork ); - snorm_fro = lapackf77_slansy( "fro", "up", &n, sA, &lda, swork ); - - dnorm_one = lapackf77_dlansy( "one", "up", &n, dA, &lda, dwork ); - dnorm_inf = lapackf77_dlansy( "inf", "up", &n, dA, &lda, dwork ); - dnorm_max = lapackf77_dlansy( "max", "up", &n, dA, &lda, dwork ); - dnorm_fro = lapackf77_dlansy( "fro", "up", &n, dA, &lda, dwork ); - - printf( "m %lld, n %lld, slansy norm one %12.8f, inf %12.8f, max %12.8f, fro %12.8f\n", - (long long) m, (long long) n, snorm_one, snorm_inf, snorm_max, snorm_fro ); - - printf( "m %lld, n %lld, dlansy norm one %12.8f, inf %12.8f, max %12.8f, fro %12.8f\n", - (long long) m, (long long) n, dnorm_one, dnorm_inf, dnorm_max, dnorm_fro ); - error = fabs(snorm_one - dnorm_one) / dnorm_one; - status |= ! (error < 1e-6); - } - if ( status ) { - printf( "**** failed ****\n" ); - } - else { - printf( "ok\n" ); - } - printf( "\n" ); - - magma_free_cpu( sA ); - magma_free_cpu( dA ); - magma_free_cpu( swork ); - magma_free_cpu( dwork ); - - return 1.125; -} - - -// ------------------------------------------------------------ -int main( int argc, char** argv ) -{ - magma_int_t m = 100; - magma_int_t n = m; - if ( argc > 1 ) { - n = atoi( argv[1] ); - } - - float value; - - printf( "--------------------\n" ); - printf( "sizeof(void*) %lu, sizeof(RETURN_FLOAT) %lu\n\n", - sizeof(void*), sizeof(RETURN_FLOAT) ); - - // can repeat multiple times, but shows same results every time - for( magma_int_t i=0; i < 1; ++i ) { - value = test( m, n ); - printf( "value %.4f\n\n", value ); - } - - return 0; -} diff --git a/tools/build.sh b/tools/build.sh index b88881020..08f47fcb4 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -20,7 +20,7 @@ link="" tar="" pause="" -usage="Usage: $0 [options] [acml macos mkl-gcc openblas ...] +usage="Usage: $0 [options] [mkl-gcc openblas ...] -h --help help -j # parallel make threads, default $j --no-clean skip 'make clean' before build; only one configuration allowed diff --git a/tools/checklist.csh b/tools/checklist.csh index e638280fb..5b31e36fc 100755 --- a/tools/checklist.csh +++ b/tools/checklist.csh @@ -180,7 +180,7 @@ echo "============================================================ should be fix # fixed echo "========== use @date instead of a specific hard-coded date" egrep '(January|February|March|April|May|June|July|August|September|October|November|December) +[0-9]{4} *$' $FILES \ - | egrep -v 'testing/(checkdiag|lin|matgen)|blas_fix' + | egrep -v 'testing/(checkdiag|lin|matgen)' echo echo "========== cuda, cublas functions *** should be fixed ***" From 8f9121c5cf87e5accad644723cf8985c907a6734 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Fri, 14 Feb 2025 13:38:48 +0100 Subject: [PATCH 07/27] remove ACML support, since AMD dropped it circa 2017. Also remove out-dated flock references. --- CMakeLists.txt | 1 - Makefile | 2 +- ReleaseChecklist | 5 ++--- docs/documentation.txt | 37 ++++++++++++------------------------ interface_cuda/interface.cpp | 17 +++-------------- make.check-acml | 8 -------- tools/checklist.csh | 2 +- 7 files changed, 19 insertions(+), 53 deletions(-) delete mode 100644 make.check-acml diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e051965b..d55ea406b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -333,7 +333,6 @@ set(BLA_VENDOR "" CACHE STRING set_property(CACHE BLA_VENDOR PROPERTY STRINGS "" "All" "Goto" "OpenBLAS" "FLAME" "ATLAS" "IBMESSL" "Intel10_64lp" "Intel10_64lp_seq" "Intel10_64ilp" "Intel10_64ilp_seq" - "ACML" "ACML_MP" "ACML_GPU" "Arm" "Arm_mp" "Arm_ilp64" "Arm_ilp64_mp" "Generic") diff --git a/Makefile b/Makefile index df8238d96..cc0d52749 100644 --- a/Makefile +++ b/Makefile @@ -941,7 +941,7 @@ $(sparse_testers): %: %.$(o_ext) # filter out MAGMA-specific options for pkg-config #TODO: add hip specific ones INSTALL_FLAGS := $(filter-out \ - -DMAGMA_NOAFFINITY -DMAGMA_SETAFFINITY -DMAGMA_WITH_ACML -DMAGMA_WITH_MKL -DUSE_FLOCK \ + -DMAGMA_NOAFFINITY -DMAGMA_SETAFFINITY -DMAGMA_WITH_MKL \ -DMAGMA_CUDA_ARCH=% -DMAGMA_CUDA_ARCH_MIN=% \ -DMAGMA_HAVE_CUDA -DMAGMA_HAVE_HIP -DMAGMA_HAVE_clBLAS \ -fno-strict-aliasing -fPIC -O0 -O1 -O2 -O3 -pedantic -std=c99 -stdc++98 -stdc++14 \ diff --git a/ReleaseChecklist b/ReleaseChecklist index c8f6d58f5..f29eb5f5a 100644 --- a/ReleaseChecklist +++ b/ReleaseChecklist @@ -44,13 +44,12 @@ Compile & run with -DNDEBUG (no asserts) Compile & run on: This script will compile and save output from each make.inc file: - tools/checklist_builds.csh [ acml atlas mkl-gcc mkl-icc mkl-gcc-ilp64 mkl-icc-ilp64 ... ] - + tools/checklist_builds.csh [ atlas mkl-gcc mkl-icc mkl-gcc-ilp64 mkl-icc-ilp64 ... ] + Linux icc, gcc MKL MKL ILP64 - ACML 5 or 6 OpenBLAS Windows using CMake & MKL diff --git a/docs/documentation.txt b/docs/documentation.txt index 68172dc78..a3618a904 100644 --- a/docs/documentation.txt +++ b/docs/documentation.txt @@ -98,21 +98,6 @@ For csh/tcsh, put in ~/.cshrc: MAGMA is tested with MKL 11.3.3 (2016), both LP64 and ILP64; other versions may work. -#### AMD ACML - -The ACML `make.inc` file assumes `\$ACMLDIR` is set in your environment. -For bash (sh), put in ~/.bashrc (with your system's path): - - export ACMLDIR=/opt/acml-5.3.1 - -For csh/tcsh, put in ~/.cshrc: - - setenv ACMLDIR /opt/acml-5.3.1 - -MAGMA is tested with ACML 5.3.1; other versions may work. -See comments in `make.inc.acml` regarding ACML 4; -a couple testers fail to compile with ACML 4. - #### ATLAS The ATLAS `make.inc` file assumes `\$ATLASDIR` and `\$LAPACKDIR` are set in your environment. @@ -154,12 +139,19 @@ Set `-DADD_`, `-DUPCASE`, or `-DNOCHANGE`, respectively, in all FLAGS in your `make.inc` file to select the appropriate one. Use `nm` to examine your BLAS library: - acml-5.3.1/gfortran64_mp/lib> nm libacml_mp.a | grep -i 'T.*dgemm' - 0000000000000000 T dgemm - 00000000000004e0 T dgemm_ + sh methane lib> nm libopenblas.so | grep -i dsyr2k + 000000000017ee50 T cblas_dsyr2k + 000000000017c8b0 T dsyr2k_ # Note this line + 00000000001fa690 T dsyr2k_LN + 00000000001fb2e0 T dsyr2k_LT + 00000000001f8f70 T dsyr2k_UN + 00000000001f9b70 T dsyr2k_UT + 00000000001fcab0 T dsyr2k_kernel_L + 00000000001fc750 T dsyr2k_kernel_U -In this case, it shows that either `-DADD_ (dgemm_)` or `-DNOCHANGE (dgemm)` -should work. The default in all make.inc files is `-DADD_`. +In this case, it shows that `-DADD_ (dsyr2k_)` should work. +The default in all example make.inc files is `-DADD_`, +except for IBM ESSL, which uses `-DNOCHANGE`. Compile-time options @@ -173,11 +165,6 @@ using the `-D` compiler flag, e.g., `-DMAGMA_WITH_MKL` in CFLAGS. If linked with MKL, allows MAGMA to get MKL's version and set MKL's number of threads. -- `MAGMA_WITH_ACML` - - If linked with ACML 5 or later, allows MAGMA to get ACML's version. - ACML's number of threads are set via OpenMP. - - `MAGMA_NOAFFINITY` Disables thread affinity, available in glibc 2.6 and later. diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp index 19dd6dc4b..c3ae4e0eb 100644 --- a/interface_cuda/interface.cpp +++ b/interface_cuda/interface.cpp @@ -26,15 +26,11 @@ #include #endif -#if defined(MAGMA_WITH_ACML) -#include -#endif - #include // defining MAGMA_LAPACK_H is a hack to NOT include magma_lapack.h -// via magma_internal.h here, since it conflicts with acml.h and we don't -// need lapack here, but we want acml.h for the acmlversion() function. +// via magma_internal.h here, since it conflicts with some vendor's +// headers (acml.h), and we don't need lapack here. #define MAGMA_LAPACK_H #include "magma_internal.h" @@ -327,7 +323,7 @@ magma_print_environment() printf( "%% Compiled for CUDA architectures %s\n", MAGMA_CUDA_ARCH ); - // CUDA, OpenCL, OpenMP, MKL, ACML versions all printed on same line + // CUDA, OpenCL, OpenMP, MKL versions all printed on same line int cuda_runtime=0, cuda_driver=0; cudaError_t err; err = cudaDriverGetVersion( &cuda_driver ); @@ -381,13 +377,6 @@ magma_print_environment() mkl_get_max_threads() ); #endif -#if defined(MAGMA_WITH_ACML) - // ACML 4 doesn't have acml_build parameter - int acml_major, acml_minor, acml_patch, acml_build; - acmlversion( &acml_major, &acml_minor, &acml_patch, &acml_build ); - printf( "ACML %d.%d.%d.%d ", acml_major, acml_minor, acml_patch, acml_build ); -#endif - printf( "\n" ); // print devices diff --git a/make.check-acml b/make.check-acml deleted file mode 100644 index 095cdd024..000000000 --- a/make.check-acml +++ /dev/null @@ -1,8 +0,0 @@ -# check for ACML directories and give user hint how to set them -ifeq ($(ACMLDIR),) -$(error Set $$ACMLDIR, preferably in your environment, e.g., run "export ACMLDIR=/opt/acml" in ~/.bashrc, or "setenv ACMLDIR /opt/acml" in ~/.cshrc) -endif - -ifeq ($(wildcard $(ACMLDIR)),) -$(error $$ACMLDIR=$(ACMLDIR) does not exist. Please set $$ACMLDIR to where ACML is installed.) -endif diff --git a/tools/checklist.csh b/tools/checklist.csh index 5b31e36fc..4c6778c2d 100755 --- a/tools/checklist.csh +++ b/tools/checklist.csh @@ -170,7 +170,7 @@ echo # needs lots of work; also lots of exceptions echo "========== int instead of magma_int_t (in headers) *** required fix ***" egrep '\bint\b' $HEADERS \ - | egrep -v 'int argc|int flock|quark|magma_timer\.h|commonblas_z\.h|magma_templates\.h|magma_winthread\.h|pthread_barrier\.h|cblas\.h|typedef int +magma_|const char\* func, const char\* file, int line|int mm_' + | egrep -v 'int argc|quark|magma_timer\.h|commonblas_z\.h|magma_templates\.h|magma_winthread\.h|pthread_barrier\.h|cblas\.h|typedef int +magma_|const char\* func, const char\* file, int line|int mm_' echo echo From 4c76e7c6bbca49268cfc683c649cdb720ea8e8b3 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Tue, 8 Jul 2025 02:48:09 -0400 Subject: [PATCH 08/27] make: remove extraneous check from make.inc files that is repeated in Makefile --- make.inc-examples/make.inc.hip-gcc-mkl | 5 ----- make.inc-examples/make.inc.hip-gcc-openblas | 5 ----- make.inc-examples/make.inc.openblas | 5 ----- 3 files changed, 15 deletions(-) diff --git a/make.inc-examples/make.inc.hip-gcc-mkl b/make.inc-examples/make.inc.hip-gcc-mkl index 24ee6bf6b..7a04a97bb 100644 --- a/make.inc-examples/make.inc.hip-gcc-mkl +++ b/make.inc-examples/make.inc.hip-gcc-mkl @@ -36,11 +36,6 @@ BACKEND = hip ROCM_PATH ?= /opt/rocm CUDADIR ?= /usr/local/cuda -# require either hip or cuda -ifeq (,$(findstring $(BACKEND),hip cuda)) - $(error "'BACKEND' should be either 'cuda' or 'hip' (got $$BACKEND=$(BACKEND))") -endif - # -------------------- # programs diff --git a/make.inc-examples/make.inc.hip-gcc-openblas b/make.inc-examples/make.inc.hip-gcc-openblas index 547169bb1..0bea560b2 100644 --- a/make.inc-examples/make.inc.hip-gcc-openblas +++ b/make.inc-examples/make.inc.hip-gcc-openblas @@ -37,11 +37,6 @@ OPENBLASDIR ?= /usr/local/openblas ROCM_PATH ?= /opt/rocm CUDADIR ?= /usr/local/cuda -# require either hip or cuda -ifeq (,$(findstring $(BACKEND),hip cuda)) - $(error "'BACKEND' should be either 'cuda' or 'hip' (got $$BACKEND=$(BACKEND))") -endif - # -------------------- # programs diff --git a/make.inc-examples/make.inc.openblas b/make.inc-examples/make.inc.openblas index 9e73c4e5e..81a48e994 100644 --- a/make.inc-examples/make.inc.openblas +++ b/make.inc-examples/make.inc.openblas @@ -20,11 +20,6 @@ OPENBLASDIR ?= /usr/local/openblas CUDADIR ?= /usr/local/cuda ROCM_PATH ?= /opt/rocm -# require either hip or cuda -ifeq (,$(findstring $(BACKEND),hip cuda)) - $(error "'BACKEND' should be either 'cuda' or 'hip' (got $(BACKEND))") -endif - # -------------------- # programs From f8c541bacb2ef407b716d4c7e85d8f5d0db3cde5 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Tue, 8 Jul 2025 02:48:33 -0400 Subject: [PATCH 09/27] make: add AMD AOCL / BLIS & FLAME config --- docs/documentation.txt | 20 +++- make.check-aocl-blis | 17 +++ make.inc-examples/make.inc.aocl-blis | 120 ++++++++++++++++++++ make.inc-examples/make.inc.hip-gcc-openblas | 2 +- make.inc-examples/make.inc.openblas | 4 +- 5 files changed, 155 insertions(+), 8 deletions(-) create mode 100644 make.check-aocl-blis create mode 100644 make.inc-examples/make.inc.aocl-blis diff --git a/docs/documentation.txt b/docs/documentation.txt index a3618a904..38a03ae43 100644 --- a/docs/documentation.txt +++ b/docs/documentation.txt @@ -82,7 +82,20 @@ For csh/tcsh, put in `~/.cshrc`: setenv CUDADIR /usr/local/cuda -MAGMA is tested with CUDA >= 7.5. Some functionality requires a newer version. +#### AMD Optimizing CPU Libraries (AOCL) / BLIS & FLAME + +AOCL has adopted the BLIS and libFLAME libraries. These may be installed +in separate directories. Set `\$BLIS_DIR` and `\$FLAME_DIR` in your +environment or make.inc file. +For bash (sh), put in ~/.bashrc (with your system's paths): + + export BLIS_DIR=/opt/blis + export FLAME_DIR=/opt/libflame + +For csh/tcsh, put in ~/.cshrc: + + setenv BLIS_DIR /opt/blis + setenv FLAME_DIR /opt/libflame #### Intel MKL @@ -95,8 +108,7 @@ For csh/tcsh, put in ~/.cshrc: source /opt/intel/bin/compilervars.csh intel64 -MAGMA is tested with MKL 11.3.3 (2016), both LP64 and ILP64; -other versions may work. +MAGMA is tested with both LP64 and ILP64. #### ATLAS @@ -123,8 +135,6 @@ For csh/tcsh, put in ~/.cshrc: setenv OPENBLASDIR /opt/openblas -Some bugs exist with OpenBLAS 0.2.19; see BUGS.txt. - Linking to BLAS -------------------------------------------------------------------------------- diff --git a/make.check-aocl-blis b/make.check-aocl-blis new file mode 100644 index 000000000..cc2af06a2 --- /dev/null +++ b/make.check-aocl-blis @@ -0,0 +1,17 @@ +# check for BLIS & FLAME directories and give user hint how to set them + +ifeq ($(BLIS_DIR),) +$(error Set $$BLIS_DIR, preferably in your environment, e.g., run "export BLIS_DIR=/opt/blis" in ~/.bashrc, or "setenv BLIS_DIR /opt/blis" in ~/.cshrc) +endif + +ifeq ($(wildcard $(BLIS_DIR)),) +$(error $$BLIS_DIR=$(BLIS_DIR) does not exist. Please set $$BLIS_DIR to where BLIS is installed.) +endif + +ifeq ($(FLAME_DIR),) +$(error Set $$FLAME_DIR, preferably in your environment, e.g., run "export FLAME_DIR=/opt/libflame" in ~/.bashrc, or "setenv FLAME_DIR /opt/libflame" in ~/.cshrc) +endif + +ifeq ($(wildcard $(FLAME_DIR)),) +$(error $$FLAME_DIR=$(FLAME_DIR) does not exist. Please set $$FLAME_DIR to where libFLAME is installed.) +endif diff --git a/make.inc-examples/make.inc.aocl-blis b/make.inc-examples/make.inc.aocl-blis new file mode 100644 index 000000000..ec25c0f86 --- /dev/null +++ b/make.inc-examples/make.inc.aocl-blis @@ -0,0 +1,120 @@ +#////////////////////////////////////////////////////////////////////////////// +# -- MAGMA (version 2.x) -- +# Univ. of Tennessee, Knoxville +# Univ. of California, Berkeley +# Univ. of Colorado, Denver +# @date +#////////////////////////////////////////////////////////////////////////////// + + + +# -------------------- +# configuration + +# should MAGMA be built on CUDA (NVIDIA only) or ROCM (AMD or NVIDIA) +# enter 'cuda' or 'hip' respectively +BACKEND ?= cuda + +# set these to their real paths +BLIS_DIR ?= /usr/local/blis +FLAME_DIR ?= /usr/local/libflame +CUDADIR ?= /usr/local/cuda +ROCM_PATH ?= /opt/rocm + +# -------------------- +# programs + +# set compilers +CC ?= gcc +CXX ?= g++ +FORT ?= gfortran +HIPCC ?= hipcc +NVCC ?= nvcc +DEVCC ?= NONE + +# set from 'BACKEND' +ifeq ($(BACKEND),cuda) + DEVCC = $(NVCC) +else ifeq ($(BACKEND),hip) + DEVCC = $(HIPCC) +endif + +# and utilities +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + + +# -------------------- +# flags/settings + +# set our GPU targets +ifeq ($(BACKEND),cuda) + GPU_TARGET ?= Volta Turing Ampere +else ifeq ($(BACKEND),hip) + GPU_TARGET ?= gfx900 gfx906 gfx908 +endif + +# Use -fPIC to make shared (.so) and static (.a) library; +# can be commented out if making only static library. +FPIC = -fPIC + +# now, generate our flags +CFLAGS = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -fopenmp -std=c99 +CXXFLAGS = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -fopenmp -std=c++11 +FFLAGS = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -Wno-unused-dummy-argument +F90FLAGS = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -Wno-unused-dummy-argument -x f95-cpp-input +LDFLAGS = $(FPIC) -fopenmp + +DEVCCFLAGS = -O3 -DNDEBUG -DADD_ + +# DEVCCFLAGS are populated later in `backend-specific` + + +# -------------------- +# libraries + +# gcc with BLIS/FLAME +LIB += -lflame -lblis + +# -------------------- +# directories + +# define library directories preferably in your environment, or here. +LIBDIR += -L${FLAME_DIR}/lib -Wl,-rpath,${FLAME_DIR}/lib \ + -L${BLIS_DIR}/lib -Wl,-rpath,${BLIS_DIR}/lib +INC += -I${FLAME_DIR}/include -I${BLIS_DIR}/include + + +# -------------------- +# checks + +# check for BLIS & FLAME +-include make.check-aocl-blis + + +# -------------------- +# backend-specific + +# add appropriate cuda flags +ifeq ($(BACKEND),cuda) + -include make.check-cuda + + DEVCCFLAGS += -Xcompiler "$(FPIC)" -std=c++11 + + # link with cuda specific libraries + INC += -I$(CUDADIR)/include + LIBDIR += -L$(CUDADIR)/lib64 -Wl,-rpath,${CUDADIR}/lib64 + LIB += -lcublas -lcusparse -lcudart -lcudadevrt +endif + +# add appropriate ROCM flags +ifeq ($(BACKEND),hip) + -include make.check-hip + + DEVCCFLAGS += $(FPIC) -std=c++11 + + INC += -I$(ROCM_PATH)/include + LIBDIR += -L$(ROCM_PATH)/lib -Wl,-rpath,${ROCM_PATH}/lib + LIB += -lhipblas -lhipsparse +endif diff --git a/make.inc-examples/make.inc.hip-gcc-openblas b/make.inc-examples/make.inc.hip-gcc-openblas index 0bea560b2..4e3fe3809 100644 --- a/make.inc-examples/make.inc.hip-gcc-openblas +++ b/make.inc-examples/make.inc.hip-gcc-openblas @@ -71,7 +71,7 @@ RANLIB ?= ranlib ifeq ($(BACKEND),cuda) # See a full table: https://nouveau.freedesktop.org/wiki/CodeNames/ # note: provided by freedesktop, which is reliable, but not from the vendor - GPU_TARGET = Volta Turing Ampere + GPU_TARGET ?= Volta Turing Ampere else ifeq ($(BACKEND),hip) # See a full table: # https://llvm.org/docs/AMDGPUUsage.html#amdgpu-processor-table diff --git a/make.inc-examples/make.inc.openblas b/make.inc-examples/make.inc.openblas index 81a48e994..ec6472b41 100644 --- a/make.inc-examples/make.inc.openblas +++ b/make.inc-examples/make.inc.openblas @@ -49,9 +49,9 @@ RANLIB = ranlib # set our GPU targets ifeq ($(BACKEND),cuda) - GPU_TARGET = Volta Turing Ampere + GPU_TARGET ?= Volta Turing Ampere else ifeq ($(BACKEND),hip) - GPU_TARGET = gfx900 gfx906 gfx908 + GPU_TARGET ?= gfx900 gfx906 gfx908 endif # Use -fPIC to make shared (.so) and static (.a) library; From 49aaf1d6edd6ff8249bd2ab2b77514e976e06353 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Tue, 8 Jul 2025 00:08:51 -0400 Subject: [PATCH 10/27] Fix testing_zhetrf for complex [cz] case. Symmetrize with conj and make diagonal real. --- testing/testing_zhetrf.cpp | 48 +++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/testing/testing_zhetrf.cpp b/testing/testing_zhetrf.cpp index d48b52470..ad8665092 100644 --- a/testing/testing_zhetrf.cpp +++ b/testing/testing_zhetrf.cpp @@ -329,11 +329,11 @@ double get_residual_aasen( } /******************************************************************************/ -// On input, LU and ipiv is LU factorization of A. On output, LU is overwritten. -// Works for any m, n. +// On input, LD and ipiv is LDL^T factorization of A. On output, LD is overwritten. +// Works for any n. // Uses init_matrix() to re-generate original A as needed. // Returns error in factorization, |PA - LU| / (n |A|) -// This allocates 3 more matrices to store A, L, and U. +// This allocates 3 more matrices to store A, L, and D. double get_LDLt_error( magma_opts &opts, bool nopiv, magma_uplo_t uplo, magma_int_t N, @@ -363,21 +363,27 @@ double get_LDLt_error( // symmetrize; the pivoting code below assumes a full matrix if (opts.uplo == MagmaLower) { - // copy L to U + // Copy conj(L) to U, and make diagonal real. for (j = 0; j < N; ++j) { for (i = 0; i < j; ++i) { - A(i,j) = A(j,i); + A(i, j) = MAGMA_Z_CONJ( A(j, i) ); } + A(j, j) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(j, j) ), 0 ); } } else { - // copy U to L + // Copy conj(U) to L, and make diagonal real. for (j = 0; j < N; ++j) { for (i = 0; i < j; ++i) { - A(j,i) = A(i,j); + A(j, i) = MAGMA_Z_CONJ( A(i, j) ); } + A(j, j) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(j, j) ), 0 ); } } + if (opts.verbose) { + printf( "Aerr = " ); + magma_zprint( N, N, A, lda ); + } if (uplo == MagmaUpper) { for (j=N-1; j >= 0; j--) { @@ -557,6 +563,11 @@ double get_LDLt_error( } residual = lapackf77_zlange( "Fro", &N, &N, D, &N, work); + if (opts.verbose) { + printf( "Residual = " ); + magma_zprint( N, N, D, N ); + } + magma_free_cpu( A ); magma_free_cpu( L ); magma_free_cpu( D ); @@ -651,21 +662,27 @@ double get_LTLt_error( // symmetrize; the pivoting code below assumes a full matrix if (opts.uplo == MagmaLower) { - // copy L to U + // Copy conj(L) to U, and make diagonal real. for (j = 0; j < N; ++j) { for (i = 0; i < j; ++i) { - A(i,j) = A(j,i); + A(i, j) = MAGMA_Z_CONJ( A(j, i) ); } + A(j, j) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(j, j) ), 0 ); } } else { - // copy U to L + // Copy conj(U) to L, and make diagonal real. for (j = 0; j < N; ++j) { for (i = 0; i < j; ++i) { - A(j,i) = A(i,j); + A(j, i) = MAGMA_Z_CONJ( A(i, j) ); } + A(j, j) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(j, j) ), 0 ); } } + if (opts.verbose) { + printf( "Aerr = " ); + magma_zprint( N, N, A, lda ); + } // apply symmetric pivoting for (j=0; j < N; j++) { @@ -694,6 +711,11 @@ double get_LTLt_error( } residual = lapackf77_zlange( "Fro", &N, &N, T, &N, work); + if (opts.verbose) { + printf( "Residual = " ); + magma_zprint( N, N, T, N ); + } + magma_free_cpu( A ); magma_free_cpu( L ); magma_free_cpu( T ); @@ -829,6 +851,10 @@ int main( int argc, char** argv) Performs operation using MAGMA =================================================================== */ init_matrix( opts, N, N, h_A, lda ); + if (opts.verbose) { + printf( "A = " ); + magma_zprint( N, N, h_A, lda ); + } //printf( "A0=" ); //magma_zprlong( N, N, h_A, lda ); From afdf8d3741ee7caeb5b96730551a1881d2fe3687 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Mon, 7 Jul 2025 23:57:42 -0400 Subject: [PATCH 11/27] Cleanup testing_zhetrf. Make spacing more consistent, use std::swap. --- testing/testing_zhetrf.cpp | 82 ++++++++++++++------------------------ 1 file changed, 29 insertions(+), 53 deletions(-) diff --git a/testing/testing_zhetrf.cpp b/testing/testing_zhetrf.cpp index ad8665092..bf0a668eb 100644 --- a/testing/testing_zhetrf.cpp +++ b/testing/testing_zhetrf.cpp @@ -13,6 +13,7 @@ #include #include #include +#include // includes, project #include "flops.h" @@ -408,21 +409,15 @@ double get_LDLt_error( if (piv != j) { // apply row-pivoting to previous L for (i=j+2; i < N; i++) { - magmaDoubleComplex val = L(j,i); - L(j,i) = L(piv,i); - L(piv,i) = val; + std::swap( L(j, i), L(piv, i) ); } // apply row-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(j,i); - A(j,i) = A(piv,i); - A(piv,i) = val; + std::swap( A(j, i), A(piv, i) ); } // apply col-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(i,j); - A(i,j) = A(i,piv); - A(i,piv) = val; + std::swap( A(i, j), A(i, piv) ); } } } @@ -438,21 +433,15 @@ double get_LDLt_error( if (piv != j) { // apply row-pivoting to previous L for (i=j+1; i < N; i++) { - magmaDoubleComplex val = L(j,i); - L(j,i) = L(piv,i); - L(piv,i) = val; + std::swap( L(j, i), L(piv, i) ); } // apply row-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(j,i); - A(j,i) = A(piv,i); - A(piv,i) = val; + std::swap( A(j, i), A(piv, i) ); } // apply col-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(i,j); - A(i,j) = A(i,piv); - A(i,piv) = val; + std::swap( A(i, j), A(i,piv) ); } } } @@ -497,21 +486,15 @@ double get_LDLt_error( if (piv != j) { // apply row-pivoting to previous L for (i=0; i < j-1; i++) { - magmaDoubleComplex val = L(j,i); - L(j,i) = L(piv,i); - L(piv,i) = val; + std::swap( L(j, i), L(piv, i) ); } // apply row-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(j,i); - A(j,i) = A(piv,i); - A(piv,i) = val; + std::swap( A(j, i), A(piv, i) ); } // apply col-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(i,j); - A(i,j) = A(i,piv); - A(i,piv) = val; + std::swap( A(i, j), A(i, piv) ); } } } @@ -527,21 +510,15 @@ double get_LDLt_error( if (piv != j) { // apply row-pivoting to previous L for (i=0; i < j; i++) { - magmaDoubleComplex val = L(j,i); - L(j,i) = L(piv,i); - L(piv,i) = val; + std::swap( L(j, i), L(piv, i) ); } // apply row-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(j,i); - A(j,i) = A(piv,i); - A(piv,i) = val; + std::swap( A(j, i), A(piv, i) ); } // apply col-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(i,j); - A(i,j) = A(i,piv); - A(i,piv) = val; + std::swap( A(i, j), A(i, piv) ); } } } @@ -690,15 +667,11 @@ double get_LTLt_error( if (piv != j) { // apply row-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(j,i); - A(j,i) = A(piv,i); - A(piv,i) = val; + std::swap( A(j, i), A(piv, i) ); } // apply col-pivoting to A for (i=0; i < N; i++) { - magmaDoubleComplex val = A(i,j); - A(i,j) = A(i,piv); - A(i,piv) = val; + std::swap( A(i, j), A(i, piv) ); } } } @@ -728,7 +701,7 @@ double get_LTLt_error( /* //////////////////////////////////////////////////////////////////////////// -- Testing zhetrf */ -int main( int argc, char** argv) +int main( int argc, char** argv ) { TESTING_CHECK( magma_init() ); magma_print_environment(); @@ -752,8 +725,8 @@ int main( int argc, char** argv) // TODO: this doesn't work. Options need to be added to parse_opts() //for (int i = 1; i < argc; ++i) { - // if ( strcmp("--cpu-panel", argv[i]) == 0) cpu_panel = 1; - // if ( strcmp("--gpu-panel", argv[i]) == 0) cpu_panel = 0; + // if (strcmp("--cpu-panel", argv[i]) == 0) cpu_panel = 1; + // if (strcmp("--gpu-panel", argv[i]) == 0) cpu_panel = 0; //} printf( "%% --version 1 = Bunch-Kauffman (CPU)\n" @@ -796,7 +769,7 @@ int main( int argc, char** argv) double tol = opts.tolerance * lapackf77_dlamch("E"); - if ( opts.check == 2 ) { + if (opts.check == 2) { printf("%% M N CPU Gflop/s (sec) GPU Gflop/s (sec) |Ax-b|/(N*|A|*|x|)\n"); } else { @@ -816,7 +789,7 @@ int main( int argc, char** argv) /* ===================================================================== Performs operation using LAPACK =================================================================== */ - if ( opts.lapack ) { + if (opts.lapack) { lwork = -1; lapackf77_zhetrf( lapack_uplo_const(opts.uplo), &N, h_A, &lda, ipiv, &temp, &lwork, &info ); lwork = (magma_int_t)MAGMA_Z_REAL( temp ); @@ -891,6 +864,7 @@ int main( int argc, char** argv) magmaDoubleComplex_ptr d_A; TESTING_CHECK( magma_zmalloc( &d_A, N*ldda )); magma_zsetmatrix(N, N, h_A, lda, d_A, ldda, opts.queue ); + gpu_time = magma_wtime(); magma_zhetrf_nopiv_gpu( opts.uplo, N, d_A, ldda, &info); gpu_time = magma_wtime() - gpu_time; @@ -928,6 +902,7 @@ int main( int argc, char** argv) } printf("inertia: positive / negative / zero = %d / %d / %d\n", inert[0], inert[1], inert[2]); + magma_free( d_A ); } else if (gpu) { @@ -936,12 +911,13 @@ int main( int argc, char** argv) magmaDoubleComplex_ptr d_A; TESTING_CHECK( magma_zmalloc( &d_A, N*ldda )); magma_zsetmatrix(N, N, h_A, lda, d_A, ldda, opts.queue ); + gpu_time = magma_wtime(); magma_zhetrf_gpu( opts.uplo, N, d_A, ldda, ipiv, &info); gpu_time = magma_wtime() - gpu_time; magma_zgetmatrix(N, N, d_A, ldda, h_A, lda, opts.queue ); - if ( opts.check == 2 && info == 0) { + if (opts.check == 2 && info == 0) { error = get_residual_gpu( opts, (nopiv | nopiv_gpu), opts.uplo, N, h_A, lda, d_A, ldda, ipiv, solve_time ); magma_zgetmatrix(N, N, d_A, ldda, h_A, lda, opts.queue ); @@ -980,7 +956,7 @@ int main( int argc, char** argv) /* ===================================================================== Check the factorization =================================================================== */ - if ( opts.lapack ) { + if (opts.lapack) { printf("%5lld %5lld %7.2f (%7.2f) %7.2f (%7.2f)", (long long) N, (long long) N, cpu_perf, cpu_time, gpu_perf, gpu_time ); } @@ -988,11 +964,11 @@ int main( int argc, char** argv) printf("%5lld %5lld --- ( --- ) %7.2f (%7.2f)", (long long) N, (long long) N, gpu_perf, gpu_time ); } - if ( opts.check == 2 && info == 0) { + if (opts.check == 2 && info == 0) { if (aasen) { error = get_residual_aasen( opts, (nopiv | nopiv_gpu), opts.uplo, N, h_A, lda, ipiv ); } - else if (!gpu) { + else if (! gpu) { error = get_residual( opts, (nopiv | nopiv_gpu), opts.uplo, N, h_A, lda, ipiv ); } // gpu case calls get_residual_gpu before to initialize error and timing. @@ -1011,7 +987,7 @@ int main( int argc, char** argv) printf("\n"); status += ! (error < tol); } - else if ( opts.check && info == 0 ) { + else if (opts.check && info == 0) { if (aasen) { error = get_LTLt_error( opts, (nopiv | nopiv_gpu), opts.uplo, N, h_A, lda, ipiv ); } @@ -1029,7 +1005,7 @@ int main( int argc, char** argv) magma_free_pinned( h_A ); fflush( stdout ); } - if ( opts.niter > 1 ) { + if (opts.niter > 1) { printf( "\n" ); } } From 471406c257d097d4fe776230e80b63d903e1e4a1 Mon Sep 17 00:00:00 2001 From: weilinscenccs <135019969+weilinscenccs@users.noreply.github.com> Date: Tue, 24 Jun 2025 16:35:55 +0200 Subject: [PATCH 12/27] Update magma2.F90 bind C name should be magma_sync_wtime --- fortran/magma2.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fortran/magma2.F90 b/fortran/magma2.F90 index e004c8e21..5bb18dbed 100644 --- a/fortran/magma2.F90 +++ b/fortran/magma2.F90 @@ -82,7 +82,7 @@ real(c_double) function magma_wtime() & end function real(c_double) function magma_sync_wtime( queue ) & - bind(C, name="magma_wtime") + bind(C, name="magma_sync_wtime") use iso_c_binding type(c_ptr), value :: queue end function From ebd725118f12d583c266eecf9f97e70833941bab Mon Sep 17 00:00:00 2001 From: cyy Date: Sat, 17 May 2025 07:51:45 +0800 Subject: [PATCH 13/27] Improve C/C++ standard setting in CMake --- CMakeLists.txt | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d55ea406b..40f5dc73f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,9 +6,9 @@ cmake_minimum_required( VERSION 3.18 ) option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON ) if (USE_FORTRAN) - project( MAGMA C CXX Fortran ) + project( MAGMA LANGUAGES C CXX Fortran ) else() - project( MAGMA C CXX ) + project( MAGMA LANGUAGES C CXX ) endif() FIND_PROGRAM(PROGRAM_CCACHE ccache) @@ -70,26 +70,10 @@ endif() # ---------------------------------------- # use C++14 and C99 # see http://stackoverflow.com/questions/10851247/how-to-activate-c-11-in-cmake -include(CheckCXXCompilerFlag) -include(CheckCCompilerFlag) -CHECK_CXX_COMPILER_FLAG("-std=c++14" COMPILER_SUPPORTS_CXX14) -CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) -CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC) -if (COMPILER_SUPPORTS_CXX14) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") -elseif(COMPILER_SUPPORTS_CXX0X) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") -else() - message( WARNING "The compiler ${CMAKE_CXX_COMPILER} doesn't support the -std=c++14 flag. Some code may not compile.") -endif() - -CHECK_C_COMPILER_FLAG("-std=c99" COMPILER_SUPPORTS_C99) -if (COMPILER_SUPPORTS_C99) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") -else() - message( WARNING "The compiler ${CMAKE_C_COMPILER} doesn't support the -std=c99 flag. Some code may not compile.") -endif() - +set( CMAKE_C_STANDARD 99 ) +set( CMAKE_C_STANDARD_REQUIRED OFF ) +set( CMAKE_CXX_STANDARD 14 ) +set( CMAKE_CXX_STANDARD_REQUIRED OFF ) # ---------------------------------------- # check Fortran name mangling From 27c325404cd8844e7af3bcc7a893097bad5589cf Mon Sep 17 00:00:00 2001 From: cyy Date: Thu, 29 May 2025 17:18:56 +0800 Subject: [PATCH 14/27] Fix fpic Signed-off-by: cyy --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 40f5dc73f..4f436243f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,11 +69,12 @@ endif() # ---------------------------------------- # use C++14 and C99 -# see http://stackoverflow.com/questions/10851247/how-to-activate-c-11-in-cmake set( CMAKE_C_STANDARD 99 ) set( CMAKE_C_STANDARD_REQUIRED OFF ) set( CMAKE_CXX_STANDARD 14 ) set( CMAKE_CXX_STANDARD_REQUIRED OFF ) +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC) # ---------------------------------------- # check Fortran name mangling From 675053b340f8859b7999cd9dbbccb2bacad3a73c Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Thu, 17 Jul 2025 15:04:01 +0000 Subject: [PATCH 15/27] cmake: require c99 and c++14; prohibit decay to older standards and compiler extensions (gnu, etc.) --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f436243f..f6d9feb86 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,9 +70,11 @@ endif() # ---------------------------------------- # use C++14 and C99 set( CMAKE_C_STANDARD 99 ) -set( CMAKE_C_STANDARD_REQUIRED OFF ) +set( CMAKE_C_STANDARD_REQUIRED on ) +set( CMAKE_C_EXTENSIONS off ) set( CMAKE_CXX_STANDARD 14 ) -set( CMAKE_CXX_STANDARD_REQUIRED OFF ) +set( CMAKE_CXX_STANDARD_REQUIRED on ) # prohibit "decay" to older standards (std=c++11, etc.) +set( CMAKE_CXX_EXTENSIONS off ) # prohibit std=gnu++17, etc. include(CheckCXXCompilerFlag) CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_SUPPORTS_FPIC) From c26c19bc78bcac31c3645f00074a8233dc7dad73 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Thu, 17 Jul 2025 15:32:53 +0000 Subject: [PATCH 16/27] fortran: use c_sizeof from f2008. Fixes #55. --- fortran/magma2_common.F90 | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fortran/magma2_common.F90 b/fortran/magma2_common.F90 index ac18317b8..09c285370 100644 --- a/fortran/magma2_common.F90 +++ b/fortran/magma2_common.F90 @@ -12,15 +12,13 @@ module magma2_common integer(c_int), parameter :: idummy = 0 type(c_ptr), parameter :: ptr_dummy = c_null_ptr -!! Intel ifort chokes on c_sizeof here, so use extension sizeof -!! see https://software.intel.com/en-us/forums/intel-visual-fortran-compiler-for-windows/topic/495001 integer(c_size_t), parameter :: & - sizeof_real = sizeof(sdummy), & - sizeof_double = sizeof(ddummy), & - sizeof_complex = sizeof(cdummy), & - sizeof_complex16 = sizeof(zdummy), & - sizeof_int = sizeof(idummy), & - sizeof_ptr = sizeof(ptr_dummy) + sizeof_real = c_sizeof(sdummy), & + sizeof_double = c_sizeof(ddummy), & + sizeof_complex = c_sizeof(cdummy), & + sizeof_complex16 = c_sizeof(zdummy), & + sizeof_int = c_sizeof(idummy), & + sizeof_ptr = c_sizeof(ptr_dummy) !! ============================================================================= From 6914cea1e75a59930e695bba3030914bd180d1e6 Mon Sep 17 00:00:00 2001 From: Ahmad Abdelfattah Date: Wed, 23 Jul 2025 15:51:05 -0400 Subject: [PATCH 17/27] add support to Blackwell GPUs (might want to add more sm_xx) --- Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index cc0d52749..713a28a42 100644 --- a/Makefile +++ b/Makefile @@ -154,6 +154,10 @@ ifeq ($(BACKEND),cuda) CUDA_ARCH_ += sm_90 CUDA_ARCH_ += sm_90a endif + ifneq ($(findstring Blackwell, $(GPU_TARGET)),) + CUDA_ARCH_ += sm_100 + endif + # Remember to add to CMakeLists.txt too! @@ -167,7 +171,7 @@ ifeq ($(BACKEND),cuda) # See also $(info compile for ...) in Makefile - CUDA_ARCH_UNKNOWN_ = $(filter-out sm_% Kepler Maxwell Pascal Volta Turing Ampere Ada Hopper, $(CUDA_ARCH_)) + CUDA_ARCH_UNKNOWN_ = $(filter-out sm_% Kepler Maxwell Pascal Volta Turing Ampere Ada Hopper Blackwell, $(CUDA_ARCH_)) ifneq ($(CUDA_ARCH_UNKNOWN_),) $(error ERROR: unknown `$(CUDA_ARCH_UNKNOWN_)` in GPU_TARGET) endif @@ -186,7 +190,7 @@ ifeq ($(BACKEND),cuda) # Check for empty ifeq ($(NV_SM),) - $(error ERROR: unknown `GPU_TARGET=$(GPU_TARGET)`. Set cuda_arch to one or more of Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, or valid sm_XX from nvcc -h) + $(error ERROR: unknown `GPU_TARGET=$(GPU_TARGET)`. Set cuda_arch to one or more of Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, Blackwell, or valid sm_XX from nvcc -h) else # Get last option (last 2 words) of nv_compute. nwords := $(words $(NV_COMP)) @@ -203,7 +207,7 @@ ifeq ($(BACKEND),cuda) CUDA_ARCH := $(SMS) CUDA_ARCH_MIN := $(word 1, $(SMS))0 ifeq ($(CUDA_ARCH_MIN),) - $(error GPU_TARGET, currently $(GPU_TARGET), must contain one or more of Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, or valid sm_[0-9][0-9]. Please edit your make.inc file) + $(error GPU_TARGET, currently $(GPU_TARGET), must contain one or more of Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, Ada, Hopper, Blackwell, or valid sm_[0-9][0-9]. Please edit your make.inc file) endif else ifeq ($(BACKEND),hip) From 004a8b2fc7ded1b636340ac75937b1c93b0bc052 Mon Sep 17 00:00:00 2001 From: Ahmad Abdelfattah Date: Wed, 23 Jul 2025 15:54:21 -0400 Subject: [PATCH 18/27] add blackwell to make.inc files --- make.inc-examples/make.inc.mkl-gcc | 1 + make.inc-examples/make.inc.mkl-gcc-debug | 1 + make.inc-examples/make.inc.mkl-gcc-ilp64 | 1 + make.inc-examples/make.inc.mkl-gcc-ilp64-debug | 1 + make.inc-examples/make.inc.mkl-icx | 1 + make.inc-examples/make.inc.mkl-icx-debug | 1 + make.inc-examples/make.inc.mkl-icx-ilp64 | 1 + make.inc-examples/make.inc.mkl-icx-ilp64-debug | 1 + make.inc-examples/make.inc.openblas-debug | 1 + make.inc-examples/make.inc.power9-essl | 1 + 10 files changed, 10 insertions(+) diff --git a/make.inc-examples/make.inc.mkl-gcc b/make.inc-examples/make.inc.mkl-gcc index 71cffeb65..641789475 100644 --- a/make.inc-examples/make.inc.mkl-gcc +++ b/make.inc-examples/make.inc.mkl-gcc @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.mkl-gcc-debug b/make.inc-examples/make.inc.mkl-gcc-debug index 785c1e7c7..5dead18fb 100644 --- a/make.inc-examples/make.inc.mkl-gcc-debug +++ b/make.inc-examples/make.inc.mkl-gcc-debug @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.mkl-gcc-ilp64 b/make.inc-examples/make.inc.mkl-gcc-ilp64 index 81ff2b1ac..992b9878f 100644 --- a/make.inc-examples/make.inc.mkl-gcc-ilp64 +++ b/make.inc-examples/make.inc.mkl-gcc-ilp64 @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.mkl-gcc-ilp64-debug b/make.inc-examples/make.inc.mkl-gcc-ilp64-debug index df9a12ae3..b093e8af3 100644 --- a/make.inc-examples/make.inc.mkl-gcc-ilp64-debug +++ b/make.inc-examples/make.inc.mkl-gcc-ilp64-debug @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.mkl-icx b/make.inc-examples/make.inc.mkl-icx index 86d59801b..f423354fb 100644 --- a/make.inc-examples/make.inc.mkl-icx +++ b/make.inc-examples/make.inc.mkl-icx @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.mkl-icx-debug b/make.inc-examples/make.inc.mkl-icx-debug index 94e81c6ba..f42014f0f 100644 --- a/make.inc-examples/make.inc.mkl-icx-debug +++ b/make.inc-examples/make.inc.mkl-icx-debug @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.mkl-icx-ilp64 b/make.inc-examples/make.inc.mkl-icx-ilp64 index 93b8b2c00..d038b93af 100644 --- a/make.inc-examples/make.inc.mkl-icx-ilp64 +++ b/make.inc-examples/make.inc.mkl-icx-ilp64 @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.mkl-icx-ilp64-debug b/make.inc-examples/make.inc.mkl-icx-ilp64-debug index e2a693a6f..e02b6a7d6 100644 --- a/make.inc-examples/make.inc.mkl-icx-ilp64-debug +++ b/make.inc-examples/make.inc.mkl-icx-ilp64-debug @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.openblas-debug b/make.inc-examples/make.inc.openblas-debug index bcf97e19b..e6661ae2a 100644 --- a/make.inc-examples/make.inc.openblas-debug +++ b/make.inc-examples/make.inc.openblas-debug @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus diff --git a/make.inc-examples/make.inc.power9-essl b/make.inc-examples/make.inc.power9-essl index cafdb2ab9..1c9410ece 100644 --- a/make.inc-examples/make.inc.power9-essl +++ b/make.inc-examples/make.inc.power9-essl @@ -15,6 +15,7 @@ # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards # Hopper - NVIDIA compute capability 9.x cards +# Blackwell - NVIDIA compute capability 10.x and 12.x cards # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus From b9f0a0f3344ccc1aff0cd57a6ce152f75950a7af Mon Sep 17 00:00:00 2001 From: Ahmad Abdelfattah Date: Wed, 23 Jul 2025 16:03:46 -0400 Subject: [PATCH 19/27] remove very old CUDA archs --- make.inc-examples/make.inc.mkl-gcc | 4 +--- make.inc-examples/make.inc.mkl-gcc-debug | 4 +--- make.inc-examples/make.inc.mkl-gcc-ilp64 | 4 +--- make.inc-examples/make.inc.mkl-gcc-ilp64-debug | 4 +--- make.inc-examples/make.inc.mkl-icx | 4 +--- make.inc-examples/make.inc.mkl-icx-debug | 4 +--- make.inc-examples/make.inc.mkl-icx-ilp64 | 4 +--- make.inc-examples/make.inc.mkl-icx-ilp64-debug | 4 +--- make.inc-examples/make.inc.openblas-debug | 4 +--- make.inc-examples/make.inc.power9-essl | 4 +--- 10 files changed, 10 insertions(+), 30 deletions(-) diff --git a/make.inc-examples/make.inc.mkl-gcc b/make.inc-examples/make.inc.mkl-gcc index 641789475..159ea0c65 100644 --- a/make.inc-examples/make.inc.mkl-gcc +++ b/make.inc-examples/make.inc.mkl-gcc @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.mkl-gcc-debug b/make.inc-examples/make.inc.mkl-gcc-debug index 5dead18fb..12cbd9a49 100644 --- a/make.inc-examples/make.inc.mkl-gcc-debug +++ b/make.inc-examples/make.inc.mkl-gcc-debug @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.mkl-gcc-ilp64 b/make.inc-examples/make.inc.mkl-gcc-ilp64 index 992b9878f..cd52fb7c2 100644 --- a/make.inc-examples/make.inc.mkl-gcc-ilp64 +++ b/make.inc-examples/make.inc.mkl-gcc-ilp64 @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.mkl-gcc-ilp64-debug b/make.inc-examples/make.inc.mkl-gcc-ilp64-debug index b093e8af3..e6947d425 100644 --- a/make.inc-examples/make.inc.mkl-gcc-ilp64-debug +++ b/make.inc-examples/make.inc.mkl-gcc-ilp64-debug @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.mkl-icx b/make.inc-examples/make.inc.mkl-icx index f423354fb..c9efc42f4 100644 --- a/make.inc-examples/make.inc.mkl-icx +++ b/make.inc-examples/make.inc.mkl-icx @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.mkl-icx-debug b/make.inc-examples/make.inc.mkl-icx-debug index f42014f0f..9d5dd6fa4 100644 --- a/make.inc-examples/make.inc.mkl-icx-debug +++ b/make.inc-examples/make.inc.mkl-icx-debug @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.mkl-icx-ilp64 b/make.inc-examples/make.inc.mkl-icx-ilp64 index d038b93af..68765ea58 100644 --- a/make.inc-examples/make.inc.mkl-icx-ilp64 +++ b/make.inc-examples/make.inc.mkl-icx-ilp64 @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.mkl-icx-ilp64-debug b/make.inc-examples/make.inc.mkl-icx-ilp64-debug index e02b6a7d6..5e174205a 100644 --- a/make.inc-examples/make.inc.mkl-icx-ilp64-debug +++ b/make.inc-examples/make.inc.mkl-icx-ilp64-debug @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.openblas-debug b/make.inc-examples/make.inc.openblas-debug index e6661ae2a..f4215ab53 100644 --- a/make.inc-examples/make.inc.openblas-debug +++ b/make.inc-examples/make.inc.openblas-debug @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere diff --git a/make.inc-examples/make.inc.power9-essl b/make.inc-examples/make.inc.power9-essl index 1c9410ece..254d7135f 100644 --- a/make.inc-examples/make.inc.power9-essl +++ b/make.inc-examples/make.inc.power9-essl @@ -8,9 +8,6 @@ # GPU_TARGET contains one or more of the architectures below # to specify for which GPUs you want to compile MAGMA: -# Fermi - NVIDIA compute capability 2.x cards -# Kepler - NVIDIA compute capability 3.x cards -# Maxwell - NVIDIA compute capability 5.x cards # Pascal - NVIDIA compute capability 6.x cards # Volta/Turing - NVIDIA compute capability 7.x cards # Ampere/Ada-Lovelace - NVIDIA compute capability 8.x cards @@ -19,6 +16,7 @@ # The default is "Volta Turing Ampere". # Note that depending on the CUDA version, some compute capabilities may not be supported # See http://developer.nvidia.com/cuda-gpus +# Older architectures are supported if the proper version of CUDA is used # GPU_TARGET ?= Volta Turing Ampere From 6ae9acd9919cc6b4dd45ebddc6c97b14906ddeb2 Mon Sep 17 00:00:00 2001 From: Ahmad Abdelfattah Date: Wed, 23 Jul 2025 16:24:52 -0400 Subject: [PATCH 20/27] also add sm 12.0 when Blackwell is selected --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 713a28a42..bbecfee83 100644 --- a/Makefile +++ b/Makefile @@ -156,6 +156,7 @@ ifeq ($(BACKEND),cuda) endif ifneq ($(findstring Blackwell, $(GPU_TARGET)),) CUDA_ARCH_ += sm_100 + CUDA_ARCH_ += sm_120 endif From ac4262a82ea6a462c747759a623a8be05d9e0d59 Mon Sep 17 00:00:00 2001 From: Ahmad Abdelfattah Date: Thu, 24 Jul 2025 10:28:18 -0400 Subject: [PATCH 21/27] update release notes --- ReleaseNotes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ReleaseNotes b/ReleaseNotes index a3cb5f0ec..136001a36 100644 --- a/ReleaseNotes +++ b/ReleaseNotes @@ -26,6 +26,8 @@ Most routines have all four precisions: single (s), double (d), single-complex (c), double-complex (z). 2.10.0 - XXX XX, XXXX + * Support added for NVIDIA Blackwell GPUs (sm_100 and sm_120) + - Requires CUDA-12.8 or higher * New functionality: Variable-size batch non-pivoting LU factorization - Contributed by Wajih Boukaram, Yang Liu, and Sherry Li at LBNL - magma_getrf_nopiv_vbatched performs a non-pivoting LU factorization on a From 0955da3feb16a38084b3284680e144a11af8dc58 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Fri, 25 Jul 2025 11:20:35 -0400 Subject: [PATCH 22/27] cmake: add Blackwell --- CMakeLists.txt | 4 ++++ Makefile | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f6d9feb86..155e6f73c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -166,6 +166,10 @@ if (MAGMA_ENABLE_CUDA) set( GPU_TARGET "${GPU_TARGET} sm_90 sm_90a" ) endif() + if (GPU_TARGET MATCHES Blackwell) + set( GPU_TARGET "${GPU_TARGET} sm_100 sm_120" ) + endif() + # Find all sm_XY and sm_XYZ, then strip off sm_. string( REGEX MATCHALL "sm_[0-9][0-9a-z]+" sms "${GPU_TARGET}" ) string( REPLACE "sm_" "" __cuda_architectures "${sms}" ) diff --git a/Makefile b/Makefile index bbecfee83..b8fa66a63 100644 --- a/Makefile +++ b/Makefile @@ -158,9 +158,7 @@ ifeq ($(BACKEND),cuda) CUDA_ARCH_ += sm_100 CUDA_ARCH_ += sm_120 endif - - - # Remember to add to CMakeLists.txt too! + # Remember to add new architectures to CMakeLists.txt too! # Next, add compile options for specific smXX # sm_xx is binary, compute_xx is PTX for forward compatability From 21d102f90096ecba5ab5d1656a6463634d14c592 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Sat, 2 Aug 2025 16:51:14 -0400 Subject: [PATCH 23/27] archive files that are not in Makefile.src. Note magma_zmlumerge.cpp is still in sparse/control (for now); rm copy in sparse/src. --- .../magmablas}/zswapdblk_batched.cu | 0 .../sparse}/blas/clag2z_sparse.cu | 0 .../sparse}/blas/magma_clag2z.cpp | 0 .../sparse}/blas/magma_zlag2c.cpp | 0 .../sparse}/blas/magma_zthrsselect.cu | 0 .../sparse}/blas/zbcsrblockinfo.cu | 0 {sparse => archive/sparse}/blas/zbcsrcpy.cu | 0 .../sparse}/blas/zbcsrlugemm.cu | 0 .../sparse}/blas/zbcsrlupivloc.cu | 0 {sparse => archive/sparse}/blas/zbcsrswp.cu | 0 {sparse => archive/sparse}/blas/zbcsrtrsv.cu | 0 .../sparse}/blas/zgeblockstruct.cu | 0 {sparse => archive/sparse}/blas/zgeisai.cu | 0 {sparse => archive/sparse}/blas/zgeisai_16.cu | 0 {sparse => archive/sparse}/blas/zgeisai_32.cu | 0 {sparse => archive/sparse}/blas/zgeisai_8.cu | 0 {sparse => archive/sparse}/blas/zilut.cpp | 0 .../sparse}/blas/zlag2c_sparse.cu | 0 .../sparse}/control/magma_zdummy.cpp | 0 .../sparse}/src/magma_zmlumerge.cpp | 0 .../sparse}/src/magma_zwrapper.cpp | 0 {sparse => archive/sparse}/src/zdummy.cpp | 0 {sparse => archive/sparse}/src/zgeisai.cpp | 0 {sparse => archive/sparse}/src/ziterict.cpp | 0 {sparse => archive/sparse}/src/zsyisai.cpp | 0 .../sparse}/testing/testing_zdiagdom.cpp | 0 .../sparse}/testing/testing_zgemv_cpu_gpu.cpp | 0 .../sparse}/testing/testing_zjaccard.cpp | 0 .../sparse}/testing/testing_zoperation.cpp | 0 .../sparse}/testing/testing_zparilu.cpp | 0 .../testing/testing_zparilu_weight.cpp | 0 .../testing/testing_zsolver_energy.cpp | 0 .../sparse}/testing/testing_zthresselect.cpp | 0 .../testing/testing_zusemagma_example.cpp | 0 docs/Doxyfile-fast | 26 ------------------- include/magma_zbatched.h | 7 ----- sparse/control/Makefile.src | 5 ---- sparse/testing/Makefile.src | 4 --- tools/MakeMagmaRelease.pl | 10 +++---- 39 files changed, 3 insertions(+), 49 deletions(-) rename {magmablas => archive/magmablas}/zswapdblk_batched.cu (100%) rename {sparse => archive/sparse}/blas/clag2z_sparse.cu (100%) rename {sparse => archive/sparse}/blas/magma_clag2z.cpp (100%) rename {sparse => archive/sparse}/blas/magma_zlag2c.cpp (100%) rename {sparse => archive/sparse}/blas/magma_zthrsselect.cu (100%) rename {sparse => archive/sparse}/blas/zbcsrblockinfo.cu (100%) rename {sparse => archive/sparse}/blas/zbcsrcpy.cu (100%) rename {sparse => archive/sparse}/blas/zbcsrlugemm.cu (100%) rename {sparse => archive/sparse}/blas/zbcsrlupivloc.cu (100%) rename {sparse => archive/sparse}/blas/zbcsrswp.cu (100%) rename {sparse => archive/sparse}/blas/zbcsrtrsv.cu (100%) rename {sparse => archive/sparse}/blas/zgeblockstruct.cu (100%) rename {sparse => archive/sparse}/blas/zgeisai.cu (100%) rename {sparse => archive/sparse}/blas/zgeisai_16.cu (100%) rename {sparse => archive/sparse}/blas/zgeisai_32.cu (100%) rename {sparse => archive/sparse}/blas/zgeisai_8.cu (100%) rename {sparse => archive/sparse}/blas/zilut.cpp (100%) rename {sparse => archive/sparse}/blas/zlag2c_sparse.cu (100%) rename {sparse => archive/sparse}/control/magma_zdummy.cpp (100%) rename {sparse => archive/sparse}/src/magma_zmlumerge.cpp (100%) rename {sparse => archive/sparse}/src/magma_zwrapper.cpp (100%) rename {sparse => archive/sparse}/src/zdummy.cpp (100%) rename {sparse => archive/sparse}/src/zgeisai.cpp (100%) rename {sparse => archive/sparse}/src/ziterict.cpp (100%) rename {sparse => archive/sparse}/src/zsyisai.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zdiagdom.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zgemv_cpu_gpu.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zjaccard.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zoperation.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zparilu.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zparilu_weight.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zsolver_energy.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zthresselect.cpp (100%) rename {sparse => archive/sparse}/testing/testing_zusemagma_example.cpp (100%) diff --git a/magmablas/zswapdblk_batched.cu b/archive/magmablas/zswapdblk_batched.cu similarity index 100% rename from magmablas/zswapdblk_batched.cu rename to archive/magmablas/zswapdblk_batched.cu diff --git a/sparse/blas/clag2z_sparse.cu b/archive/sparse/blas/clag2z_sparse.cu similarity index 100% rename from sparse/blas/clag2z_sparse.cu rename to archive/sparse/blas/clag2z_sparse.cu diff --git a/sparse/blas/magma_clag2z.cpp b/archive/sparse/blas/magma_clag2z.cpp similarity index 100% rename from sparse/blas/magma_clag2z.cpp rename to archive/sparse/blas/magma_clag2z.cpp diff --git a/sparse/blas/magma_zlag2c.cpp b/archive/sparse/blas/magma_zlag2c.cpp similarity index 100% rename from sparse/blas/magma_zlag2c.cpp rename to archive/sparse/blas/magma_zlag2c.cpp diff --git a/sparse/blas/magma_zthrsselect.cu b/archive/sparse/blas/magma_zthrsselect.cu similarity index 100% rename from sparse/blas/magma_zthrsselect.cu rename to archive/sparse/blas/magma_zthrsselect.cu diff --git a/sparse/blas/zbcsrblockinfo.cu b/archive/sparse/blas/zbcsrblockinfo.cu similarity index 100% rename from sparse/blas/zbcsrblockinfo.cu rename to archive/sparse/blas/zbcsrblockinfo.cu diff --git a/sparse/blas/zbcsrcpy.cu b/archive/sparse/blas/zbcsrcpy.cu similarity index 100% rename from sparse/blas/zbcsrcpy.cu rename to archive/sparse/blas/zbcsrcpy.cu diff --git a/sparse/blas/zbcsrlugemm.cu b/archive/sparse/blas/zbcsrlugemm.cu similarity index 100% rename from sparse/blas/zbcsrlugemm.cu rename to archive/sparse/blas/zbcsrlugemm.cu diff --git a/sparse/blas/zbcsrlupivloc.cu b/archive/sparse/blas/zbcsrlupivloc.cu similarity index 100% rename from sparse/blas/zbcsrlupivloc.cu rename to archive/sparse/blas/zbcsrlupivloc.cu diff --git a/sparse/blas/zbcsrswp.cu b/archive/sparse/blas/zbcsrswp.cu similarity index 100% rename from sparse/blas/zbcsrswp.cu rename to archive/sparse/blas/zbcsrswp.cu diff --git a/sparse/blas/zbcsrtrsv.cu b/archive/sparse/blas/zbcsrtrsv.cu similarity index 100% rename from sparse/blas/zbcsrtrsv.cu rename to archive/sparse/blas/zbcsrtrsv.cu diff --git a/sparse/blas/zgeblockstruct.cu b/archive/sparse/blas/zgeblockstruct.cu similarity index 100% rename from sparse/blas/zgeblockstruct.cu rename to archive/sparse/blas/zgeblockstruct.cu diff --git a/sparse/blas/zgeisai.cu b/archive/sparse/blas/zgeisai.cu similarity index 100% rename from sparse/blas/zgeisai.cu rename to archive/sparse/blas/zgeisai.cu diff --git a/sparse/blas/zgeisai_16.cu b/archive/sparse/blas/zgeisai_16.cu similarity index 100% rename from sparse/blas/zgeisai_16.cu rename to archive/sparse/blas/zgeisai_16.cu diff --git a/sparse/blas/zgeisai_32.cu b/archive/sparse/blas/zgeisai_32.cu similarity index 100% rename from sparse/blas/zgeisai_32.cu rename to archive/sparse/blas/zgeisai_32.cu diff --git a/sparse/blas/zgeisai_8.cu b/archive/sparse/blas/zgeisai_8.cu similarity index 100% rename from sparse/blas/zgeisai_8.cu rename to archive/sparse/blas/zgeisai_8.cu diff --git a/sparse/blas/zilut.cpp b/archive/sparse/blas/zilut.cpp similarity index 100% rename from sparse/blas/zilut.cpp rename to archive/sparse/blas/zilut.cpp diff --git a/sparse/blas/zlag2c_sparse.cu b/archive/sparse/blas/zlag2c_sparse.cu similarity index 100% rename from sparse/blas/zlag2c_sparse.cu rename to archive/sparse/blas/zlag2c_sparse.cu diff --git a/sparse/control/magma_zdummy.cpp b/archive/sparse/control/magma_zdummy.cpp similarity index 100% rename from sparse/control/magma_zdummy.cpp rename to archive/sparse/control/magma_zdummy.cpp diff --git a/sparse/src/magma_zmlumerge.cpp b/archive/sparse/src/magma_zmlumerge.cpp similarity index 100% rename from sparse/src/magma_zmlumerge.cpp rename to archive/sparse/src/magma_zmlumerge.cpp diff --git a/sparse/src/magma_zwrapper.cpp b/archive/sparse/src/magma_zwrapper.cpp similarity index 100% rename from sparse/src/magma_zwrapper.cpp rename to archive/sparse/src/magma_zwrapper.cpp diff --git a/sparse/src/zdummy.cpp b/archive/sparse/src/zdummy.cpp similarity index 100% rename from sparse/src/zdummy.cpp rename to archive/sparse/src/zdummy.cpp diff --git a/sparse/src/zgeisai.cpp b/archive/sparse/src/zgeisai.cpp similarity index 100% rename from sparse/src/zgeisai.cpp rename to archive/sparse/src/zgeisai.cpp diff --git a/sparse/src/ziterict.cpp b/archive/sparse/src/ziterict.cpp similarity index 100% rename from sparse/src/ziterict.cpp rename to archive/sparse/src/ziterict.cpp diff --git a/sparse/src/zsyisai.cpp b/archive/sparse/src/zsyisai.cpp similarity index 100% rename from sparse/src/zsyisai.cpp rename to archive/sparse/src/zsyisai.cpp diff --git a/sparse/testing/testing_zdiagdom.cpp b/archive/sparse/testing/testing_zdiagdom.cpp similarity index 100% rename from sparse/testing/testing_zdiagdom.cpp rename to archive/sparse/testing/testing_zdiagdom.cpp diff --git a/sparse/testing/testing_zgemv_cpu_gpu.cpp b/archive/sparse/testing/testing_zgemv_cpu_gpu.cpp similarity index 100% rename from sparse/testing/testing_zgemv_cpu_gpu.cpp rename to archive/sparse/testing/testing_zgemv_cpu_gpu.cpp diff --git a/sparse/testing/testing_zjaccard.cpp b/archive/sparse/testing/testing_zjaccard.cpp similarity index 100% rename from sparse/testing/testing_zjaccard.cpp rename to archive/sparse/testing/testing_zjaccard.cpp diff --git a/sparse/testing/testing_zoperation.cpp b/archive/sparse/testing/testing_zoperation.cpp similarity index 100% rename from sparse/testing/testing_zoperation.cpp rename to archive/sparse/testing/testing_zoperation.cpp diff --git a/sparse/testing/testing_zparilu.cpp b/archive/sparse/testing/testing_zparilu.cpp similarity index 100% rename from sparse/testing/testing_zparilu.cpp rename to archive/sparse/testing/testing_zparilu.cpp diff --git a/sparse/testing/testing_zparilu_weight.cpp b/archive/sparse/testing/testing_zparilu_weight.cpp similarity index 100% rename from sparse/testing/testing_zparilu_weight.cpp rename to archive/sparse/testing/testing_zparilu_weight.cpp diff --git a/sparse/testing/testing_zsolver_energy.cpp b/archive/sparse/testing/testing_zsolver_energy.cpp similarity index 100% rename from sparse/testing/testing_zsolver_energy.cpp rename to archive/sparse/testing/testing_zsolver_energy.cpp diff --git a/sparse/testing/testing_zthresselect.cpp b/archive/sparse/testing/testing_zthresselect.cpp similarity index 100% rename from sparse/testing/testing_zthresselect.cpp rename to archive/sparse/testing/testing_zthresselect.cpp diff --git a/sparse/testing/testing_zusemagma_example.cpp b/archive/sparse/testing/testing_zusemagma_example.cpp similarity index 100% rename from sparse/testing/testing_zusemagma_example.cpp rename to archive/sparse/testing/testing_zusemagma_example.cpp diff --git a/docs/Doxyfile-fast b/docs/Doxyfile-fast index 6f9d0869e..2d5411161 100644 --- a/docs/Doxyfile-fast +++ b/docs/Doxyfile-fast @@ -1078,7 +1078,6 @@ INPUT = \ ../magmablas/zswap.cu \ ../magmablas/zswapblk.cu \ ../magmablas/zswapdblk.cu \ - ../magmablas/zswapdblk_batched.cu \ ../magmablas/zsymmetrize.cu \ ../magmablas/zsymmetrize_tiles.cu \ ../magmablas/zsymv.cu \ @@ -1116,32 +1115,21 @@ INPUT = \ ../sparse/blas/atomicopsfloat.h \ ../sparse/blas/atomicopsmagmaDoubleComplex.h \ ../sparse/blas/atomicopsmagmaFloatComplex.h \ - ../sparse/blas/clag2z_sparse.cu \ - ../sparse/blas/magma_clag2z.cpp \ ../sparse/blas/magma_z_blaswrapper.cpp \ ../sparse/blas/magma_zcuspaxpy.cpp \ ../sparse/blas/magma_zcuspmm.cpp \ ../sparse/blas/magma_zdiagcheck.cu \ ../sparse/blas/magma_zget_rowptr.cu \ - ../sparse/blas/magma_zlag2c.cpp \ ../sparse/blas/magma_zmconjugate.cu \ ../sparse/blas/magma_zmcsrcompressor_gpu.cu \ ../sparse/blas/magma_zpreselect.cu \ ../sparse/blas/magma_zthrsrm.cu \ - ../sparse/blas/magma_zthrsselect.cu \ ../sparse/blas/zbajac_csr.cu \ ../sparse/blas/zbajac_csr_overlap.cu \ - ../sparse/blas/zbcsrblockinfo.cu \ - ../sparse/blas/zbcsrcpy.cu \ - ../sparse/blas/zbcsrlugemm.cu \ - ../sparse/blas/zbcsrlupivloc.cu \ - ../sparse/blas/zbcsrswp.cu \ - ../sparse/blas/zbcsrtrsv.cu \ ../sparse/blas/zcgecsrmv_mixed_prec.cu \ ../sparse/blas/zcompact.cu \ ../sparse/blas/zge3pt.cu \ ../sparse/blas/zgeaxpy.cu \ - ../sparse/blas/zgeblockstruct.cu \ ../sparse/blas/zgecscsyncfreetrsm.cu \ ../sparse/blas/zgecsr5mv.cu \ ../sparse/blas/zgecsrmv.cu \ @@ -1150,10 +1138,6 @@ INPUT = \ ../sparse/blas/zgeellmv.cu \ ../sparse/blas/zgeellrtmv.cu \ ../sparse/blas/zgeelltmv.cu \ - ../sparse/blas/zgeisai.cu \ - ../sparse/blas/zgeisai_16.cu \ - ../sparse/blas/zgeisai_32.cu \ - ../sparse/blas/zgeisai_8.cu \ ../sparse/blas/zgeisai_batched32.cu \ ../sparse/blas/zgeisai_maxblock.cu \ ../sparse/blas/zgeisai_trsv.cu \ @@ -1161,10 +1145,8 @@ INPUT = \ ../sparse/blas/zgesellcmmv.cu \ ../sparse/blas/zgesellcmv.cu \ ../sparse/blas/zilu.cpp \ - ../sparse/blas/zilut.cpp \ ../sparse/blas/zjaccard_weights.cu \ ../sparse/blas/zjacobisetup.cu \ - ../sparse/blas/zlag2c_sparse.cu \ ../sparse/blas/zlobpcg_maxpy.cu \ ../sparse/blas/zlobpcg_residuals.cu \ ../sparse/blas/zlobpcg_shift.cu \ @@ -1192,7 +1174,6 @@ INPUT = \ ../sparse/control/error.cpp \ ../sparse/control/magma_zcsrsplit.cpp \ ../sparse/control/magma_zdomainoverlap.cpp \ - ../sparse/control/magma_zdummy.cpp \ ../sparse/control/magma_zfree.cpp \ ../sparse/control/magma_zmatrixchar.cpp \ ../sparse/control/magma_zmconvert.cpp \ @@ -1205,7 +1186,6 @@ INPUT = \ ../sparse/control/magma_zmgenerator.cpp \ ../sparse/control/magma_zmilustruct.cpp \ ../sparse/control/magma_zmio.cpp \ - ../sparse/control/magma_zmlumerge.cpp \ ../sparse/control/magma_zmscale.cpp \ ../sparse/control/magma_zmshrink.cpp \ ../sparse/control/magma_zmslice.cpp \ @@ -1238,9 +1218,7 @@ INPUT = \ ../sparse/src/magma_z_solver_wrapper.cpp \ ../sparse/src/magma_zcustomprecond.cpp \ ../sparse/src/magma_zcustomspmv.cpp \ - ../sparse/src/magma_zmlumerge.cpp \ ../sparse/src/magma_zqr_wrapper.cpp \ - ../sparse/src/magma_zwrapper.cpp \ ../sparse/src/zbaiter.cpp \ ../sparse/src/zbaiter_overlap.cpp \ ../sparse/src/zbicg.cpp \ @@ -1258,17 +1236,14 @@ INPUT = \ ../sparse/src/zcgs_merge.cpp \ ../sparse/src/zcustomic.cpp \ ../sparse/src/zcustomilu.cpp \ - ../sparse/src/zdummy.cpp \ ../sparse/src/zfgmres.cpp \ ../sparse/src/zftjacobi.cpp \ - ../sparse/src/zgeisai.cpp \ ../sparse/src/zgeisai_apply.cpp \ ../sparse/src/zgeisai_lower.cpp \ ../sparse/src/zgeisai_upper.cpp \ ../sparse/src/zidr.cpp \ ../sparse/src/zidr_merge.cpp \ ../sparse/src/zidr_strms.cpp \ - ../sparse/src/ziterict.cpp \ ../sparse/src/ziterref.cpp \ ../sparse/src/zjacobi.cpp \ ../sparse/src/zjacobidomainoverlap.cpp \ @@ -1301,7 +1276,6 @@ INPUT = \ ../sparse/src/zqmr_merge.cpp \ ../sparse/src/zresidual.cpp \ ../sparse/src/zresidualvec.cpp \ - ../sparse/src/zsyisai.cpp \ ../sparse/src/ztfqmr.cpp \ ../sparse/src/ztfqmr_merge.cpp \ ../sparse/src/ztfqmr_unrolled.cpp \ diff --git a/include/magma_zbatched.h b/include/magma_zbatched.h index 5c4681620..97d1063a0 100644 --- a/include/magma_zbatched.h +++ b/include/magma_zbatched.h @@ -66,13 +66,6 @@ magma_int_t magma_get_zgetri_batched_ntcol(magma_int_t m, magma_int_t n); magma_int_t magma_get_ztrsm_batched_stop_nb(magma_side_t side, magma_int_t m, magma_int_t n); void magma_get_zgbtrf_batched_params(magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t *nb, magma_int_t *threads); -void -magmablas_zswapdblk_batched( - magma_int_t n, magma_int_t nb, - magmaDoubleComplex **dA, magma_int_t ldda, magma_int_t inca, - magmaDoubleComplex **dB, magma_int_t lddb, magma_int_t incb, - magma_int_t batchCount, magma_queue_t queue ); - /* * LAPACK batched routines */ diff --git a/sparse/control/Makefile.src b/sparse/control/Makefile.src index 369243c40..9cfe78871 100644 --- a/sparse/control/Makefile.src +++ b/sparse/control/Makefile.src @@ -58,11 +58,6 @@ libsparse_src += \ $(cdir)/magma_zparict_tools.cpp \ - -# dummy to compensate for routines not included in release -libsparse_src += \ -# $(cdir)/magma_zdummy.cpp \ - # ---------------------------------------------------------------------- # pop first directory cdir := $(firstword $(dir_stack)) diff --git a/sparse/testing/Makefile.src b/sparse/testing/Makefile.src index f089ce7ef..66962a6b3 100644 --- a/sparse/testing/Makefile.src +++ b/sparse/testing/Makefile.src @@ -60,10 +60,6 @@ sparse_testing_src += \ # $(cdir)/testing_z_mpk_4.cpp \ # $(cdir)/testing_z_mpk_5.cpp \ -# Big Data Analytics -#sparse_testing_src += \ - $(cdir)/testing_zjaccard.cpp \ - # debugging sparse_testing_src += \ $(cdir)/testing_zsptrsv.cpp \ diff --git a/tools/MakeMagmaRelease.pl b/tools/MakeMagmaRelease.pl index a45a0464f..a53d2ee90 100755 --- a/tools/MakeMagmaRelease.pl +++ b/tools/MakeMagmaRelease.pl @@ -35,11 +35,7 @@ sparse/python sparse/testing/testing_zpardiso.cpp - sparse/testing/testing_zparilu_weight.cpp sparse/testing/testing_zsolver_allufmc.cpp - sparse/testing/testing_zsolver_energy.cpp - - sparse/blas/zilut.cpp testing/*.txt testing/fortran2.cpp @@ -128,7 +124,7 @@ sub MakeRelease } myCmd("mkdir $RELEASE_PATH"); - + # Save current directory my $dir = `pwd`; chomp $dir; @@ -220,9 +216,9 @@ sub MakeRelease # Generate cuda files myCmd("echo -e 'GPU_TARGET = Volta\nFORT = true' > make.inc"); - + # Compile the documentation - print "Compile the documentation\n"; + print "Compile the documentation\n"; myCmd("make docs"); myCmd("make -j generate"); From b89e846c2f2a95539c104a4d0c0ae937aafa9821 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Mon, 21 Jul 2025 23:57:07 -0400 Subject: [PATCH 24/27] make: remove out-dated hg commands --- Makefile | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/Makefile b/Makefile index b8fa66a63..14bb784de 100644 --- a/Makefile +++ b/Makefile @@ -977,33 +977,6 @@ pkgconfig: > $(DESTDIR)$(prefix)/lib$(LIB_SUFFIX)/pkgconfig/magma.pc -# ------------------------------------------------------------------------------ -# files.txt is nearly all (active) files in SVN, excluding directories. Useful for rsync, etc. -# files-doxygen.txt is all (active) source files in SVN, used by Doxyfile-fast - -# excludes non-active directories like obsolete. -# excludes directories by matching *.* files (\w\.\w) and some exceptions like Makefile. -files.txt: force - hg st -m -a -c \ - | perl -pe 's/^. +//' | sort \ - | egrep -v '^\.$$|obsolete|deprecated|contrib\b|^exp' \ - | egrep '\w\.\w|Makefile|docs|run' \ - > files.txt - egrep -v '(\.html|\.css|\.f|\.in|\.m|\.mtx|\.pl|\.png|\.sh|\.txt)$$|checkdiag|COPYRIGHT|docs|example|make\.|Makefile|quark|README|Release|results|testing_|testing/lin|testing/matgen|tools' files.txt \ - | perl -pe 'chomp; $$_ = sprintf("\t../%-57s\\\n", $$_);' \ - > files-doxygen.txt - -# files.txt per sub-directory -subdir_files = $(addsuffix /files.txt,$(subdirs) $(sparse_subdirs)) - -$(subdir_files): force - cd $(dir $@) && hg st -m -a -c -X '*/*' . \ - | perl -pe 's/^. +//' | sort \ - | egrep -v '^\.$$|obsolete|deprecated|contrib\b|^exp' \ - | egrep '\w\.\w|Makefile|docs|run' \ - > files.txt - - # ------------------------------------------------------------------------------ echo: @echo "=====" From 0176416df31837680d9cbdd42e55b8fc7721ea1a Mon Sep 17 00:00:00 2001 From: Ahmad Abdelfattah Date: Fri, 22 Aug 2025 11:56:30 -0400 Subject: [PATCH 25/27] fix compilation issue with cuda 13 --- interface_cuda/interface.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp index c3ae4e0eb..9c8e6c67f 100644 --- a/interface_cuda/interface.cpp +++ b/interface_cuda/interface.cpp @@ -391,10 +391,18 @@ magma_print_environment() check_error( err ); #ifdef MAGMA_HAVE_CUDA + + int clock_khz; + #if CUDA_VERSION >= 13000 + cudaDeviceGetAttribute (&clock_khz, cudaDevAttrClockRate, dev); + #else + clock_khz = prop.clockRate; + #endif + printf( "%% device %d: %s, %.1f MHz clock, %.1f MiB memory, capability %d.%d\n", dev, prop.name, - prop.clockRate / 1000., + clock_khz / 1000., prop.totalGlobalMem / (1024.*1024.), prop.major, prop.minor ); From d42d1b552023f4222615d1810199618503bc7516 Mon Sep 17 00:00:00 2001 From: Ahmad Abdelfattah Date: Fri, 22 Aug 2025 11:57:16 -0400 Subject: [PATCH 26/27] remove gbtf2 kernels that use cooperative groups --- control/magma_zf77.cpp | 28 -- include/magma_z.h | 28 -- magmablas/zgbtf2_kernels.cu | 592 --------------------------------- src/zgbtrf_gpu.cpp | 21 -- testing/testing_zgbtrf_gpu.cpp | 6 +- 5 files changed, 1 insertion(+), 674 deletions(-) diff --git a/control/magma_zf77.cpp b/control/magma_zf77.cpp index 07bf53999..0e5b5ca54 100644 --- a/control/magma_zf77.cpp +++ b/control/magma_zf77.cpp @@ -208,34 +208,6 @@ void magmaf_zgbsv_native( info ); } -#define magmaf_zgbtf2_native_v2 FORTRAN_NAME( magmaf_zgbtf2_native_v2, MAGMAF_ZGBTF2_NATIVE_V2 ) -void magmaf_zgbtf2_native_v2( - magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, - magmaDoubleComplex* dA, magma_int_t *ldda, - magma_int_t* ipiv, - magma_int_t* info, magma_queue_t *queue ) -{ - magma_zgbtf2_native_v2( - *m, *n, *kl, *ku, - dA, *ldda, - ipiv, - info, *queue ); -} - -#define magmaf_zgbtf2_native FORTRAN_NAME( magmaf_zgbtf2_native, MAGMAF_ZGBTF2_NATIVE ) -void magmaf_zgbtf2_native( - magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, - magmaDoubleComplex* dA, magma_int_t *ldda, - magma_int_t* ipiv, - magma_int_t* info, magma_queue_t *queue ) -{ - magma_zgbtf2_native( - *m, *n, *kl, *ku, - dA, *ldda, - ipiv, - info, *queue ); -} - #define magmaf_zgbtrf_native FORTRAN_NAME( magmaf_zgbtrf_native, MAGMAF_ZGBTRF_NATIVE ) void magmaf_zgbtrf_native( magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, diff --git a/include/magma_z.h b/include/magma_z.h index d8d0091c9..d9923b11c 100644 --- a/include/magma_z.h +++ b/include/magma_z.h @@ -108,20 +108,6 @@ magma_zgbsv_native( magmaDoubleComplex* dB, magma_int_t lddb, magma_int_t *info); -magma_int_t -magma_zgbtf2_native_v2( - magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, - magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv, - magma_int_t* info, magma_queue_t queue); - -magma_int_t -magma_zgbtf2_native_v2_work( - magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, - magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv, - magma_int_t* info, - void* device_work, magma_int_t* lwork, - magma_queue_t queue); - void magma_zgbsv_native_work( magma_int_t n, magma_int_t kl, magma_int_t ku, magma_int_t nrhs, @@ -130,20 +116,6 @@ magma_zgbsv_native_work( magma_int_t *info, void* device_work, magma_int_t* lwork, magma_queue_t queue); -magma_int_t -magma_zgbtf2_native( - magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, - magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv, - magma_int_t* info, magma_queue_t queue); - -magma_int_t -magma_zgbtf2_native_work( - magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, - magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv, - magma_int_t* info, - void* device_work, magma_int_t* lwork, - magma_queue_t queue); - magma_int_t magma_zgbtrf_native( magma_int_t m, magma_int_t n, diff --git a/magmablas/zgbtf2_kernels.cu b/magmablas/zgbtf2_kernels.cu index 6f7f1fa3a..d1b2faef3 100644 --- a/magmablas/zgbtf2_kernels.cu +++ b/magmablas/zgbtf2_kernels.cu @@ -11,19 +11,6 @@ */ #include "magma_internal.h" -#if defined(MAGMA_HAVE_CUDA) -#if CUDA_VERSION >= 12060 -#undef max -#undef min -#endif -#include -namespace cg = cooperative_groups; -#elif defined(MAGMA_HAVE_HIP) -// the hip_cooperative_groups.h file conflicts with magma's definition of min -#undef min -#include -namespace cg = cooperative_groups; -#endif #include "batched_kernel_param.h" #include "magma_templates.h" @@ -189,582 +176,3 @@ magma_zgbtf2_scal_ger_batched( } return 0; } - -/******************************************************************************/ -#if CUDA_VERSION >= 11000 -__global__ -void zgbtf2_native_kernel( - int m, int n, int nb, int kl, int ku, - magmaDoubleComplex *dA, int ldda, magma_int_t *ipiv, - int* ju, int gbstep, magma_int_t *dinfo) -{ -#define dA(i,j) dA[(j)*ldda + (i)] - extern __shared__ magmaDoubleComplex zdata[]; - cg::grid_group grid = cg::this_grid(); - const int tx = threadIdx.x; - const int ntx = blockDim.x; - const int bx = blockIdx.x; - const int jc = bx + gbstep; - const int kv = kl + ku; - const int mband = kv + 1 + kl; - - int linfo = (gbstep == 0) ? 0 : *dinfo; - int local_ju = (gbstep == 0) ? -1 : *ju; - int jp = 0; - double rx_abs_max = 0; - magmaDoubleComplex tmp = MAGMA_Z_ZERO, reg = MAGMA_Z_ZERO; - - // setup shared memory - magmaDoubleComplex* sA = zdata; - double* sX = (double*)(sA + mband); - int* sI = (int*)(sX + kl+1); - - // init sA to zero & sX to [0, 1, 2, ...] - for(int i = tx; i < mband; i+=ntx) - sA[i] = MAGMA_Z_ZERO; - for(int i = tx; i < (kl+1); i+=ntx) - sX[i] = i; - __syncthreads(); - - - // determine column start/end - int col_start = 0, col_end = 0; - if( jc <= local_ju ) { - // columns affected by previous factorization steps - col_start = 0; - col_end = mband-1; - } - else { - // new columns - col_start = kl + max(ku-jc,0); - col_end = kl + ku + min(kl, n-1-jc); - } - - // read columns - for(int i = col_start+tx; i <= col_end; i+=ntx) { - sA[i] = dA(i, jc); - } - __syncthreads(); - - // main loop - for(int j = 0; j < nb; j++) { - int gbj = j + gbstep; - int km = 1 + min( kl, m-gbj ); // diagonal and subdiagonal(s) - // find pivot - if(bx == j) { - if(km >= 128) { - for(int i = tx; i < km; i+=ntx) { - sX[i] = fabs( MAGMA_Z_REAL(sA[kv+i]) ) + fabs( MAGMA_Z_IMAG(sA[kv+i]) ); - sI[i] = i; - } - __syncthreads(); - - magma_getidmax_n(km, tx, sX, sI); - jp = sI[0]; - rx_abs_max = sX[0]; - } - else{ - for(int i = tx; i < km; i+=ntx) { - sX[i] = fabs( MAGMA_Z_REAL(sA[kv+i]) ) + fabs( MAGMA_Z_IMAG(sA[kv+i]) ); - } - __syncthreads(); - - rx_abs_max = sX[0]; - jp = 0; - for(int i = 1; i < km; i++) { - if( sX[i] > rx_abs_max ) { - rx_abs_max = sX[i]; - jp = i; - } - } - } - - linfo = ( rx_abs_max == MAGMA_D_ZERO && linfo == 0) ? (gbj+1) : linfo; - local_ju = max(local_ju, min(gbj+ku+jp, n-1)); - - if(tx == 0) { - ipiv[gbj] = jp + gbj + 1; // +1 for fortran indexing - *dinfo = (magma_int_t)linfo; - *ju = local_ju; - } - } - grid.sync(); - - // read information written by j-th block - if(bx >= j) { - jp = ipiv[gbj] - gbj - 1; - linfo = (int)(*dinfo); - local_ju = *ju; - } - //local_ju = max(local_ju, min(gbstep+j+ku+jp, n-1)); - //swap_length = local_ju - (j+gbstep) + 1; - __syncthreads(); - - // swap - if(jc >= (j+gbstep) && jc <= local_ju && tx == 0) { - if(jp != 0) { - int j1 = (kv + 0) - (bx-j); - int j2 = (kv + jp) - (bx-j); - tmp = sA[j1]; - sA[j1] = sA[j2]; - sA[j2] = tmp; - } - } - __syncthreads(); - - // scal & write to global memory - if(bx == j) { - reg = ( rx_abs_max == MAGMA_D_ZERO ) ? MAGMA_Z_ONE : MAGMA_Z_DIV(MAGMA_Z_ONE, sA[kv] ); - for(int i = tx; i < (km-1); i+=ntx) { - sA[kv+1+i] *= reg; - } - __syncthreads(); - - for(int i = tx; i < mband; i+=ntx) { - dA(i,jc) = sA[i]; - } - } - grid.sync(); - - // ger - if(jc > gbj && jc <= local_ju) { - int j1 = (kv + 0) - (bx-j); - for(int i = tx; i < km-1; i+=ntx) { - sA[j1+1+i] -= sA[j1] * dA(kv+1+i,gbj); - } - __syncthreads(); - } - } - - // write columns [nb : ju] - if(jc >= gbstep && jc <= local_ju) { - for(int i = tx; i < mband; i+=ntx) { - dA(i,jc) = sA[i]; - } - } -#undef dA -} - -/******************************************************************************/ -extern "C" -magma_int_t -magma_zgbtf2_native_work( - magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, - magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv, - magma_int_t* info, - void* device_work, magma_int_t* lwork, - magma_queue_t queue) -{ - magma_int_t kv = kl + ku; - magma_int_t mband = kv + 1 + kl; - - *info = 0; - if( m < 0 ) - *info = -1; - else if ( n < 0 ) - *info = -2; - else if ( kl < 0 ) - *info = -3; - else if ( ku < 0 ) - *info = -4; - else if ( ldda < mband ) - *info = -6; - - // calculate workspace required - magma_int_t lwork_required = 0; - lwork_required += 1 * sizeof(magma_int_t); // ju - lwork_required += 1 * sizeof(magma_int_t); // dinfo - - if(*lwork < 0) { - // query assumed - *lwork = lwork_required; - return *info; - } - - if(*lwork < lwork_required) { - *info = -11; - } - - if (*info != 0) { - magma_xerbla( __func__, -(*info) ); - return *info; - } - - magma_int_t sm_count = magma_getdevice_multiprocessor_count(); - magma_int_t nb = max((magma_int_t)8, sm_count - (kv + 1)); - magma_int_t nthreads = magma_roundup(kv+1,32); - - // device pointers - magma_int_t *ju = (magma_int_t*)device_work; - magma_int_t *dinfo = ju + 1; - - magma_int_t shmem = 0; - shmem += mband * sizeof(magmaDoubleComplex); - shmem += (kl+1) * sizeof(double); - shmem += (kl+1) * sizeof(int); - - dim3 threads(nthreads, 1, 1); - for(magma_int_t gbstep = 0; gbstep < n; gbstep += nb) { - magma_int_t ib = min(nb, n-gbstep); - magma_int_t nblocks = min(ib+kv+1, n-gbstep); - dim3 grid(nblocks, 1, 1); - void *kernel_args[] = {&m, &n, &ib, &kl, &ku, &dA, &ldda, &ipiv, &ju, &gbstep, &dinfo}; - cudaError_t e = cudaLaunchCooperativeKernel((void*)zgbtf2_native_kernel, grid, threads, kernel_args, shmem, queue->cuda_stream()); - } - - magma_igetvector_async( 1, dinfo, 1, info, 1, queue ); - - return *info; -} -#endif - -/******************************************************************************/ -extern "C" -magma_int_t -magma_zgbtf2_native( - magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, - magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv, - magma_int_t* info, magma_queue_t queue) -{ -#if CUDA_VERSION < 11000 - fprintf( stderr, "%s is not supported for CUDA < 11.0\n", __func__); - *info = MAGMA_ERR_NOT_SUPPORTED; -#else - magma_int_t kv = kl + ku; - magma_int_t mband = kv + 1 + kl; - - *info = 0; - if( m < 0 ) - *info = -1; - else if ( n < 0 ) - *info = -2; - else if ( kl < 0 ) - *info = -3; - else if ( ku < 0 ) - *info = -4; - else if ( ldda < mband ) - *info = -6; - - if (*info != 0) { - magma_xerbla( __func__, -(*info) ); - return *info; - } - - // query workspace - magma_int_t lwork[1] = {-1}; - magma_zgbtf2_native_work(m, n, kl, ku, NULL, ldda, NULL, info, NULL, lwork, queue); - - void* device_work = NULL; - magma_malloc(&device_work, lwork[0]); - - magma_zgbtf2_native_work(m, n, kl, ku, dA, ldda, ipiv, info, device_work, lwork, queue); - - magma_free(device_work); -#endif - return *info; -} - -/******************************************************************************/ -// kernel for gbtf2 using cooperative groups and 1D cyclic dist. of columns -// among thread-blocks -#if CUDA_VERSION >= 11000 -__global__ -void zgbtf2_native_kernel_v2( - int m, int n, int nb, int NB, int kl, int ku, - magmaDoubleComplex *dA, int ldda, magma_int_t *ipiv, - int* ju, int gbstep, magma_int_t *dinfo) -{ -#define dA(i, j) dA[(j)*ldda + (i)] -#define sA(i, j) sA[(j)*slda + (i)] - extern __shared__ magmaDoubleComplex zdata[]; - cg::grid_group grid = cg::this_grid(); - const int nb1 = nb+1; - const int tx = threadIdx.x; - const int ntx = blockDim.x; - const int bx = blockIdx.x; - const int nbx = gridDim.x; - const int kv = kl + ku; - const int mband = kv + 1 + kl; - const int slda = mband; - - int linfo = (gbstep == 0) ? 0 : *dinfo; - int local_ju = (gbstep == 0) ? -1 : *ju; - int jp = 0; - double rx_abs_max = 0; - magmaDoubleComplex tmp = MAGMA_Z_ZERO, reg = MAGMA_Z_ZERO; - - // setup shared memory - magmaDoubleComplex* sA = zdata; - double* sX = (double*)( sA + slda * nb1 ); - int* sI = (int*)(sX + kl+1); - - // init sA to zero & sX to [0, 1, 2, ...] - for(int i = tx; i < slda*nb1; i+=ntx) - sA[i] = MAGMA_Z_ZERO; - __syncthreads(); - - // read columns -- nb1 cols/TB - const int total_columns = min(n-gbstep, nbx * nb1); - const int total_factorize = min(total_columns, NB); //min(nbx * nb, total_columns); - const int my_total_columns = (total_columns / nbx) + ((bx < (total_columns % nbx)) ? 1 : 0); - //const int my_last_column = (my_total_columns-1) * nbx + bx; - - int col_start = 0, col_end = 0, lj = 0, glj = 0; - for(int j = bx; j < total_columns; j += nbx) { - // determine column start/end - int jc = j + gbstep; - col_start = (jc <= local_ju) ? 0 : kl + max(ku-jc,0); - col_end = (jc <= local_ju) ? mband-1 : kl + ku + min(kl, n-1-jc); - - // read columns - for(int i = col_start+tx; i <= col_end; i+=ntx) { - sA(i,lj) = dA(i, jc); - } - lj++; - } - __syncthreads(); - - for(int j = 0; j < total_factorize; j++) { - int gbj = j + gbstep; - int km = 1 + min( kl, m-gbj ); - int pivoter = j%nbx; - - // find pivot - if( bx == pivoter) { - lj = j / nbx; - if(km >= 128) { - for(int i = tx; i < km; i+=ntx) { - sX[i] = fabs( MAGMA_Z_REAL(sA(kv+i,lj)) ) + fabs( MAGMA_Z_IMAG(sA(kv+i,lj)) ); - sI[i] = i; - } - __syncthreads(); - - magma_getidmax_n(km, tx, sX, sI); - jp = sI[0]; - rx_abs_max = sX[0]; - } - else{ - for(int i = tx; i < km; i+=ntx) { - sX[i] = fabs( MAGMA_Z_REAL(sA(kv+i,lj)) ) + fabs( MAGMA_Z_IMAG(sA(kv+i,lj)) ); - } - __syncthreads(); - - rx_abs_max = sX[0]; - jp = 0; - for(int i = 1; i < km; i++) { - if( sX[i] > rx_abs_max ) { - rx_abs_max = sX[i]; - jp = i; - } - } - } - linfo = ( rx_abs_max == MAGMA_D_ZERO && linfo == 0) ? (gbj+1) : linfo; - local_ju = max(local_ju, min(gbj+ku+jp, n-1)); - __syncthreads(); - - // swap current col. only, and write pivot info - if(tx == 0) { - ipiv[gbj] = jp + gbj + 1; // +1 for fortran indexing - *dinfo = (magma_int_t)linfo; - *ju = local_ju; - - if(jp != 0) { - int j1 = (kv + 0); - int j2 = (kv + jp); - tmp = sA(j1,lj); - sA(j1,lj) = sA(j2,lj); - sA(j2,lj) = tmp; - } - } - __syncthreads(); - - // scal current column and write it to global mem. - reg = ( rx_abs_max == MAGMA_D_ZERO ) ? MAGMA_Z_ONE : MAGMA_Z_DIV(MAGMA_Z_ONE, sA(kv,lj) ); - for(int i = tx; i < (km-1); i+=ntx) { - sA(kv+1+i,lj) *= reg; - } - __syncthreads(); - - for(int i = tx; i < mband; i+=ntx) { - dA(i,gbj) = sA(i,lj); - } - } - grid.sync(); - - // other blocks read the information - if( !(bx == pivoter) ) { - jp = ipiv[gbj] - gbj - 1; - linfo = (int)(*dinfo); - local_ju = *ju; - } - __syncthreads(); - - // determine your next update column - //lj = (bx <= pivoter) ? (gbj / nb1) + 1 : (gbj / nb1); - lj = (bx <= pivoter) ? (j / nbx) + 1 : (j / nbx); - lj = min(lj, my_total_columns-1); - glj = min(gbstep + (lj * nbx) + bx, n-1); - - // swap - if(tx == 0 && glj > gbj && glj <= local_ju) { - if(jp != 0) { - int j1 = (kv + 0) - (glj-gbj); - int j2 = (kv + jp) - (glj-gbj); - tmp = sA(j1,lj); - sA(j1,lj) = sA(j2,lj); - sA(j2,lj) = tmp; - } - } - __syncthreads(); - - // ger - if(glj > gbj && glj <= local_ju) { - int j1 = (kv + 0) - (glj-gbj); - for(int i = tx; i < km-1; i+=ntx) { - sA(j1+1+i,lj) -= sA(j1,lj) * dA(kv+1+i,gbj); - } - __syncthreads(); - } - } // end of main loop - - - int jj = gbstep + (nb * nbx + bx); - - if( jj < n ) { - for(int i = tx; i < mband; i+=ntx) { - dA(i,jj) = sA(i,nb); - } - } -#undef dA -#undef sA -} - -/******************************************************************************/ -extern "C" -magma_int_t -magma_zgbtf2_native_v2_work( - magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, - magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv, - magma_int_t* info, - void* device_work, magma_int_t* lwork, - magma_queue_t queue) -{ - magma_int_t kv = kl + ku; - magma_int_t mband = kv + 1 + kl; - - *info = 0; - if( m < 0 ) - *info = -1; - else if ( n < 0 ) - *info = -2; - else if ( kl < 0 ) - *info = -3; - else if ( ku < 0 ) - *info = -4; - else if ( ldda < mband ) - *info = -6; - - // calculate workspace required - magma_int_t lwork_required = 0; - lwork_required += 1 * sizeof(magma_int_t); // ju - lwork_required += 1 * sizeof(magma_int_t); // dinfo - - if(*lwork < 0) { - // query assumed - *lwork = lwork_required; - return *info; - } - - if(*lwork < lwork_required) { - *info = -11; - } - - if (*info != 0) { - magma_xerbla( __func__, -(*info) ); - return *info; - } - magma_int_t ione= 1; - magma_int_t nb = 64; - magma_int_t nb1 = nb+1; - magma_int_t NB = nb * (kv + 1); - - magma_int_t nthreads = magma_roundup(kv+1,32); - magma_int_t slda = mband; - - // device pointers - magma_int_t *ju = (magma_int_t*)device_work; - magma_int_t *dinfo = ju + 1; - - magma_int_t shmem = 0; - shmem += slda * nb1 * sizeof(magmaDoubleComplex); - shmem += (kl+1) * sizeof(double); - shmem += (kl+1) * sizeof(int); - - dim3 threads(nthreads, 1, 1); - - for(magma_int_t gbstep = 0; gbstep < n; gbstep += NB) { - magma_int_t ib = min(NB, n-gbstep); - magma_int_t nblocks = min(ib, kv+1); - magma_int_t nb = max(ione, ib / nblocks); - dim3 grid(nblocks, 1, 1); - - void *kernel_args[] = {&m, &n, &nb, &NB, &kl, &ku, &dA, &ldda, &ipiv, &ju, &gbstep, &dinfo}; - cudaError_t e = cudaLaunchCooperativeKernel((void*)zgbtf2_native_kernel_v2, grid, threads, kernel_args, shmem, queue->cuda_stream()); - if(e != cudaSuccess) { - printf("ERROR: %s \n", cudaGetErrorString(e)); - *info = -100; - return *info; - } - } - - magma_igetvector_async( 1, dinfo, 1, info, 1, queue ); - - return *info; -} -#endif - -/******************************************************************************/ -extern "C" -magma_int_t -magma_zgbtf2_native_v2( - magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, - magmaDoubleComplex* dA, magma_int_t ldda, magma_int_t* ipiv, - magma_int_t* info, magma_queue_t queue) -{ -#if CUDA_VERSION < 11000 - fprintf( stderr, "%s is not supported for CUDA < 11.0\n", __func__); - *info = MAGMA_ERR_NOT_SUPPORTED; -#else - magma_int_t kv = kl + ku; - magma_int_t mband = kv + 1 + kl; - - *info = 0; - if( m < 0 ) - *info = -1; - else if ( n < 0 ) - *info = -2; - else if ( kl < 0 ) - *info = -3; - else if ( ku < 0 ) - *info = -4; - else if ( ldda < mband ) - *info = -6; - - if (*info != 0) { - magma_xerbla( __func__, -(*info) ); - return *info; - } - - // query workspace - magma_int_t lwork[1] = {-1}; - magma_zgbtf2_native_v2_work(m, n, kl, ku, NULL, ldda, NULL, info, NULL, lwork, queue); - - void* device_work = NULL; - magma_malloc(&device_work, lwork[0]); - - magma_zgbtf2_native_v2_work(m, n, kl, ku, dA, ldda, ipiv, info, device_work, lwork, queue); - - magma_free(device_work); -#endif - return *info; -} diff --git a/src/zgbtrf_gpu.cpp b/src/zgbtrf_gpu.cpp index 5cf3a554c..a4357b0ab 100644 --- a/src/zgbtrf_gpu.cpp +++ b/src/zgbtrf_gpu.cpp @@ -13,8 +13,6 @@ #include "magma_internal.h" #include "batched_kernel_param.h" -#define MAGMA_ZGBTRF_NATIVE_DISABLE_COOP_KERNEL - extern "C" void magma_zgbtrf_native_work( magma_int_t m, magma_int_t n, @@ -50,22 +48,9 @@ magma_zgbtrf_native_work( NULL, lddab, lddab*n, NULL, min(m,n), NULL, NULL, gbtrf_batch_lwork, 1, queue); - #ifndef MAGMA_ZGBTRF_NATIVE_DISABLE_COOP_KERNEL - // [2] workspace of native gbtrf with cooperative groups - magma_int_t gbtrf_cogroups_lwork[1] = {-1}; - magma_zgbtf2_native_v2_work( - m, n, kl, ku, - NULL, lddab, NULL, info, - NULL, gbtrf_cogroups_lwork, queue); - #endif - // [3] we need a "device_info" on device memory magma_int_t gbtrf_native_lwork[1] = {0}; - #ifndef MAGMA_ZGBTRF_NATIVE_DISABLE_COOP_KERNEL - gbtrf_native_lwork[0] = gbtrf_batch_lwork[0] + gbtrf_cogroups_lwork[0] + sizeof(magma_int_t); - #else gbtrf_native_lwork[0] = gbtrf_batch_lwork[0] + sizeof(magma_int_t); - #endif if(*lwork < 0) { // workspace query assumed @@ -79,12 +64,6 @@ magma_zgbtrf_native_work( return; } - #ifndef MAGMA_ZGBTRF_NATIVE_DISABLE_COOP_KERNEL - // try cooperative groups kernel first - magma_zgbtf2_native_v2_work(m, n, kl, ku, dAB, lddab, dipiv, info, device_work, gbtrf_cogroups_lwork, queue); - if(*info != -100) return; // cooperative group kernel finished successfully - #endif - magma_int_t* device_info = (magma_int_t*)((uint8_t*)device_work + gbtrf_batch_lwork[0]); magma_zgbtrf_batched_strided_work( m, n, kl, ku, diff --git a/testing/testing_zgbtrf_gpu.cpp b/testing/testing_zgbtrf_gpu.cpp index 2ddd0164f..10faf71c8 100644 --- a/testing/testing_zgbtrf_gpu.cpp +++ b/testing/testing_zgbtrf_gpu.cpp @@ -283,11 +283,7 @@ int main( int argc, char** argv) device_work, lwork, opts.queue); magma_time = magma_sync_wtime( opts.queue ) - magma_time; } - else if(opts.version == 3) { - magma_time = magma_wtime(); - magma_zgbtf2_native_v2(M, N, KL, KU, dA, lddab, dipiv_magma, &info, opts.queue); - magma_time = magma_wtime() - magma_time; - } + magma_perf = gflops / magma_time; magma_zgetmatrix( Mband, Nband, dA, lddab, h_Amagma, ldab, opts.queue ); From d29cb95ae8fb5fe1a5636b35ab8ce843248ffb0b Mon Sep 17 00:00:00 2001 From: Ahmad Abdelfattah Date: Fri, 22 Aug 2025 12:26:13 -0400 Subject: [PATCH 27/27] remove unwanted fortran wrappers --- control/magma_cf77.cpp | 28 ---------------------------- control/magma_df77.cpp | 28 ---------------------------- control/magma_sf77.cpp | 28 ---------------------------- 3 files changed, 84 deletions(-) diff --git a/control/magma_cf77.cpp b/control/magma_cf77.cpp index 84690cc1b..1bc8f10e6 100644 --- a/control/magma_cf77.cpp +++ b/control/magma_cf77.cpp @@ -208,34 +208,6 @@ void magmaf_cgbsv_native( info ); } -#define magmaf_cgbtf2_native_v2 FORTRAN_NAME( magmaf_cgbtf2_native_v2, MAGMAF_CGBTF2_NATIVE_V2 ) -void magmaf_cgbtf2_native_v2( - magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, - magmaFloatComplex* dA, magma_int_t *ldda, - magma_int_t* ipiv, - magma_int_t* info, magma_queue_t *queue ) -{ - magma_cgbtf2_native_v2( - *m, *n, *kl, *ku, - dA, *ldda, - ipiv, - info, *queue ); -} - -#define magmaf_cgbtf2_native FORTRAN_NAME( magmaf_cgbtf2_native, MAGMAF_CGBTF2_NATIVE ) -void magmaf_cgbtf2_native( - magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, - magmaFloatComplex* dA, magma_int_t *ldda, - magma_int_t* ipiv, - magma_int_t* info, magma_queue_t *queue ) -{ - magma_cgbtf2_native( - *m, *n, *kl, *ku, - dA, *ldda, - ipiv, - info, *queue ); -} - #define magmaf_cgbtrf_native FORTRAN_NAME( magmaf_cgbtrf_native, MAGMAF_CGBTRF_NATIVE ) void magmaf_cgbtrf_native( magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, diff --git a/control/magma_df77.cpp b/control/magma_df77.cpp index 8c4704eec..265a98fe9 100644 --- a/control/magma_df77.cpp +++ b/control/magma_df77.cpp @@ -280,34 +280,6 @@ void magmaf_dgbsv_native( info ); } -#define magmaf_dgbtf2_native_v2 FORTRAN_NAME( magmaf_dgbtf2_native_v2, MAGMAF_DGBTF2_NATIVE_V2 ) -void magmaf_dgbtf2_native_v2( - magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, - double* dA, magma_int_t *ldda, - magma_int_t* ipiv, - magma_int_t* info, magma_queue_t *queue ) -{ - magma_dgbtf2_native_v2( - *m, *n, *kl, *ku, - dA, *ldda, - ipiv, - info, *queue ); -} - -#define magmaf_dgbtf2_native FORTRAN_NAME( magmaf_dgbtf2_native, MAGMAF_DGBTF2_NATIVE ) -void magmaf_dgbtf2_native( - magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, - double* dA, magma_int_t *ldda, - magma_int_t* ipiv, - magma_int_t* info, magma_queue_t *queue ) -{ - magma_dgbtf2_native( - *m, *n, *kl, *ku, - dA, *ldda, - ipiv, - info, *queue ); -} - #define magmaf_dgbtrf_native FORTRAN_NAME( magmaf_dgbtrf_native, MAGMAF_DGBTRF_NATIVE ) void magmaf_dgbtrf_native( magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, diff --git a/control/magma_sf77.cpp b/control/magma_sf77.cpp index a23a1b1a3..3602baa46 100644 --- a/control/magma_sf77.cpp +++ b/control/magma_sf77.cpp @@ -280,34 +280,6 @@ void magmaf_sgbsv_native( info ); } -#define magmaf_sgbtf2_native_v2 FORTRAN_NAME( magmaf_sgbtf2_native_v2, MAGMAF_SGBTF2_NATIVE_V2 ) -void magmaf_sgbtf2_native_v2( - magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, - float* dA, magma_int_t *ldda, - magma_int_t* ipiv, - magma_int_t* info, magma_queue_t *queue ) -{ - magma_sgbtf2_native_v2( - *m, *n, *kl, *ku, - dA, *ldda, - ipiv, - info, *queue ); -} - -#define magmaf_sgbtf2_native FORTRAN_NAME( magmaf_sgbtf2_native, MAGMAF_SGBTF2_NATIVE ) -void magmaf_sgbtf2_native( - magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku, - float* dA, magma_int_t *ldda, - magma_int_t* ipiv, - magma_int_t* info, magma_queue_t *queue ) -{ - magma_sgbtf2_native( - *m, *n, *kl, *ku, - dA, *ldda, - ipiv, - info, *queue ); -} - #define magmaf_sgbtrf_native FORTRAN_NAME( magmaf_sgbtrf_native, MAGMAF_SGBTRF_NATIVE ) void magmaf_sgbtrf_native( magma_int_t *m, magma_int_t *n, magma_int_t *kl, magma_int_t *ku,