Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

# Require at least CMake 3.0.
cmake_minimum_required (VERSION 3.0)
# Put this project's cmake/ directory at the front of the module search path so
# that find_package() can pick up the find modules shipped with the sources.
# CMAKE_CURRENT_SOURCE_DIR is used rather than CMAKE_SOURCE_DIR: both are the
# same for a stand-alone build, but only the former still points at this
# project when it is pulled into a larger build through add_subdirectory().
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})

project(RGL)
#https://stackoverflow.com/questions/13298504/using-cmake-with-setup-py
Expand Down Expand Up @@ -55,5 +56,24 @@ endif()
# Report the install destination currently held in PYTHON_DEST.
message(STATUS "Python wrappers will be installed in " ${PYTHON_DEST})



# Disabled sketch: fetch GLib 2.62.6 from its Git repository and build it with
# meson/ninja through ExternalProject. Every line below is commented out, so
# none of it is executed.
# include(ExternalProject)
# ExternalProject_Add(
# GLIB2
# #URL http://download.dre.vanderbilt.edu/previous_versions/ACE-${ACE_VERSION}.zip
# GIT_REPOSITORY https://github.com/GNOME/glib.git
# GIT_TAG 2.62.6
# CONFIGURE_COMMAND meson _build
# BUILD_COMMAND ninja -C _build
# INSTALL_COMMAND ninja -C _build install
# LOG_INSTALL 1
# BINARY_DIR ${CMAKE_BINARY_DIR}/build/
# INSTALL_DIR ${LIBRARY_DIR}
# STAMP_DIR ${CMAKE_BINARY_DIR}/stamp
# TMP_DIR ${CMAKE_BINARY_DIR}/tmp
# DOWNLOAD_DIR ${CMAKE_BINARY_DIR}/download
# SOURCE_DIR ${CMAKE_BINARY_DIR}/source
# )

# Only the src directory is added here; the direct src/Core entry is disabled.
#add_subdirectory(src/Core)
add_subdirectory(src)
57 changes: 57 additions & 0 deletions cmake/FindGLIB2.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# - Try to find the GLIB2 libraries
#
# Optional inputs (cache entries, e.g. passed with -D):
#
# GLIB2_DIR - base directory of a glib2 installation
# GLIB2_INCLUDE_DIR, GLIB2_LIBRARY_DIR - when both are set, GLIB2_LIBRARY_DIR
#                                        is used as GLIB2_DIR
#
# Once done this will define
#
# GLIB2_FOUND - system has glib2
# GLIB2_DIR - path to the glib2 base directory
# GLIB2_INCLUDE_DIR - the glib2 include directory
# GLIB2_LIBRARIES - glib2 library
# GTHREAD2_LIBRARIES - gthread2 library

set(GLIB2_DIR GLIB2_DIR-NOTFOUND CACHE PATH "Location of GLIB2 package")

# Diagnostics are only printed when the caller did not ask for a QUIET search.
if(NOT GLIB2_FIND_QUIETLY)
  message(STATUS "GLIB2_INCLUDE_DIR > ${GLIB2_INCLUDE_DIR} <")
  message(STATUS "GLIB2_LIBRARIES > ${GLIB2_LIBRARIES} <")
endif()

if(GLIB2_INCLUDE_DIR AND GLIB2_LIBRARY_DIR)
  # Locations were supplied up front: report once, then stay silent and use the
  # library location as the package base directory.
  if(NOT GLIB2_FIND_QUIETLY)
    message(STATUS "Should have variables set")
  endif()
  set(GLIB2_FIND_QUIETLY TRUE)
  set(GLIB2_DIR ${GLIB2_LIBRARY_DIR})
endif()

if(GLIB2_DIR)
  # Derive the candidate library/include directories from the base directory.
  set(PKG_GLIB_LIBRARY_DIRS
      "${GLIB2_DIR}/lib${CMAKE_BUILD_ARCH}"
      "${GLIB2_DIR}/lib")
  set(PKG_GLIB_INCLUDE_DIRS
      "${GLIB2_DIR}/include/"
      "${GLIB2_DIR}/lib/glib-2.0/include")
elseif(NOT WIN32)
  # pkg-config is only used as a source of candidate directories. It is queried
  # quietly and without REQUIRED so that a missing pkg-config (or a missing
  # glib-2.0.pc) does not abort the configure step from inside this module:
  # success or failure is decided by find_package_handle_standard_args(), which
  # honours the REQUIRED/QUIET options given to find_package(GLIB2).
  find_package(PkgConfig QUIET)
  if(PKG_CONFIG_FOUND)
    pkg_check_modules(PKG_GLIB QUIET glib-2.0)
  endif()
endif()

# The directories computed from GLIB2_DIR or reported by pkg-config are passed
# as HINTS, not PATHS: HINTS are consulted before the system default locations,
# whereas PATHS are only tried after them. With PATHS, an explicitly requested
# installation could silently lose to another copy found on the system.

# Directory containing the main public header, glib.h.
find_path(GLIB2_MAIN_INCLUDE_DIR glib.h
          HINTS ${PKG_GLIB_INCLUDE_DIRS}
          PATH_SUFFIXES glib-2.0)

# Core GLib library.
find_library(GLIB2_LIBRARIES
             NAMES glib-2.0
             HINTS ${PKG_GLIB_LIBRARY_DIRS})

# GThread companion library.
find_library(GTHREAD2_LIBRARIES
             NAMES gthread-2.0
             HINTS ${PKG_GLIB_LIBRARY_DIRS})

# search the glibconfig.h include dir under the same root where the library is
# found (<libdir>/glib-2.0/include); the system library path stays a fallback.
find_path(GLIB2_INTERNAL_INCLUDE_DIR glibconfig.h
          HINTS ${PKG_GLIB_INCLUDE_DIRS} ${PKG_GLIB_LIBRARY_DIRS}
          PATHS ${CMAKE_SYSTEM_LIBRARY_PATH}
          PATH_SUFFIXES glib-2.0/include)

# The public include list starts with the directory that holds glib.h.
set(GLIB2_INCLUDE_DIR ${GLIB2_MAIN_INCLUDE_DIR})

# The glibconfig.h directory is treated as optional for now: it is appended
# only when the search above located it.
if(GLIB2_INTERNAL_INCLUDE_DIR)
  list(APPEND GLIB2_INCLUDE_DIR ${GLIB2_INTERNAL_INCLUDE_DIR})
endif()

# Sets GLIB2_FOUND and emits the standard found / not-found message; both
# libraries and the glib.h directory must have been located.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  GLIB2
  DEFAULT_MSG
  GLIB2_LIBRARIES
  GTHREAD2_LIBRARIES
  GLIB2_MAIN_INCLUDE_DIR)

# Keep the result variables out of the default cache view.
mark_as_advanced(
  GLIB2_INCLUDE_DIR
  GLIB2_LIBRARIES
  GTHREAD2_LIBRARIES
  GLIB2_INTERNAL_INCLUDE_DIR
  GLIB2_MAIN_INCLUDE_DIR)
2 changes: 1 addition & 1 deletion recipe/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ cp -rv "$RECIPE_DIR/../test" "$SRC_DIR/"

# Switch to $SRC_DIR before configuring.
cd $SRC_DIR

# NOTE(review): the two cmake invocations below look like the removed and added
# sides of a diff rather than two intended runs - confirm which one the script
# should keep. They are identical except that the second also passes
# -DGLIB2_INCLUDE_DIR and -DGLIB2_LIBRARY_DIR, both set to $CONDA_PREFIX.
cmake -G "Unix Makefiles" $RECIPE_DIR/../ -DBUILD_PYTHON_WRAPPER=ON -DCONDA_BUILD=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB=$CONDA_PREFIX/lib -DLIBRARY_INC=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$PREFIX
cmake -G "Unix Makefiles" $RECIPE_DIR/../ -DBUILD_PYTHON_WRAPPER=ON -DCONDA_BUILD=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB=$CONDA_PREFIX/lib -DLIBRARY_INC=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$PREFIX -DGLIB2_INCLUDE_DIR=$CONDA_PREFIX -DGLIB2_LIBRARY_DIR=$CONDA_PREFIX

# Build and install in one step through the generated Makefiles.
make install
2 changes: 2 additions & 0 deletions recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ requirements:
- vc 9 # [win and py27]
- cmake
- ripgrep
- glib # [unix]

run:
- {{ pin_compatible('numpy', max_pin='x.x') }}
Expand All @@ -37,6 +38,7 @@ requirements:
- vc 14 # [win and py35]
- vc 9 # [win and py27]
- libgcc-ng # [unix]
- glib # [unix]

about:
home: http://www.ccpi.ac.uk
Expand Down
47 changes: 41 additions & 6 deletions src/Core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@ message("CIL_VERSION ${CIL_VERSION}")
## Build the regularisers package as a library
message("Creating Regularisers as a shared library")

# Both packages are mandatory: configuration stops here if either is missing.
find_package(GLIB2 REQUIRED)
find_package(OpenMP REQUIRED)
if (OPENMP_FOUND)
  message(STATUS "OpenMP_C_FLAGS ${OpenMP_C_FLAGS}")
  # Enable OpenMP for the C and the C++ sources.
  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  # set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
  # set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
  # set (CMAKE_STATIC_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_STATIC_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
endif()

# Print the flag variables exactly as they stand. The previous messages appended
# a literal "-fno-omit-frame-pointer" that is not part of the variables and so
# misreported what the compiler actually receives.
message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
message("CMAKE_C_FLAGS ${CMAKE_C_FLAGS}")
message("CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS}")
message("CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS}")
message("CMAKE_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS}")

# Default to a Release build only when no build type was requested and the
# generator is single-configuration; an explicit -DCMAKE_BUILD_TYPE=... (for
# example Debug) is no longer overridden.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE "Release")
endif()

# Extra link libraries; starts empty.
set (EXTRA_LIBRARIES "")
Expand All @@ -29,16 +46,32 @@ elseif(UNIX)
set(EXTRA_LIBRARIES "m")
endif()
# Append FLAGS to each language's own flag set. The C flags must be extended
# from CMAKE_C_FLAGS: seeding them from CMAKE_CXX_FLAGS throws away everything
# previously added to the C flags and, combined with a second append, applies
# FLAGS twice.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS}")
message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")

## Build the regularisers package as a library
message("Adding regularisers as a shared library")

#set(CMAKE_C_COMPILER /apps/pgi/linux86-64/17.4/bin/pgcc)
#set(CMAKE_C_COMPILER /opt/intel/compilers_and_libraries/linux/bin/intel64/icc)
#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -xSSE4.2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp")
#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -axAVX2 -xAVX2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp")
#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -mavx512f -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512vl -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp")

#set(CMAKE_C_COMPILER clang)
#set(CMAKE_C_FLAGS "-march=nocona -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -ftree-vectorize -fopenmp")

#set(CMAKE_C_COMPILER gcc-9)
# Host-specific tuning is optional: -march=native produces binaries that may
# not run on CPUs older than the build machine, so packaged/redistributed
# builds can switch it off with -DCILREG_MARCH_NATIVE=OFF. The default (ON)
# keeps the flags that were previously hard-coded.
option(CILREG_MARCH_NATIVE "Tune the C code for the CPU of the build host (-march=native)" ON)
if(NOT MSVC)
  # These are GCC-style options, so they are not passed to MSVC.
  set(cilreg_c_tuning "")
  if(CILREG_MARCH_NATIVE)
    set(cilreg_c_tuning "-march=native")
  endif()
  set(cilreg_c_tuning "${cilreg_c_tuning} -ftree-vectorize")
  if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
    # Vectorisation reports: -fopt-info-* is specific to GCC.
    set(cilreg_c_tuning "${cilreg_c_tuning} -fopt-info-vec-optimized -fopt-info-vec")
  endif()
  set(cilreg_c_tuning "${cilreg_c_tuning} -fopenmp")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${cilreg_c_tuning}")
endif()
#set(CMAKE_C_FLAGS "-march=nocona -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=128 -fopenmp")
#set(CMAKE_C_FLAGS "-march=native -mavx2 -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=512 -fopenmp")
#set(CMAKE_C_FLAGS "-march=native -mavx512f -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512vl -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=512 -fopenmp")
#set(CMAKE_C_FLAGS_RELEASE "-g -gdwarf-2 -g3 -fno-omit-frame-pointer")

#set(CMAKE_C_FLAGS "-acc -Minfo -ta=tesla:cc20 -openmp")
#set(CMAKE_C_FLAGS "-acc -Minfo -ta=multicore -openmp -fPIC")
add_library(cilreg SHARED
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/hw_sched.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/hw_thread.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_TV_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SB_TV_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/PD_TV_core.c
Expand All @@ -49,18 +82,20 @@ add_library(cilreg SHARED
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TNV_core.c
#${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TNV_core_fast.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Nonlocal_TV_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/PatchSelect_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/utils.c
${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/Diffusion_Inpaint_core.c
${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/NonlocalMarching_Inpaint_core.c
)
# Link cilreg against the OpenMP linker flags, the extra libraries and the
# GLib/GThread libraries; one call replaces the two overlapping ones.
target_link_libraries(cilreg
  ${OpenMP_EXE_LINKER_FLAGS}
  ${EXTRA_LIBRARIES}
  ${GLIB2_LIBRARIES}
  ${GTHREAD2_LIBRARIES})

# include_directories() accepts directories only. The former leading
# "cilreg PUBLIC" arguments were therefore taken as two (non-existent) include
# directories, and each source directory was listed twice; both are removed.
# NOTE(review): the call stays directory-scoped so other targets in this
# directory keep the same header search path; moving to
# target_include_directories(cilreg ...) needs a check of those targets first.
include_directories(
  ${GLIB2_INCLUDE_DIR}
  ${LIBRARY_INC}/include
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/
  ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/)

## Install

Expand Down
25 changes: 25 additions & 0 deletions src/Core/performance_CPU/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
TNV_core.c.v4.stdver: Fully equivalent (single-threaded). There are two potential breakage points.
- If OpenMP is enabled, the access to div_upd will not be serialized and the results will break
- The results will differ slightly due to a different order of summation if the loop summing resprimal/resdual is organized in a logical way
TNV_core.c.v15: Multi-threaded. Works correctly only in single-threaded mode (if TNV_NEW_STYLE is disabled). In multi-threaded mode the results differ slightly due to the changed order of operations
- TNV_NEW_STYLE slightly disturbs results in both single- and multi-threaded modes
- Resprimal/resdual are summed in groups (not sequentially) when multiple threads are used. But this should actually improve precision. Use TNV_CHECK_RES to check conformance
- Afterwards, in multi-threaded mode there is still a minor discrepancy which first occurs in resprimal (after a few iterations). This is because of the changed order of operations while computing
div_upd (only on the first lines of each new sub-block). Normally, we first compute the vertical and, then, add horizontal. On the border rows, instead we first add horizontals...
To check, div/div_upd were changed to double. There is no difference then.
TNV_core.c.v17: Computationally compatible with v15.
- Padding actually harms performance
- Intel compiler gives about 10% speed-up
TNV_core.c.v18: Blocking helps to boost performance further but only with Intel Compiler. Gcc/Clang is slightly slower here.
- Padding here doesn't harm performance, but is not helpful either
- Difference between icc and gcc is probably due to auto-vectorization.
- Results slightly changed due to different order of operations
TNV_core.c.v19: Eliminate conditionals in the inner loops to help gcc-autovectorisation
- Last version implementing the full algorithm with backtracking in the middle of iterations.
- Again results slightly diverge from v18 due to different order of operations
TNV_core.c.v27: v18 with backtracking only on first iterations (otherwise warning reported)
TNV_core.c.v32: v19 with backtracking only on first iterations (otherwise warning reported)


Repo:
- Padding seems to have effect on the newer AVX2 systems. Re-enabled.
Loading