Skip to content

Commit 401c37a

Browse files
author
Anudeep
committed
Minor fixes and perf improvements
1 parent 483f2ef commit 401c37a

16 files changed

+544
-447
lines changed

cudaSift/CUDA/CMakeLists.txt

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,25 @@
1+
# Modifications Copyright (C) 2023 Intel Corporation
2+
3+
# Permission is hereby granted, free of charge, to any person obtaining a copy
4+
# of this software and associated documentation files (the "Software"),
5+
# to deal in the Software without restriction, including without limitation
6+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
7+
# and/or sell copies of the Software, and to permit persons to whom
8+
# the Software is furnished to do so, subject to the following conditions:
9+
10+
# The above copyright notice and this permission notice shall be included
11+
# in all copies or substantial portions of the Software.
12+
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14+
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
17+
# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19+
# OR OTHER DEALINGS IN THE SOFTWARE.
20+
21+
# SPDX-License-Identifier: MIT
22+
123
cmake_minimum_required(VERSION 3.10)
224
project(cudaSift C CXX)
325

@@ -7,28 +29,16 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
729
option(DEVICE_TIMER "Build using Device Timer" OFF)
830
option(USE_SM "Specifies which streaming multiprocessor architecture to use" )
931

32+
set(DEF_WL_CXX_FLAGS " -msse2 ")
33+
set(DEF_GENERAL_CXX_FLAGS " -O3 ")
34+
set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
35+
1036
find_package(OpenCV REQUIRED)
1137
find_package(CUDA)
1238
if (NOT CUDA_FOUND)
1339
message(STATUS "CUDA not found. Project will not be built.")
1440
endif(NOT CUDA_FOUND)
1541

16-
if (WIN32)
17-
set(EXTRA_CXX_FLAGS "/DVERBOSE /D_CRT_SECURE_NO_WARNINGS ")
18-
list(APPEND CUDA_NVCC_FLAGS "-arch=sm_35;--compiler-options;-O2;-DVERBOSE")
19-
endif()
20-
if (UNIX)
21-
# if (APPLE)
22-
# set(EXTRA_CXX_FLAGS "-DVERBOSE -msse2")
23-
# list(APPEND CUDA_NVCC_FLAGS "-arch=sm_35;--compiler-options;-O2;-DVERBOSE")
24-
# else()
25-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -msse2")
26-
# list(APPEND CUDA_NVCC_FLAGS "-lineinfo;-ccbin;/usr/bin/gcc;--compiler-options;-O2;-D_FORCE_INLINES;-DVERBOSE_NOT")
27-
# list(APPEND CUDA_NVCC_FLAGS "-lineinfo;-ccbin")
28-
# list(APPEND CUDA_NVCC_FLAGS "-g;-G;-lineinfo;-ccbin;/usr/bin/gcc-8;--compiler-options;-O2;-D_FORCE_INLINES;-DVERBOSE_NOT;")
29-
# endif()
30-
endif()
31-
3242
set(cuda_sources
3343
cudaImage.cu
3444
cudaImage.h
@@ -55,11 +65,28 @@ if(DEVICE_TIMER)
5565
add_compile_options(-DDEVICE_TIMER)
5666
endif()
5767

58-
cuda_add_executable(cudasift ${cuda_sources} ${sources} OPTIONS -arch=sm_${USE_SM})
59-
set_target_properties(cudasift PROPERTIES
60-
COMPILE_FLAGS "${EXTRA_CXX_FLAGS}"
61-
)
68+
# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific)
69+
# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags)
70+
# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity
71+
72+
if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
73+
message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
74+
elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
75+
message(STATUS "Using DEFAULT compilation flags")
76+
set(CMAKE_CXX_FLAGS "${DEF_COMBINED_CXX_FLAGS}")
77+
elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
78+
message(STATUS "OVERRIDING GENERAL compilation flags")
79+
set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}")
80+
string(APPEND CMAKE_CXX_FLAGS ${DEF_WL_CXX_FLAGS})
81+
elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
82+
message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
83+
endif()
84+
6285

86+
set(CUDA_SEPARABLE_COMPILATION ON)
87+
message(STATUS "CXX Compilation flags to: ${CMAKE_CXX_FLAGS}")
88+
89+
cuda_add_executable(cudasift ${cuda_sources} ${sources} OPTIONS -arch=sm_${USE_SM})
6390
target_link_libraries(cudasift ${CUDA_cudadevrt_LIBRARY} ${OpenCV_LIBS})
6491

6592
install(FILES

cudaSift/CUDA/mainSift.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ int main(int argc, char **argv)
143143

144144
// data validation
145145
auto dataVerficationTimer_start = std::chrono::steady_clock::now();
146-
Utility::RunDataVerification(thresh, matchPercentage);
146+
int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
147147
auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
148148
dataVerificationTime = std::chrono::duration<float, std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count();
149149
// // Print out and store summary data
@@ -161,7 +161,7 @@ int main(int argc, char **argv)
161161
std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms"
162162
<< "\n"
163163
<< std::endl;
164-
return 0;
164+
return data_verification_flag;
165165
}
166166

167167
void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography)

cudaSift/HIP/CMakeLists.txt

Lines changed: 57 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,57 @@
1-
cmake_minimum_required(VERSION 3.10)
1+
# Modifications Copyright (C) 2023 Intel Corporation
22

3-
if(NOT DEFINED HIP_PATH)
4-
if(NOT DEFINED ENV{HIP_PATH})
5-
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
6-
else()
7-
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
8-
endif()
9-
endif()
3+
# Permission is hereby granted, free of charge, to any person obtaining a copy
4+
# of this software and associated documentation files (the "Software"),
5+
# to deal in the Software without restriction, including without limitation
6+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
7+
# and/or sell copies of the Software, and to permit persons to whom
8+
# the Software is furnished to do so, subject to the following conditions:
9+
10+
# The above copyright notice and this permission notice shall be included
11+
# in all copies or substantial portions of the Software.
1012

11-
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14+
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
17+
# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19+
# OR OTHER DEALINGS IN THE SOFTWARE.
1220

13-
project(cudasift)
21+
# SPDX-License-Identifier: MIT
1422

15-
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-Wall -O3 -Wextra -D__HIP_PLATFORM_AMD__=1")
16-
set(CMAKE_CXX_STANDARD 11)
23+
cmake_minimum_required(VERSION 3.10)
24+
project(cudasift LANGUAGES CXX)
25+
set(CMAKE_CXX_STANDARD 17)
1726
set(CMAKE_CXX_STANDARD_REQUIRED ON)
1827
set(CMAKE_CXX_EXTENSIONS OFF)
1928

29+
set(DEF_WL_CXX_FLAGS " -D__HIP_PLATFORM_AMD__ ")
30+
set(DEF_GENERAL_CXX_FLAGS " -Wall -O3 -Wextra ")
31+
set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
32+
33+
if(NOT DEFINED ROCM_PATH)
34+
if(NOT DEFINED ENV{ROCM_PATH})
35+
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which HIP has been installed")
36+
else()
37+
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which HIP has been installed")
38+
endif()
39+
endif()
40+
41+
set(CMAKE_MODULE_PATH "${ROCM_PATH}/hip/cmake" ${CMAKE_MODULE_PATH})
42+
set(HIP_INCLUDE_DIRS "${ROCM_PATH}/include" ${HIP_INCLUDE_DIRS})
43+
set(HIP_LIBRARIES "${ROCM_PATH}/lib" ${HIP_LIBRARIES})
44+
2045
option(DEVICE_TIMER "Build using Device Timer" OFF)
2146

22-
find_package(HIP QUIET)
47+
find_package(HIP REQUIRED)
2348

2449
if(HIP_FOUND)
2550
message(STATUS "Found HIP: " ${HIP_VERSION})
2651
else()
2752
message(FATAL_ERROR "Could not find HIP!")
2853
endif()
2954

30-
set(HIP_SEPARABLE_COMPILATION ON)
31-
3255
find_package(OpenCV REQUIRED)
3356
include_directories(${OpenCV_INCLUDE_DIRS})
3457

@@ -51,41 +74,35 @@ include_directories(
5174
${CMAKE_CURRENT_SOURCE_DIR}
5275
)
5376

77+
# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific)
78+
# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags)
79+
# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity
80+
if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
81+
message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
82+
elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
83+
message(STATUS "Using DEFAULT compilation flags")
84+
set(CMAKE_CXX_FLAGS "${DEF_COMBINED_CXX_FLAGS}")
85+
elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
86+
message(STATUS "OVERRIDING GENERAL compilation flags")
87+
set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}")
88+
string(APPEND CMAKE_CXX_FLAGS ${DEF_WL_CXX_FLAGS})
89+
elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
90+
message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
91+
endif()
92+
93+
message(STATUS "CXX Compilation flags to: ${CMAKE_CXX_FLAGS}")
94+
5495
if(DEVICE_TIMER)
5596
message(STATUS "Enabling Device Timer")
5697
add_compile_options(-DDEVICE_TIMER)
5798
endif()
5899

100+
set(HIP_SEPARABLE_COMPILATION ON)
59101
set(MY_TARGET_NAME ${PROJECT_NAME})
60102
set(MY_HIPCC_OPTIONS)
61103
set(MY_NVCC_OPTIONS)
62104
set(CMAKE_HIP_ARCHITECTURES OFF)
63-
set(CMAKE_NVCC_FLAGS ${CMAKE_NVCC_FLAGS} -std=c++11)
64105

65106
set_source_files_properties(${cuda_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
66107
hip_add_executable(${MY_TARGET_NAME} ${SOURCES} ${MY_HIPCC_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS})
67108
target_link_libraries(cudasift stdc++ stdc++fs ${OpenCV_LIBS})
68-
69-
# SET(CUDA_SEPARABLE_COMPILATION ON)
70-
# hip_add_executable(cudasift ${cuda_sources} ${sources} OPTIONS -arch=sm_61)
71-
72-
# cuda_add_executable(l2net l2netD.cu OPTIONS -arch=sm_35)
73-
# set_target_properties(cudasift PROPERTIES
74-
# COMPILE_FLAGS "${EXTRA_CXX_FLAGS}"
75-
# )
76-
77-
# target_link_libraries(cudasift ${CUDA_cudadevrt_LIBRARY} ${OpenCV_LIBS})
78-
79-
# /usr/local/cuda/lib64/libcudadevrt.a ${OpenCV_LIBS}
80-
# )
81-
# install(FILES
82-
# ${cuda_sources}
83-
# ${sources}
84-
# cudaSiftD.cu
85-
# CMakeLists.txt
86-
# Copyright.txt
87-
# DESTINATION .
88-
# )
89-
# install(FILES data/left.pgm data/righ.pgm
90-
# DESTINATION data
91-
# )

cudaSift/HIP/mainSift.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ int main(int argc, char **argv)
145145
#endif
146146
// data validation
147147
auto dataVerficationTimer_start = std::chrono::steady_clock::now();
148-
Utility::RunDataVerification(thresh, matchPercentage);
148+
int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
149149
auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
150150
dataVerificationTime = std::chrono::duration<float, std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count();
151151
// // Print out and store summary data
@@ -163,6 +163,7 @@ int main(int argc, char **argv)
163163
std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms"
164164
<< "\n"
165165
<< std::endl;
166+
return data_verification_flag;
166167
}
167168

168169
void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography)

cudaSift/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ make -sj
6464

6565
mkdir build && cd build
6666

67-
CXX=hipcc cmake ../
67+
CXX=hipcc cmake ../ -DROCM_PATH=/path/to/rocm
68+
For example: CXX=hipcc cmake ../ -DROCM_PATH=/opt/rocm-5.4.3
6869

6970
make -sj
7071

0 commit comments

Comments
 (0)