Skip to content

Commit c4a2c36

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents b866443 + 2834619 commit c4a2c36

File tree

85 files changed

+7561
-3379
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+7561
-3379
lines changed

External/HIP/CMakeLists.txt

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,54 @@ message(STATUS "TEST_SUITE_HIP_ROOT: ${TEST_SUITE_HIP_ROOT}")
77
get_filename_component(HIP_CLANG_PATH ${CMAKE_CXX_COMPILER} DIRECTORY)
88
message(STATUS "HIP_CLANG_PATH: ${HIP_CLANG_PATH}")
99

10+
# Inspired from create_one_local_test. Runs hipify on the TestSource and then compiles it.
11+
# Search for the reference files next to TestSource.
12+
macro(create_one_hipify_cuda_test TestName TestSource VairantOffload VariantSuffix VariantCPPFlags VariantLibs)
13+
set(_cuda_src "${TestSource}")
14+
set(_hip_src "${TestName}.hip")
15+
set(_hipify_target "${TestName}-hipify")
16+
17+
set_source_files_properties(${_hip_src} PROPERTIES LANGUAGE CXX)
18+
add_custom_command(OUTPUT ${_hip_src}
19+
COMMAND ${HIPIFY_EXE} "${_cuda_src}" -o "${_hip_src}"
20+
DEPENDS "${_cuda_src}")
21+
add_custom_target(${_hipify_target} DEPENDS ${_hip_src})
22+
23+
set(_executable ${TestName}-${VariantSuffix})
24+
set(_executable_path ${CMAKE_CURRENT_BINARY_DIR}/${_executable})
25+
llvm_test_run()
26+
27+
get_filename_component(_test_source_dir "${TestSource}" DIRECTORY)
28+
get_filename_component(_test_source_name "${TestSource}" NAME_WE)
29+
set(REFERENCE_OUTPUT "${_test_source_dir}/${test_source_name}.reference_output")
30+
if(EXISTS "${REFERENCE_OUTPUT}")
31+
llvm_test_verify(WORKDIR %S
32+
%b/${FPCMP} %o ${REFERENCE_OUTPUT}-${VariantSuffix}
33+
)
34+
llvm_test_executable(${_executable} ${_hip_src})
35+
llvm_test_data(${_executable}
36+
DEST_SUFFIX "-${VariantSuffix}"
37+
${REFERENCE_OUTPUT})
38+
else()
39+
llvm_test_executable(${_executable} ${_hip_src})
40+
endif()
41+
42+
target_compile_options(${_executable} PUBLIC ${VariantCPPFLAGS})
43+
44+
# In External/CUDA, tests define a STDLIB_VERSION that matches the C++
45+
# standard supported by the standard library.
46+
# For the HIP case, we set a huge number and assume that the latest C++
47+
# standard version is supported by the library.
48+
target_compile_definitions(${_executable} PRIVATE STDLIB_VERSION=9999)
49+
add_dependencies(${_executable} ${_hipify_target})
50+
if(VariantLibs)
51+
target_link_libraries(${_executable} ${VariantLibs})
52+
endif()
53+
54+
add_dependencies(hip-tests-simple-${VariantSuffix} ${_executable})
55+
list(APPEND VARIANT_SIMPLE_TEST_TARGETS ${_executable}.test)
56+
endmacro()
57+
1058
# Create targets for HIP tests that are part of the test suite.
1159
macro(create_local_hip_tests VariantSuffix)
1260
set(VariantOffload "hip")
@@ -17,11 +65,13 @@ macro(create_local_hip_tests VariantSuffix)
1765
#set_source_files_properties(split-kernel-args.hip PROPERTIES
1866
# COMPILE_FLAGS "-mllvm -amdgpu-enable-split-kernel-args")
1967
# Add HIP tests to be added to hip-tests-simple
68+
list(APPEND HIP_LOCAL_TESTS array)
2069
list(APPEND HIP_LOCAL_TESTS empty)
2170
list(APPEND HIP_LOCAL_TESTS with-fopenmp)
2271
list(APPEND HIP_LOCAL_TESTS saxpy)
2372
list(APPEND HIP_LOCAL_TESTS memmove)
2473
list(APPEND HIP_LOCAL_TESTS split-kernel-args)
74+
list(APPEND HIP_LOCAL_TESTS builtin-logb-scalbn)
2575

2676
# TODO: Re-enable InOneWeekend after it is fixed
2777
#list(APPEND HIP_LOCAL_TESTS InOneWeekend)
@@ -48,6 +98,27 @@ macro(create_local_hip_tests VariantSuffix)
4898
"${VariantCPPFLAGS}" "${VariantLibs}")
4999
endforeach()
50100

101+
list(APPEND CUDA_LOCAL_TESTS algorithm)
102+
list(APPEND CUDA_LOCAL_TESTS cmath)
103+
list(APPEND CUDA_LOCAL_TESTS complex)
104+
list(APPEND CUDA_LOCAL_TESTS math_h)
105+
list(APPEND CUDA_LOCAL_TESTS new)
106+
107+
find_program(HIPIFY_EXE
108+
NAME hipify-perl
109+
PATHS ${_RocmPath}/bin)
110+
111+
if(HIPIFY_EXE)
112+
foreach(_cuda_test IN LISTS CUDA_LOCAL_TESTS)
113+
set(_cuda_src "${CMAKE_CURRENT_SOURCE_DIR}/../CUDA/${_cuda_test}.cu")
114+
create_one_hipify_cuda_test(${_cuda_test} ${_cuda_src}
115+
${VariantOffload} ${VariantSuffix}
116+
"${VariantCPPFLAGS}" "${VariantLibs}")
117+
endforeach()
118+
else()
119+
message(WARNING "hipify-perl not found for ROCm installation in ${_RocmPath}.")
120+
endif()
121+
51122
# Add test for Blender.
52123
configure_file(workload/blender/test_blender.sh.in ${CMAKE_CURRENT_BINARY_DIR}/test_blender.sh @ONLY)
53124
configure_file(workload/blender/verify_blender.sh.in ${CMAKE_CURRENT_BINARY_DIR}/verify_blender.sh @ONLY)

External/HIP/array.hip

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#include "hip/hip_runtime.h"
2+
// Check that we can use std::array on device code
3+
//
4+
// After libstdc++ 15, some internal asserts rely on function that are neither
5+
// constexpr nor device. This can trigger errors when using std::array members
6+
// on device code.
7+
//
8+
// This workaround is implemented in bits/c++config.h
9+
10+
#include <stdio.h>
11+
12+
#if __cplusplus >= 201103L
13+
14+
#include <array>
15+
#include <assert.h>
16+
17+
#if __cplusplus >= 201402L && STDLIB_VERSION >= 2014
18+
// call the function in a constexpr and a non-constexpr context
19+
#define TEST(expr) \
20+
do { \
21+
size_t M = expr; \
22+
(void)(M); \
23+
constexpr size_t N = expr; \
24+
(void)(N); \
25+
} while (0)
26+
#define MAYBE_CONSTEXPR constexpr
27+
#else
28+
#define TEST(expr) \
29+
do { \
30+
size_t M = expr; \
31+
(void)(M); \
32+
} while (0)
33+
#define MAYBE_CONSTEXPR
34+
#endif
35+
36+
MAYBE_CONSTEXPR __host__ __device__ size_t test_array() {
37+
// Before C++17 only "operator[] const" is constexpr (thus available on
38+
// device).
39+
#if __cplusplus < 201703L && STDLIB_VERSION < 2017
40+
const
41+
#endif
42+
std::array<int, 4>
43+
A = {0, 1, 2, 3};
44+
45+
size_t N = A.size();
46+
assert(N == 4);
47+
48+
#if __cplusplus >= 201402L && STDLIB_VERSION >= 2014
49+
int fst = A[0];
50+
assert(fst == 0);
51+
#endif
52+
53+
#if __cplusplus >= 201703L && STDLIB_VERSION >= 2017
54+
A[0] = 4;
55+
int snd = A[0];
56+
assert(snd == 4);
57+
#endif
58+
return N;
59+
}
60+
61+
__host__ __device__ void do_all_tests() { TEST(test_array()); }
62+
63+
__global__ void kernel() { do_all_tests(); }
64+
65+
int main() {
66+
kernel<<<32, 32>>>();
67+
hipError_t err = hipDeviceSynchronize();
68+
if (err != hipSuccess) {
69+
printf("CUDA error %d\n", (int)err);
70+
return 1;
71+
}
72+
73+
do_all_tests();
74+
75+
printf("Success!\n");
76+
return 0;
77+
}
78+
79+
#else
80+
81+
int main() {
82+
printf("Success!\n");
83+
return 0;
84+
}
85+
86+
#endif
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#include <hip/hip_runtime.h>
2+
#include <cmath>
3+
#include <cstdio>
4+
#include <iostream>
5+
6+
// Simple error check macro
7+
#define HIP_CHECK(call) \
8+
do { \
9+
hipError_t err = call; \
10+
if (err != hipSuccess) { \
11+
std::cerr << "HIP error: " << hipGetErrorString(err) \
12+
<< " at " << __FILE__ << ":" << __LINE__ << std::endl; \
13+
std::exit(EXIT_FAILURE); \
14+
} \
15+
} while (0)
16+
17+
__global__ void my_kernel(float a[], int alen, int exp[], int explen, float *t_res) {
18+
for (int i = 0; i < alen; i++) {
19+
t_res[4*i] = logbf(a[i]);
20+
t_res[4*i + 1] = logb(a[i]);
21+
t_res[4*i + 2] = __builtin_logbf(a[i]);
22+
t_res[4*i + 3] = __builtin_logb(a[i]);
23+
}
24+
25+
for (int i = 0; i < alen; i++) {
26+
for (int j = 0; j < explen; j++) {
27+
t_res[4*alen + 4*explen*i + 4*j] = scalbnf(a[i], exp[j]);
28+
t_res[4*alen + 4*explen*i + 4*j + 1] = scalbn(a[i], exp[j]);
29+
t_res[4*alen + 4*explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]);
30+
t_res[4*alen + 4*explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]);
31+
}
32+
}
33+
}
34+
35+
void __attribute__((noinline)) test(float a[], int alen, int exp[], int explen, float *h_res) {
36+
for (int i = 0; i < alen; i++) {
37+
h_res[4*i] = logbf(a[i]);
38+
h_res[4*i + 1] = logb(a[i]);
39+
h_res[4*i + 2] = __builtin_logbf(a[i]);
40+
h_res[4*i + 3] = __builtin_logb(a[i]);
41+
}
42+
43+
for (int i = 0; i < alen; i++) {
44+
for (int j = 0; j < explen; j++) {
45+
h_res[4*alen + 4*explen*i + 4*j] = scalbnf(a[i], exp[j]);
46+
h_res[4*alen + 4*explen*i + 4*j + 1] = scalbn(a[i], exp[j]);
47+
h_res[4*alen + 4*explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]);
48+
h_res[4*alen + 4*explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]);
49+
}
50+
}
51+
}
52+
53+
int main(int argc, char **argv) {
54+
// Init input data
55+
float a[] = {16.0f, 3.14f, 0.0f, -0.0f, INFINITY, NAN};
56+
int alen = sizeof(a) / sizeof(a[0]);
57+
int exp[] = {10, 0, -5};
58+
int explen = sizeof(exp) / sizeof(exp[0]);
59+
60+
// Compute on CPU
61+
int res_len = 4 * alen + 4 * alen * explen; // logb + scalbn
62+
int res_bsize = sizeof(float) * res_len;
63+
float *h_res = (float *)malloc(res_bsize);
64+
test(a, alen, exp, explen, h_res);
65+
66+
// Make a copy for GPU
67+
float *d_a;
68+
int *d_exp;
69+
float *t_res;
70+
HIP_CHECK(hipMalloc((void**)&d_a, sizeof(a)));
71+
HIP_CHECK(hipMalloc((void**)&d_exp, sizeof(exp)));
72+
HIP_CHECK(hipMalloc((void**)&t_res, res_bsize));
73+
HIP_CHECK(hipMemcpy(d_a, a, sizeof(a), hipMemcpyHostToDevice));
74+
HIP_CHECK(hipMemcpy(d_exp, exp, sizeof(exp), hipMemcpyHostToDevice));
75+
HIP_CHECK(hipMemset(t_res, 0, res_bsize));
76+
77+
// Launch a GPU kernel
78+
my_kernel<<<1,1>>>(d_a, alen, d_exp, explen, t_res);
79+
80+
// Copy the device results to host
81+
float *d_res = (float *)malloc(res_bsize);
82+
HIP_CHECK(hipDeviceSynchronize());
83+
HIP_CHECK(hipMemcpy(d_res, t_res, res_bsize, hipMemcpyDeviceToHost));
84+
85+
// Verify the results match CPU.
86+
int errs = 0;
87+
for(int i = 0; i < res_len; i++) {
88+
if (fabs(h_res[i] - d_res[i]) > fabs(h_res[i] * 0.0001f)) {
89+
printf("found error i=%i h=%f d=%f\n", i, h_res[i], d_res[i]);
90+
errs++;
91+
}
92+
}
93+
if (errs != 0)
94+
printf("%i errors\n", errs);
95+
else
96+
printf("PASSED!\n");
97+
98+
free(h_res);
99+
HIP_CHECK(hipFree(d_a));
100+
HIP_CHECK(hipFree(d_exp));
101+
HIP_CHECK(hipFree(t_res));
102+
free(d_res);
103+
return errs;
104+
}
105+
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
PASSED!
2+
exit 0

External/SPEC/CFP2017rate/503.bwaves_r/CMakeLists.txt

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,19 @@ speccpu2017_run_test(
4848
RUN_TYPE ref
4949
)
5050

51-
speccpu2017_run_test(
52-
< "${RUN_ref_DIR_REL}/bwaves_3.in"
53-
STDOUT bwaves_3.out
54-
RUN_TYPE ref
55-
)
51+
if (BENCHMARK_SUITE_TYPE STREQUAL rate)
52+
speccpu2017_run_test(
53+
< "${RUN_ref_DIR_REL}/bwaves_3.in"
54+
STDOUT bwaves_3.out
55+
RUN_TYPE ref
56+
)
5657

57-
speccpu2017_run_test(
58-
< "${RUN_ref_DIR}/bwaves_4.in"
59-
STDOUT bwaves_4.out
60-
RUN_TYPE ref
61-
)
58+
speccpu2017_run_test(
59+
< "${RUN_ref_DIR}/bwaves_4.in"
60+
STDOUT bwaves_4.out
61+
RUN_TYPE ref
62+
)
63+
endif ()
6264

6365

6466
################################################################################

External/SPEC/CFP2017rate/521.wrf_r/CMakeLists.txt

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,12 @@ speccpu2017_benchmark(RATE)
1717
# that will be processed by specpp and is common to ${PROG} and ${VALIDATOR}
1818
# wrf_netcdf an object library for the netcdf sources
1919

20-
# There is only one argument to the validator. Only the location
21-
# of the reference data changes. Tolerances and filenames are hard
22-
# coded.
23-
#
2420
# ${SPECDIFF_BIN} is used to check that all the test pass in
2521
# diffwrf_output_01.txt. We replace this with a call to `diff -b` so
2622
# that SPEC need not be installed. The `-b` is used to ignore
2723
# extraneous whitespace.
2824
macro(wrf_validator)
29-
cmake_parse_arguments(_carg "" "RUN_TYPE" "" ${ARGN})
25+
cmake_parse_arguments(_carg "" "RUN_TYPE;WRF_OUT_FILE" "" ${ARGN})
3026

3127
set(VALIDATOR wrf_validate-target_${BENCHMARK_SUITE_TYPE})
3228
if (NOT TARGET ${VALIDATOR})
@@ -41,7 +37,7 @@ macro(wrf_validator)
4137
llvm_test_verify(WORKDIR ${RUN_${_carg_RUN_TYPE}_DIR_REL}
4238
"%S/${VALIDATOR}"
4339
"${RUN_${_carg_RUN_TYPE}_DIR_REL}/compare/wrf_reference_01"
44-
wrfout_d01_2000-01-24_14_00_00 >
40+
${_carg_WRF_OUT_FILE} >
4541
"${RUN_${_carg_RUN_TYPE}_DIR_REL}/diffwrf_output_01.txt" &&
4642
diff -b "${RUN_${_carg_RUN_TYPE}_DIR_REL}/diffwrf_output_01.txt" "${RUN_${_carg_RUN_TYPE}_DIR_REL}/compare/diffwrf_output_01.txt"
4743
RUN_TYPE ${_carg_RUN_TYPE}
@@ -79,19 +75,24 @@ endif ()
7975

8076
speccpu2017_run_test(RUN_TYPE test)
8177

82-
wrf_validator(RUN_TYPE test)
78+
wrf_validator(RUN_TYPE test WRF_OUT_FILE wrfout_d01_2000-01-24_12_10_00)
8379

8480
## train #######################################################################
8581

8682
speccpu2017_run_test(RUN_TYPE train)
8783

88-
wrf_validator(RUN_TYPE train)
84+
wrf_validator(RUN_TYPE train WRF_OUT_FILE wrfout_d01_2000-01-24_14_00_00)
8985

9086
## ref #########################################################################
9187

9288
speccpu2017_run_test(RUN_TYPE ref)
9389

94-
wrf_validator(RUN_TYPE ref)
90+
if (BENCHMARK_SUITE_TYPE STREQUAL rate)
91+
wrf_validator(RUN_TYPE ref WRF_OUT_FILE wrfout_d01_2000-01-24_20_00_00)
92+
endif ()
93+
if (BENCHMARK_SUITE_TYPE STREQUAL speed)
94+
wrf_validator(RUN_TYPE ref WRF_OUT_FILE wrfout_d01_2000-01-24_15_00_00)
95+
endif ()
9596

9697
################################################################################
9798

0 commit comments

Comments
 (0)