Skip to content

Commit 410320e

Browse files
saiislam authored and David Salinas committed
[OpenMP] Support gfx1152 in libm
Support of gfx1152 was missing from libm and libc. Change-Id: I8676befdfc4f9812bd5eacbead6694f1dffb9a4c
1 parent dab6a0d commit 410320e

File tree

2 files changed

+232
-0
lines changed

2 files changed

+232
-0
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
//===-- AMDGPU specific platform definitions for math support -------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
10+
#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
11+
12+
#include "src/__support/macros/attributes.h"
13+
14+
#include <stdint.h>
15+
16+
namespace LIBC_NAMESPACE {
17+
18+
// The ROCm device library uses control globals to alter codegen for the
19+
// different targets. To avoid needing to link them in manually we simply
20+
// define them here.
21+
extern "C" {
22+
23+
// Disable unsafe math optimizations in the implementation.
24+
extern const LIBC_INLINE_VAR uint8_t __oclc_unsafe_math_opt = 0;
25+
26+
// Disable denormalization at zero optimizations in the implementation.
27+
extern const LIBC_INLINE_VAR uint8_t __oclc_daz_opt = 0;
28+
29+
// Disable rounding optimizations for 32-bit square roots.
30+
extern const LIBC_INLINE_VAR uint8_t __oclc_correctly_rounded_sqrt32 = 1;
31+
32+
// Disable finite math optimizations.
33+
extern const LIBC_INLINE_VAR uint8_t __oclc_finite_only_opt = 0;
34+
35+
#if defined(__gfx700__)
36+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 7000;
37+
#elif defined(__gfx701__)
38+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 7001;
39+
#elif defined(__gfx702__)
40+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 7002;
41+
#elif defined(__gfx703__)
42+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 7003;
43+
#elif defined(__gfx704__)
44+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 7004;
45+
#elif defined(__gfx705__)
46+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 7005;
47+
#elif defined(__gfx801__)
48+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 8001;
49+
#elif defined(__gfx802__)
50+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 8002;
51+
#elif defined(__gfx803__)
52+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 8003;
53+
#elif defined(__gfx805__)
54+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 8005;
55+
#elif defined(__gfx810__)
56+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 8100;
57+
#elif defined(__gfx900__)
58+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9000;
59+
#elif defined(__gfx902__)
60+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9002;
61+
#elif defined(__gfx904__)
62+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9004;
63+
#elif defined(__gfx906__)
64+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9006;
65+
#elif defined(__gfx908__)
66+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9008;
67+
#elif defined(__gfx909__)
68+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9009;
69+
#elif defined(__gfx90a__)
70+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9010;
71+
#elif defined(__gfx90c__)
72+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9012;
73+
#elif defined(__gfx940__)
74+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9400;
75+
#elif defined(__gfx941__)
76+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9401;
77+
#elif defined(__gfx942__)
78+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9402;
79+
#elif defined(__gfx1010__)
80+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10100;
81+
#elif defined(__gfx1011__)
82+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10101;
83+
#elif defined(__gfx1012__)
84+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10102;
85+
#elif defined(__gfx1013__)
86+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10103;
87+
#elif defined(__gfx1030__)
88+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10300;
89+
#elif defined(__gfx1031__)
90+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10301;
91+
#elif defined(__gfx1032__)
92+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10302;
93+
#elif defined(__gfx1033__)
94+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10303;
95+
#elif defined(__gfx1034__)
96+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10304;
97+
#elif defined(__gfx1035__)
98+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10305;
99+
#elif defined(__gfx1036__)
100+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 10306;
101+
#elif defined(__gfx1100__)
102+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 11000;
103+
#elif defined(__gfx1101__)
104+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 11001;
105+
#elif defined(__gfx1102__)
106+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 11002;
107+
#elif defined(__gfx1103__)
108+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 11003;
109+
#elif defined(__gfx1150__)
110+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 11500;
111+
#elif defined(__gfx1151__)
112+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 11501;
113+
#elif defined(__gfx1152__)
114+
extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 11502;
115+
#else
116+
#error "Unknown AMDGPU architecture"
117+
#endif
118+
}
119+
120+
// These aliases cause clang to emit the control constants with ODR linkage.
121+
// This allows us to link against the symbols without preventing them from being
122+
// optimized out or causing symbol collisions.
123+
[[gnu::alias("__oclc_unsafe_math_opt")]] const uint8_t __oclc_unsafe_math_opt__;
124+
[[gnu::alias("__oclc_daz_opt")]] const uint8_t __oclc_daz_opt__;
125+
[[gnu::alias("__oclc_correctly_rounded_sqrt32")]] const uint8_t
126+
__oclc_correctly_rounded_sqrt32__;
127+
[[gnu::alias("__oclc_finite_only_opt")]] const uint8_t __oclc_finite_only_opt__;
128+
[[gnu::alias("__oclc_ISA_version")]] const uint32_t __oclc_ISA_version__;
129+
130+
} // namespace LIBC_NAMESPACE
131+
132+
#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
##===----------------------------------------------------------------------===##
2+
#
3+
# The LLVM Compiler Infrastructure
4+
#
5+
# This file is dual licensed under the MIT and the University of Illinois Open
6+
# Source Licenses. See LICENSE.txt for details.
7+
##===----------------------------------------------------------------------===##
8+
#
9+
# libomptarget/libm/libdevice/CMakeLists.txt
10+
#
11+
##===----------------------------------------------------------------------===##
12+
13+
# The minimum required version stays at 3.0. The "...3.20" upper bound sets the
# policy version: CMake 4.0 and newer reject a policy version below 3.5, so a
# bare "VERSION 3.0" stops configuring there. CMake older than 3.12 ignores the
# "...<max>" part and behaves exactly as before.
cmake_minimum_required(VERSION 3.0...3.20 FATAL_ERROR)

# When this directory is configured on its own (it is the top of the source
# tree), declare a project and print the opening banner line.
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
  project(libm-project)
  message("--------------------------------------------")
endif()
18+
19+
# find_package(LLVM ...) is done in libomptarget/CMakeLists.txt; here we only
# check the resulting LLVM_DIR and stop processing this file when it is unset.
if(NOT LLVM_DIR)
  message(" ERROR: NO LLVM FOUND! Not building libm libdevice.")
  return()
endif()
message(" -- Building libm bitcodes with LLVM ${LLVM_PACKAGE_VERSION} using ${CLANG_TOOL}")
26+
27+
# Optimization level; passed to clang below as -O<level>.
set(optimization_level 2)

# AMD GPUs to build for: LIBOMPTARGET_AMDGCN_GFXLIST when it is defined,
# otherwise the built-in default list.
if(DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
  set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
else()
  set(amdgpu_mcpus
    gfx700 gfx701
    gfx801 gfx803
    gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c
    gfx940 gfx941 gfx942
    gfx1010
    gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036
    gfx1100 gfx1101 gfx1102 gfx1103
    gfx1150 gfx1151 gfx1152
    gfx1200 gfx1201)
endif()
35+
36+
# See if this build is for LLVM_ENABLE_RUNTIMES='openmp' (matched
# case-insensitively). If so, _xdir is "/openmp"; otherwise it is empty. It is
# spliced into the omp.h include path used further down.
#
# list(FIND) is used instead of comparing each entry with if(... STREQUAL ...):
# under the pre-3.1 policy set (CMP0054 not NEW), if() may re-dereference a
# quoted operand that happens to name a variable; list(FIND) compares plain
# strings only.
string(TOUPPER "${LLVM_ENABLE_RUNTIMES}" libm_runtimes_upper)
list(FIND libm_runtimes_upper "OPENMP" libm_openmp_runtime_index)
if(libm_openmp_runtime_index EQUAL -1)
  set(_xdir "")
else()
  set(_xdir "/openmp")
endif()

# prepare variables used by build_static_device_bc_lib
set(triple "amdgcn-amd-amdhsa")
set(systemarch "amdgcn")
48+
49+
# Host triple passed to clang via -target. It does not depend on the GPU being
# built, so it is computed once rather than on every loop iteration.
set(cpu_target x86_64-pc-linux-gnu)
# Quoted so that an empty processor name cannot break the if() syntax.
if("${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "ppc64le")
  set(cpu_target powerpc64le-linux-gnu)
endif()

# For each requested GPU, compile src/libm.c to ${CMAKE_BINARY_DIR}/libm-<mcpu>.bc
# and add a libm-target-<mcpu> target (part of ALL) that builds it.
foreach(mcpu ${amdgpu_mcpus})
  # Need omp.h from the build directory
  set(omp_common_args
    -c
    -I ${CMAKE_BINARY_DIR}${_xdir}/runtime/src
    -emit-llvm
    -target ${cpu_target}
    -fopenmp
    -fopenmp-targets=${triple}
    -Xopenmp-target=${triple}
    -march=${mcpu}
    --cuda-device-only
    -nocudalib
    -Xclang -mcode-object-version=none
    -O${optimization_level})
  set(libm-bc ${CMAKE_BINARY_DIR}/libm-${mcpu}.bc)
  set(openmp_c_cmd ${CLANG_TOOL} ${omp_common_args}
    ${CMAKE_CURRENT_SOURCE_DIR}/src/libm.c)
  add_custom_command(
    OUTPUT ${libm-bc}
    COMMAND ${openmp_c_cmd} -o ${libm-bc}
    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/libm.c"
    COMMENT "Built libm file ${libm-bc}")
  add_custom_target(libm-target-${mcpu} ALL DEPENDS ${libm-bc})
  # When CLANG_TOOL is the literal generator expression for the clang target,
  # also make the bitcode depend on that target. The closing '>' is required:
  # without it this string comparison can never match and the dependency is
  # never added.
  if("${CLANG_TOOL}" STREQUAL "$<TARGET_FILE:clang>")
    add_custom_command(OUTPUT ${libm-bc}
      DEPENDS clang
      APPEND)
  endif()
endforeach()
83+
84+
#set(nvptx_numbers $ENV{NVPTXGPUS})
85+
#if(NOT nvptx_numbers)
86+
#set(nvptx_numbers "35,37,50,52,53,60,61,62,70,72,75,80,86")
87+
#endif()
88+
#set(nvptxgpus)
89+
#string(REGEX REPLACE "," ";" nvptx_numbers_list ${nvptx_numbers})
90+
#foreach(sm_number ${nvptx_numbers_list})
91+
#list(APPEND nvptxgpus "sm_${sm_number}")
92+
#endforeach()
93+
#set(gpulist ${nvptxgpus})
94+
#set(systemarch "nvptx")
95+
#set(triple "nvptx64-nvidia-cuda ")
96+
#build_static_device_bc_lib()
97+
98+
# Print the closing banner line only when this directory is the top of the
# source tree (standalone configure).
string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}"
  libm_standalone_build)
if(libm_standalone_build)
  message("--------------------------------------------")
endif()

0 commit comments

Comments
 (0)