Skip to content

Commit 7a2d074

Browse files
mjklemm and skc7 authored
[Flang][Runtime] Add support for pooled memory allocator (llvm#2033)
This PR adds support to the AMD Next-gen Fortran compiler for performing the memory management behind `ALLOCATE` and `DEALLOCATE` through a dynamic memory pool. To enable this for a user application, users must pass an extra command-line flag (`-ffast-amd-memory-allocator`). The runtime needs to have `libflang_rt.amd.a`, which is enabled via `AOMP_SKIP_AMD_FLANGRT=0`. --------- Co-authored-by: ksankisa_amdeng <[email protected]>
1 parent 62ac81f commit 7a2d074

File tree

24 files changed

+908
-8
lines changed

24 files changed

+908
-8
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3624,6 +3624,9 @@ defm disable_block_signature_string : BoolFOption<"disable-block-signature-strin
36243624
NegFlag<SetFalse, [], [ClangOption], "Don't disable">,
36253625
BothFlags<[], [CC1Option], " block signature string)">>;
36263626

3627+
def ffast_amd_memory_allocator : Flag<["-"], "ffast-amd-memory-allocator">, Group<f_Group>,
3628+
Visibility<[FlangOption,FC1Option]>,
3629+
HelpText<"Enable optimized memory allocator for AMD Instinct(tm) APUs">;
36273630
def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group<f_Group>,
36283631
Visibility<[ClangOption, FlangOption]>,
36293632
HelpText<"Omit the frame pointer from functions that don't need it. "

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
833833

834834
addFortranDialectOptions(Args, CmdArgs);
835835

836+
if (Args.hasArg(options::OPT_ffast_amd_memory_allocator))
837+
CmdArgs.push_back("-ffast-amd-memory-allocator");
838+
836839
// 'flang -E' always produces output that is suitable for use as fixed form
837840
// Fortran. However it is only valid free form source if the original is also
838841
// free form. Ensure this logic does not incorrectly assume fixed-form for

flang-rt/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ if (FLANG_RT_INCLUDE_CUF)
216216
find_package(CUDAToolkit REQUIRED)
217217
endif()
218218

219+
option(FLANG_RT_INCLUDE_AMD "Build Fortran runtime with special support for AMD GPUs" OFF)
219220

220221
########################
221222
# System Introspection #

flang-rt/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@ CMake itself provide.
164164
[CUDA Toolkit installation](https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html)
165165
(no `CMAKE_CUDA_COMPILER`).
166166

167+
* `FLANG_RT_INCLUDE_AMD` (bool, default: `OFF`)
168+
169+
Compiles the `libflang_rt.a/.so` library with special support for AMD
170+
Instinct(tm) Accelerators.
171+
167172

168173
### Experimental CUDA Support
169174

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//////////////////////////////////////////////////////////////////////////////
2+
// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC and Umpire
3+
// project contributors. See the COPYRIGHT file for details.
4+
//
5+
// SPDX-License-Identifier: (MIT)
6+
//////////////////////////////////////////////////////////////////////////////
7+
#ifndef UMPIRE_config_HPP
8+
#define UMPIRE_config_HPP
9+
10+
//
11+
// Please keep the list below organized in alphabetical order.
12+
//
13+
/* #undef UMPIRE_ENABLE_BACKTRACE */
14+
/* #undef UMPIRE_ENABLE_BACKTRACE_SYMBOLS */
15+
/* #undef UMPIRE_ENABLE_DEVELOPER_BENCHMARKS */
16+
/* #undef UMPIRE_ENABLE_CONST */
17+
/* #undef UMPIRE_ENABLE_CUDA */
18+
#define UMPIRE_ENABLE_DEVICE
19+
/* #undef UMPIRE_ENABLE_FILESYSTEM */
20+
#define UMPIRE_ENABLE_FILE_RESOURCE
21+
/* #undef UMPIRE_ENABLE_UMAP */
22+
#define UMPIRE_ENABLE_HIP
23+
/* #undef UMPIRE_ENABLE_HIP_COHERENCE_GRANULARITY */
24+
/* #undef UMPIRE_ENABLE_IPC_SHARED_MEMORY */
25+
/* #undef UMPIRE_ENABLE_MPI3_SHARED_MEMORY */
26+
/* #undef UMPIRE_ENABLE_INACCESSIBILITY_TESTS */
27+
#define UMPIRE_ENABLE_LOGGING
28+
/* #undef UMPIRE_ENABLE_MPI */
29+
/* #undef UMPIRE_ENABLE_NUMA */
30+
/* #undef UMPIRE_ENABLE_OPENMP_TARGET */
31+
#define UMPIRE_ENABLE_PINNED
32+
/* #undef UMPIRE_ENABLE_SLIC */
33+
/* #undef UMPIRE_ENABLE_SYCL */
34+
#define UMPIRE_ENABLE_UM
35+
/* #undef UMPIRE_ENABLE_ASAN */
36+
/* #undef UMPIRE_ENABLE_DEVICE_ALLOCATOR */
37+
/* #undef UMPIRE_ENABLE_SQLITE_EXPERIMENTAL */
38+
/* #undef UMPIRE_DISABLE_ALLOCATIONMAP_DEBUG */
39+
40+
#define UMPIRE_VERSION_MAJOR 2025
41+
#define UMPIRE_VERSION_MINOR 3
42+
#define UMPIRE_VERSION_PATCH 0
43+
#define UMPIRE_VERSION_RC "6b4cb9e9"
44+
45+
#ifdef __cplusplus
46+
47+
// umpire_EXPORTS gets defined by CMake when we use
48+
// -DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS=On
49+
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UMPIRE_WIN_STATIC_BUILD)
50+
#ifdef umpire_EXPORTS
51+
#define UMPIRE_EXPORT __declspec(dllexport)
52+
#else
53+
#define UMPIRE_EXPORT __declspec(dllimport)
54+
#endif
55+
#else
56+
#define UMPIRE_EXPORT
57+
#endif
58+
59+
#define UMPIRE_VERSION_SYM umpire_ver_2025_3_found
60+
UMPIRE_EXPORT extern int UMPIRE_VERSION_SYM;
61+
// Evaluates to true when the version symbol is zero. The body is parenthesized
// so the comparison stays intact when the macro is used inside a larger
// expression (e.g. next to arithmetic or other comparison operators).
#define UMPIRE_VERSION_OK() (UMPIRE_VERSION_SYM == 0)
62+
63+
namespace umpire {
64+
constexpr int invalid_allocator_id = 0xDEADBEE;
65+
}
66+
67+
#endif
68+
69+
#endif
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// typesUmpire.h
2+
// This file is generated by Shroud 0.12.2. Do not edit.
3+
// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC and Umpire
4+
// project contributors. See the COPYRIGHT file for details.
5+
//
6+
// SPDX-License-Identifier: (MIT)
7+
// For C users and C++ implementation
8+
9+
#ifndef TYPESUMPIRE_H
10+
#define TYPESUMPIRE_H
11+
12+
#include <stddef.h>
13+
14+
15+
#ifdef __cplusplus
16+
extern "C" {
17+
#endif
18+
19+
// helper capsule_data_helper
20+
struct s_umpire_SHROUD_capsule_data {
21+
void *addr; /* address of C++ memory */
22+
int idtor; /* index of destructor */
23+
};
24+
typedef struct s_umpire_SHROUD_capsule_data umpire_SHROUD_capsule_data;
25+
26+
/* helper ShroudTypeDefines */
27+
/* Shroud type defines */
28+
#define SH_TYPE_SIGNED_CHAR 1
29+
#define SH_TYPE_SHORT 2
30+
#define SH_TYPE_INT 3
31+
#define SH_TYPE_LONG 4
32+
#define SH_TYPE_LONG_LONG 5
33+
#define SH_TYPE_SIZE_T 6
34+
35+
/* Unsigned variant = signed type code + 100; parenthesized so the sum is not
 * split by neighbouring higher-precedence operators at the expansion site. */
#define SH_TYPE_UNSIGNED_SHORT (SH_TYPE_SHORT + 100)
36+
/* Unsigned variant = signed type code + 100; parenthesized so the sum is not
 * split by neighbouring higher-precedence operators at the expansion site. */
#define SH_TYPE_UNSIGNED_INT (SH_TYPE_INT + 100)
37+
/* Unsigned variant = signed type code + 100; parenthesized so the sum is not
 * split by neighbouring higher-precedence operators at the expansion site. */
#define SH_TYPE_UNSIGNED_LONG (SH_TYPE_LONG + 100)
38+
/* Unsigned variant = signed type code + 100; parenthesized so the sum is not
 * split by neighbouring higher-precedence operators at the expansion site. */
#define SH_TYPE_UNSIGNED_LONG_LONG (SH_TYPE_LONG_LONG + 100)
39+
40+
#define SH_TYPE_INT8_T 7
41+
#define SH_TYPE_INT16_T 8
42+
#define SH_TYPE_INT32_T 9
43+
#define SH_TYPE_INT64_T 10
44+
45+
/* Unsigned variant = signed fixed-width type code + 100; parenthesized so the
 * sum is not split by neighbouring higher-precedence operators. */
#define SH_TYPE_UINT8_T (SH_TYPE_INT8_T + 100)
46+
/* Unsigned variant = signed fixed-width type code + 100; parenthesized so the
 * sum is not split by neighbouring higher-precedence operators. */
#define SH_TYPE_UINT16_T (SH_TYPE_INT16_T + 100)
47+
/* Unsigned variant = signed fixed-width type code + 100; parenthesized so the
 * sum is not split by neighbouring higher-precedence operators. */
#define SH_TYPE_UINT32_T (SH_TYPE_INT32_T + 100)
48+
/* Unsigned variant = signed fixed-width type code + 100; parenthesized so the
 * sum is not split by neighbouring higher-precedence operators. */
#define SH_TYPE_UINT64_T (SH_TYPE_INT64_T + 100)
49+
50+
/* least8 least16 least32 least64 */
51+
/* fast8 fast16 fast32 fast64 */
52+
/* intmax_t intptr_t ptrdiff_t */
53+
54+
#define SH_TYPE_FLOAT 22
55+
#define SH_TYPE_DOUBLE 23
56+
#define SH_TYPE_LONG_DOUBLE 24
57+
#define SH_TYPE_FLOAT_COMPLEX 25
58+
#define SH_TYPE_DOUBLE_COMPLEX 26
59+
#define SH_TYPE_LONG_DOUBLE_COMPLEX 27
60+
61+
#define SH_TYPE_BOOL 28
62+
#define SH_TYPE_CHAR 29
63+
#define SH_TYPE_CPTR 30
64+
#define SH_TYPE_STRUCT 31
65+
#define SH_TYPE_OTHER 32
66+
67+
// helper array_context
68+
/* Array descriptor: a capsule plus a raw data pointer and element/shape metadata. */
struct s_umpire_SHROUD_array {
69+
umpire_SHROUD_capsule_data cxx; /* capsule: C++ memory address plus destructor index */
70+
union {
71+
const void * base;
72+
const char * ccharp;
73+
} addr; /* same pointer, viewable as untyped or as character data */
74+
int type; /* type of element */
75+
size_t elem_len; /* bytes-per-item or character len in c++ */
76+
size_t size; /* size of data in c++ */
77+
int rank; /* number of dimensions, 0=scalar */
78+
long shape[7]; /* NOTE(review): presumably one extent per dimension (max 7) - confirm */
79+
};
80+
typedef struct s_umpire_SHROUD_array umpire_SHROUD_array;
81+
82+
// helper capsule_umpire_allocator
83+
struct s_umpire_allocator {
84+
void *addr; /* address of C++ memory */
85+
int idtor; /* index of destructor */
86+
};
87+
typedef struct s_umpire_allocator umpire_allocator;
88+
89+
// helper capsule_umpire_resourcemanager
90+
struct s_umpire_resourcemanager {
91+
void *addr; /* address of C++ memory */
92+
int idtor; /* index of destructor */
93+
};
94+
typedef struct s_umpire_resourcemanager umpire_resourcemanager;
95+
96+
// helper capsule_umpire_strategy_alignedallocator
97+
struct s_umpire_strategy_alignedallocator {
98+
void *addr; /* address of C++ memory */
99+
int idtor; /* index of destructor */
100+
};
101+
typedef struct s_umpire_strategy_alignedallocator umpire_strategy_alignedallocator;
102+
103+
// helper capsule_umpire_strategy_allocationadvisor
104+
struct s_umpire_strategy_allocationadvisor {
105+
void *addr; /* address of C++ memory */
106+
int idtor; /* index of destructor */
107+
};
108+
typedef struct s_umpire_strategy_allocationadvisor umpire_strategy_allocationadvisor;
109+
110+
// helper capsule_umpire_strategy_allocationprefetcher
111+
struct s_umpire_strategy_allocationprefetcher {
112+
void *addr; /* address of C++ memory */
113+
int idtor; /* index of destructor */
114+
};
115+
typedef struct s_umpire_strategy_allocationprefetcher umpire_strategy_allocationprefetcher;
116+
117+
// helper capsule_umpire_strategy_dynamicpoollist
118+
struct s_umpire_strategy_dynamicpoollist {
119+
void *addr; /* address of C++ memory */
120+
int idtor; /* index of destructor */
121+
};
122+
typedef struct s_umpire_strategy_dynamicpoollist umpire_strategy_dynamicpoollist;
123+
124+
// helper capsule_umpire_strategy_fixedpool
125+
struct s_umpire_strategy_fixedpool {
126+
void *addr; /* address of C++ memory */
127+
int idtor; /* index of destructor */
128+
};
129+
typedef struct s_umpire_strategy_fixedpool umpire_strategy_fixedpool;
130+
131+
// helper capsule_umpire_strategy_namedallocationstrategy
132+
struct s_umpire_strategy_namedallocationstrategy {
133+
void *addr; /* address of C++ memory */
134+
int idtor; /* index of destructor */
135+
};
136+
typedef struct s_umpire_strategy_namedallocationstrategy umpire_strategy_namedallocationstrategy;
137+
138+
// helper capsule_umpire_strategy_quickpool
139+
struct s_umpire_strategy_quickpool {
140+
void *addr; /* address of C++ memory */
141+
int idtor; /* index of destructor */
142+
};
143+
typedef struct s_umpire_strategy_quickpool umpire_strategy_quickpool;
144+
145+
// helper capsule_umpire_strategy_threadsafeallocator
146+
struct s_umpire_strategy_threadsafeallocator {
147+
void *addr; /* address of C++ memory */
148+
int idtor; /* index of destructor */
149+
};
150+
typedef struct s_umpire_strategy_threadsafeallocator umpire_strategy_threadsafeallocator;
151+
152+
void umpire_SHROUD_memory_destructor(umpire_SHROUD_capsule_data *cap);
153+
154+
#ifdef __cplusplus
155+
}
156+
#endif
157+
158+
#endif // TYPESUMPIRE_H
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# 0 "umpire.h"
2+
# 0 "<built-in>"
3+
# 0 "<command-line>"
4+
# 1 "/usr/include/stdc-predef.h" 1 3 4
5+
# 0 "<command-line>" 2
6+
# 1 "umpire.h"
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//////////////////////////////////////////////////////////////////////////////
2+
// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC and Umpire
3+
// project contributors. See the COPYRIGHT file for details.
4+
//
5+
// SPDX-License-Identifier: (MIT)
6+
//////////////////////////////////////////////////////////////////////////////
7+
#ifndef UMPIRE_H_
8+
#define UMPIRE_H_
9+
10+
#include "flang-rt/runtime/amd/umpire/config.hpp"
11+
12+
#define UMPIRE_INVALID_ALLOCATOR_ID 0xDEADBEE
13+
14+
#include "flang-rt/runtime/amd/umpire/interface/c_fortran/wrapUmpire.h"
15+
#include "flang-rt/runtime/amd/umpire/interface/c_fortran/wrapAllocator.h"
16+
#include "flang-rt/runtime/amd/umpire/interface/c_fortran/wrapResourceManager.h"
17+
18+
#endif // UMPIRE_H_
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// wrapAllocator.h
2+
// This file is generated by Shroud 0.12.2. Do not edit.
3+
// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC and Umpire
4+
// project contributors. See the COPYRIGHT file for details.
5+
//
6+
// SPDX-License-Identifier: (MIT)
7+
/**
8+
* \file wrapAllocator.h
9+
* \brief Shroud generated wrapper for Allocator class
10+
*/
11+
// For C users and C++ implementation
12+
13+
#ifndef WRAPALLOCATOR_H
14+
#define WRAPALLOCATOR_H
15+
16+
#include "typesUmpire.h"
17+
#ifdef __cplusplus
18+
#include <cstddef>
19+
#else
20+
#include <stddef.h>
21+
#endif
22+
23+
// splicer begin class.Allocator.CXX_declarations
24+
// splicer end class.Allocator.CXX_declarations
25+
26+
#ifdef __cplusplus
27+
extern "C" {
28+
#endif
29+
30+
// splicer begin class.Allocator.C_declarations
31+
// splicer end class.Allocator.C_declarations
32+
33+
void umpire_allocator_delete(umpire_allocator * self);
34+
35+
void * umpire_allocator_allocate(umpire_allocator * self, size_t bytes);
36+
37+
void umpire_allocator_deallocate(umpire_allocator * self, void * ptr);
38+
39+
void umpire_allocator_release(umpire_allocator * self);
40+
41+
size_t umpire_allocator_get_size(umpire_allocator * self, void * ptr);
42+
43+
size_t umpire_allocator_get_high_watermark(umpire_allocator * self);
44+
45+
size_t umpire_allocator_get_current_size(umpire_allocator * self);
46+
47+
size_t umpire_allocator_get_actual_size(umpire_allocator * self);
48+
49+
size_t umpire_allocator_get_allocation_count(umpire_allocator * self);
50+
51+
const char * umpire_allocator_get_name(umpire_allocator * self);
52+
53+
void umpire_allocator_get_name_bufferify(umpire_allocator * self,
54+
umpire_SHROUD_array *DSHF_rv);
55+
56+
size_t umpire_allocator_get_id(umpire_allocator * self);
57+
58+
#ifdef __cplusplus
59+
}
60+
#endif
61+
62+
#endif // WRAPALLOCATOR_H

0 commit comments

Comments
 (0)