Skip to content

Commit ca2eea7

Browse files
adurangkrishna2803
authored and committed
[OFFLOAD][OPENMP] 6.0 compatible interop interface (llvm#143491)
The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification. * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * It implements a per-thread reuse mechanism for implicit objects to reduce overhead. * It provides a plugin interface that allows selecting the supported interop types and managing all the backend-related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches in the current query routines. * It supports an extension to define interop callbacks for library cleanup.
1 parent a72fc14 commit ca2eea7

File tree

16 files changed

+826
-174
lines changed

16 files changed

+826
-174
lines changed

offload/include/OpenMP/InteropAPI.h

Lines changed: 145 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,69 @@
1313

1414
#include "omp.h"
1515

16+
#include "PerThreadTable.h"
1617
#include "omptarget.h"
1718

1819
extern "C" {
1920

2021
typedef enum kmp_interop_type_t {
2122
kmp_interop_type_unknown = -1,
22-
kmp_interop_type_platform,
23-
kmp_interop_type_device,
24-
kmp_interop_type_tasksync,
23+
kmp_interop_type_target,
24+
kmp_interop_type_targetsync,
2525
} kmp_interop_type_t;
2626

27+
/// Common attributes of a prefer specification, stored as bit-fields.
struct interop_attrs_t {
  bool inorder : 1;  // in-order attribute flag
  int reserved : 31; // remaining bits, reserved

  /// Check if the supported attributes are compatible with the current
  /// attributes. Only if an attribute is supported can the value be true,
  /// otherwise it needs to be false.
  ///
  /// \param supported the attribute set this object is validated against.
  /// \returns false only when `inorder` is set in this object but not in
  ///          \p supported; true otherwise.
  bool checkSupportedOnly(interop_attrs_t supported) const {
    // `||` only evaluates its right operand when `supported.inorder` is
    // false, so re-testing `!supported.inorder` there is redundant.
    return supported.inorder || !inorder;
  }
};
38+
39+
struct interop_spec_t {
40+
int32_t fr_id;
41+
interop_attrs_t attrs; // Common attributes
42+
int64_t impl_attrs; // Implementation specific attributes (recognized by each
43+
// plugin)
44+
};
45+
46+
struct interop_flags_t {
47+
bool implicit : 1; // dispatch (true) or interop (false)
48+
bool nowait : 1; // has nowait flag
49+
int reserved : 30;
50+
};
51+
52+
struct interop_ctx_t {
53+
uint32_t version; // version of the interface (current is 0)
54+
interop_flags_t flags;
55+
int gtid;
56+
};
57+
58+
struct dep_pack_t {
59+
int32_t ndeps;
60+
int32_t ndeps_noalias;
61+
kmp_depend_info_t *deplist;
62+
kmp_depend_info_t *noalias_deplist;
63+
};
64+
65+
struct omp_interop_val_t;
66+
67+
typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data);
68+
69+
struct omp_interop_cb_instance_t {
70+
ompx_interop_cb_t *cb;
71+
void *data;
72+
73+
omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data)
74+
: cb(cb), data(data) {}
75+
76+
void operator()(omp_interop_val_t *interop) { cb(interop, data); }
77+
};
78+
2779
/// The interop value type, aka. the interop object.
2880
typedef struct omp_interop_val_t {
2981
/// Device and interop-type are determined at construction time and fixed.
@@ -34,10 +86,98 @@ typedef struct omp_interop_val_t {
3486
__tgt_device_info device_info;
3587
const kmp_interop_type_t interop_type;
3688
const intptr_t device_id;
37-
const omp_foreign_runtime_ids_t vendor_id = cuda;
38-
const intptr_t backend_type_id = omp_interop_backend_type_cuda_1;
89+
omp_vendor_id_t vendor_id = omp_vendor_llvm;
90+
tgt_foreign_runtime_id_t fr_id = tgt_fr_none;
91+
interop_attrs_t attrs{false, 0}; // Common prefer specification attributes
92+
int64_t impl_attrs = 0; // Implementation prefer specification attributes
93+
94+
// Constants
95+
static constexpr int no_owner = -1; // This interop has no current owner
96+
97+
void *rtl_property = nullptr; // Plugin dependent information
98+
// For implicitly created Interop objects (e.g., from a dispatch construct)
99+
// who owns the object
100+
int owner_gtid = no_owner;
101+
// Marks whether the object was requested since the last time it was synced
102+
bool clean = true;
103+
104+
typedef llvm::SmallVector<omp_interop_cb_instance_t> callback_list_t;
105+
106+
callback_list_t completion_cbs;
107+
108+
void reset() {
109+
owner_gtid = no_owner;
110+
markClean();
111+
clearCompletionCbs();
112+
}
113+
114+
llvm::Expected<DeviceTy &> getDevice() const;
115+
116+
bool hasOwner() const { return owner_gtid != no_owner; }
117+
118+
void setOwner(int gtid) { owner_gtid = gtid; }
119+
bool isOwnedBy(int gtid) { return owner_gtid == gtid; }
120+
bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec);
121+
bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec,
122+
int64_t DeviceNum, int gtid);
123+
void markClean() { clean = true; }
124+
void markDirty() { clean = false; }
125+
bool isClean() const { return clean; }
126+
127+
int32_t flush(DeviceTy &Device);
128+
int32_t sync_barrier(DeviceTy &Device);
129+
int32_t async_barrier(DeviceTy &Device);
130+
int32_t release(DeviceTy &Device);
131+
132+
void addCompletionCb(ompx_interop_cb_t *cb, void *data) {
133+
completion_cbs.push_back(omp_interop_cb_instance_t(cb, data));
134+
}
135+
136+
int numCompletionCbs() const { return completion_cbs.size(); }
137+
void clearCompletionCbs() { completion_cbs.clear(); }
138+
139+
void runCompletionCbs() {
140+
for (auto &cbInstance : completion_cbs)
141+
cbInstance(this);
142+
clearCompletionCbs();
143+
}
39144
} omp_interop_val_t;
40145

41146
} // extern "C"
42147

148+
struct InteropTableEntry {
149+
using ContainerTy = typename std::vector<omp_interop_val_t *>;
150+
using iterator = typename ContainerTy::iterator;
151+
152+
ContainerTy Interops;
153+
154+
static constexpr int reservedEntriesPerThread =
155+
20; // reserve some entries to avoid reallocation
156+
157+
void add(omp_interop_val_t *obj) {
158+
if (Interops.capacity() == 0)
159+
Interops.reserve(reservedEntriesPerThread);
160+
Interops.push_back(obj);
161+
}
162+
163+
template <class ClearFuncTy> void clear(ClearFuncTy f) {
164+
for (auto &Obj : Interops) {
165+
f(Obj);
166+
}
167+
}
168+
169+
/// vector interface
170+
int size() const { return Interops.size(); }
171+
iterator begin() { return Interops.begin(); }
172+
iterator end() { return Interops.end(); }
173+
iterator erase(iterator it) { return Interops.erase(it); }
174+
};
175+
176+
struct InteropTblTy
177+
: public PerThreadTable<InteropTableEntry, omp_interop_val_t *> {
178+
void clear();
179+
};
180+
181+
void syncImplicitInterops(int gtid, void *event);
182+
43183
#endif // OMPTARGET_OPENMP_INTEROP_API_H

offload/include/OpenMP/omp.h

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,18 @@ typedef enum omp_interop_rc {
8080
omp_irc_other = -6
8181
} omp_interop_rc_t;
8282

83-
typedef enum omp_interop_fr {
84-
omp_ifr_cuda = 1,
85-
omp_ifr_cuda_driver = 2,
86-
omp_ifr_opencl = 3,
87-
omp_ifr_sycl = 4,
88-
omp_ifr_hip = 5,
89-
omp_ifr_level_zero = 6,
90-
omp_ifr_last = 7
91-
} omp_interop_fr_t;
83+
/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */
84+
typedef enum tgt_foreign_runtime_id_t {
85+
tgt_fr_none = 0,
86+
tgt_fr_cuda = 1,
87+
tgt_fr_cuda_driver = 2,
88+
tgt_fr_opencl = 3,
89+
tgt_fr_sycl = 4,
90+
tgt_fr_hip = 5,
91+
tgt_fr_level_zero = 6,
92+
tgt_fr_hsa = 7,
93+
tgt_fr_last = 8
94+
} tgt_foreign_runtime_id_t;
9295

9396
typedef void *omp_interop_t;
9497

@@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
134137
extern const char *__KAI_KMPC_CONVENTION
135138
omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
136139

137-
typedef enum omp_interop_backend_type_t {
138-
// reserve 0
139-
omp_interop_backend_type_cuda_1 = 1,
140-
} omp_interop_backend_type_t;
141-
142-
typedef enum omp_foreign_runtime_ids {
143-
cuda = 1,
144-
cuda_driver = 2,
145-
opencl = 3,
146-
sycl = 4,
147-
hip = 5,
148-
level_zero = 6,
149-
} omp_foreign_runtime_ids_t;
140+
/* Vendor-defined values from OpenMP Additional Definitions document v2.1 */
141+
typedef enum omp_vendor_id {
142+
omp_vendor_unknown = 0,
143+
omp_vendor_amd = 1,
144+
omp_vendor_arm = 2,
145+
omp_vendor_bsc = 3,
146+
omp_vendor_fujitsu = 4,
147+
omp_vendor_gnu = 5,
148+
omp_vendor_hpe = 6,
149+
omp_vendor_ibm = 7,
150+
omp_vendor_intel = 8,
151+
omp_vendor_llvm = 9,
152+
omp_vendor_nec = 10,
153+
omp_vendor_nvidia = 11,
154+
omp_vendor_ti = 12,
155+
omp_vendor_last = 13
156+
} omp_vendor_id_t;
150157

151158
///} InteropAPI
152159

offload/include/PerThreadTable.h

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Table indexed with one entry per thread.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef OFFLOAD_PERTHREADTABLE_H
14+
#define OFFLOAD_PERTHREADTABLE_H
15+
16+
#include <list>
17+
#include <memory>
18+
#include <mutex>
19+
20+
// Using an STL container (such as std::vector) indexed by thread ID has
21+
// too many race-condition issues, so we store each thread entry in a
22+
// thread_local variable.
23+
// ContainerType is the container type used to store the objects, e.g., std::vector,
24+
// std::set, etc. by each thread. ObjectType is the type of the stored objects, e.g.,
25+
// omp_interop_val_t *, ...
26+
27+
/// Table that keeps one container of objects per thread.
///
/// ContainerType must provide an `iterator` type plus `add(ObjectType)`,
/// `erase(iterator)`, `begin()`, `end()` and `clear(F)`. ObjectType is the type
/// of the stored objects. add/erase/size/begin/end operate on the calling
/// thread's container; clear() visits the containers of all registered
/// threads.
template <typename ContainerType, typename ObjectType> struct PerThreadTable {
  using iterator = typename ContainerType::iterator;

  struct PerThreadData {
    size_t NElements = 0; // count maintained by add(), erase() and clear()
    std::unique_ptr<ContainerType> ThEntry; // allocated on first access
  };

  std::mutex Mtx; // taken around every access to ThreadDataList
  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;

  // define default constructors, disable copy and move constructors
  PerThreadTable() = default;
  PerThreadTable(const PerThreadTable &) = delete;
  PerThreadTable(PerThreadTable &&) = delete;
  PerThreadTable &operator=(const PerThreadTable &) = delete;
  PerThreadTable &operator=(PerThreadTable &&) = delete;
  ~PerThreadTable() {
    std::lock_guard<std::mutex> Lock(Mtx);
    ThreadDataList.clear();
  }

private:
  /// Return the calling thread's data, creating it and registering it in
  /// ThreadDataList on first use.
  ///
  /// NOTE(review): ThData is a function-local `static thread_local`, so each
  /// thread has a single slot per PerThreadTable specialization rather than
  /// per table object, and the slot stays set after clear() empties
  /// ThreadDataList, so such a thread is not registered again. Confirm this
  /// is acceptable for all users of the table.
  PerThreadData &getThreadData() {
    static thread_local std::shared_ptr<PerThreadData> ThData = nullptr;
    if (!ThData) {
      ThData = std::make_shared<PerThreadData>();
      std::lock_guard<std::mutex> Lock(Mtx);
      ThreadDataList.push_back(ThData);
    }
    return *ThData;
  }

protected:
  /// Return the calling thread's container, allocating it on first use.
  ContainerType &getThreadEntry() {
    auto &ThData = getThreadData();
    if (!ThData.ThEntry)
      ThData.ThEntry = std::make_unique<ContainerType>();
    return *ThData.ThEntry;
  }

  /// Return a reference to the calling thread's element counter.
  size_t &getThreadNElements() { return getThreadData().NElements; }

public:
  /// Add \p obj to the calling thread's container and bump its counter.
  void add(ObjectType obj) {
    auto &Entry = getThreadEntry();
    auto &NElements = getThreadNElements();
    NElements++;
    Entry.add(obj);
  }

  /// Erase \p it from the calling thread's container and decrement its
  /// counter. The counter is decremented unconditionally, so \p it has to
  /// designate an existing element.
  /// \returns the iterator returned by ContainerType::erase.
  iterator erase(iterator it) {
    auto &Entry = getThreadEntry();
    auto &NElements = getThreadNElements();
    NElements--;
    return Entry.erase(it);
  }

  /// Number of elements recorded for the calling thread.
  size_t size() { return getThreadNElements(); }

  // Iterators to traverse objects owned by
  // the current thread
  iterator begin() { return getThreadEntry().begin(); }
  iterator end() { return getThreadEntry().end(); }

  /// Call ContainerType::clear(f) on the container of every registered thread
  /// that has a non-zero element count, reset those counts to zero, and drop
  /// all registrations. Mtx is held for the whole traversal.
  template <class F> void clear(F f) {
    std::lock_guard<std::mutex> Lock(Mtx);
    // Bind by reference: copying the shared_ptr would cost an atomic
    // reference-count increment and decrement per registered thread.
    for (const auto &ThData : ThreadDataList) {
      if (!ThData->ThEntry || ThData->NElements == 0)
        continue;
      ThData->ThEntry->clear(f);
      ThData->NElements = 0;
    }
    ThreadDataList.clear();
  }
};
113+
114+
#endif

offload/include/PluginManager.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#include <mutex>
3636
#include <string>
3737

38+
#include "OpenMP/InteropAPI.h"
39+
3840
using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;
3941

4042
/// Struct for the data required to handle plugins
@@ -88,6 +90,9 @@ struct PluginManager {
8890
HostPtrToTableMapTy HostPtrToTableMap;
8991
std::mutex TblMapMtx; ///< For HostPtrToTableMap
9092

93+
/// Table of cached implicit interop objects
94+
InteropTblTy InteropTbl;
95+
9196
// Work around for plugins that call dlopen on shared libraries that call
9297
// tgt_register_lib during their initialisation. Stash the pointers in a
9398
// vector until the plugins are all initialised and then register them.
@@ -185,5 +190,6 @@ void initRuntime();
185190
void deinitRuntime();
186191

187192
extern PluginManager *PM;
188-
193+
extern std::atomic<bool> RTLAlive; // Indicates if the RTL has been initialized
194+
extern std::atomic<int> RTLOngoingSyncs; // Counts ongoing external syncs
189195
#endif // OMPTARGET_PLUGIN_MANAGER_H

offload/include/Shared/APITypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ struct __tgt_device_image {
3636
struct __tgt_device_info {
3737
void *Context = nullptr;
3838
void *Device = nullptr;
39+
void *Platform = nullptr;
3940
};
4041

4142
/// This struct is a record of all the host code that may be offloaded to a

0 commit comments

Comments
 (0)