Skip to content

Commit 486da12

Browse files
authored
[SYCLomatic][ASM] Support migration of PTX instruction "fence" with thread level on sope '.cta', '.gpu' and '.sys' (#2471)
Signed-off-by: chenwei.sun <[email protected]>
1 parent 063de1c commit 486da12

File tree

4 files changed

+71
-1
lines changed

4 files changed

+71
-1
lines changed

clang/lib/DPCT/RulesAsm/AsmMigration.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,6 +1219,43 @@ class SYCLGen : public SYCLGenBase {
12191219
return SYCLGenError();
12201220
}
12211221

1222+
bool handle_fence(const InlineAsmInstruction *Inst) override {
1223+
if (Inst->getNumInputOperands() != 0)
1224+
return SYCLGenError();
1225+
1226+
OS() << MapNames::getClNamespace() << "atomic_fence(";
1227+
if (Inst->hasAttr(InstAttr::sc) && Inst->hasAttr(InstAttr::cta)) {
1228+
OS() << MapNames::getClNamespace() << "memory_order::seq_cst,"
1229+
<< MapNames::getClNamespace() << "memory_scope::work_group";
1230+
1231+
} else if (Inst->hasAttr(InstAttr::sc) && Inst->hasAttr(InstAttr::gpu)) {
1232+
OS() << MapNames::getClNamespace() << "memory_order::seq_cst,"
1233+
<< MapNames::getClNamespace() << "memory_scope::device";
1234+
} else if (Inst->hasAttr(InstAttr::sc) && Inst->hasAttr(InstAttr::sys)) {
1235+
OS() << MapNames::getClNamespace() << "memory_order::seq_cst,"
1236+
<< MapNames::getClNamespace() << "memory_scope::system";
1237+
} else if (Inst->hasAttr(InstAttr::acq_rel) &&
1238+
Inst->hasAttr(InstAttr::cta)) {
1239+
OS() << MapNames::getClNamespace() << "memory_order::acq_rel,"
1240+
<< MapNames::getClNamespace() << "memory_scope::work_group";
1241+
1242+
} else if (Inst->hasAttr(InstAttr::acq_rel) &&
1243+
Inst->hasAttr(InstAttr::gpu)) {
1244+
OS() << MapNames::getClNamespace() << "memory_order::acq_rel,"
1245+
<< MapNames::getClNamespace() << "memory_scope::device";
1246+
} else if (Inst->hasAttr(InstAttr::acq_rel) &&
1247+
Inst->hasAttr(InstAttr::sys)) {
1248+
OS() << MapNames::getClNamespace() << "memory_order::acq_rel,"
1249+
<< MapNames::getClNamespace() << "memory_scope::system";
1250+
} else {
1251+
SYCLGenError();
1252+
}
1253+
1254+
OS() << ')';
1255+
endstmt();
1256+
return SYCLGenSuccess();
1257+
}
1258+
12221259
bool handle_sub(const InlineAsmInstruction *Inst) override {
12231260
return HandleAddSub(Inst);
12241261
}

clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ MODIFIER(up, ".up")
402402
MODIFIER(down, ".down")
403403
MODIFIER(idx, ".idx")
404404
MODIFIER(bfly, ".bfly")
405+
MODIFIER(sc, ".sc")
405406

406407
#undef LINKAGE
407408
#undef TARGET

clang/lib/DPCT/SrcAPI/APINames_ASM.inc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ ENTRY("dp4a", "dp4a", true, NO_FLAG, P1, "Successful")
6767
ENTRY("elect", "elect", false, NO_FLAG, P1, "Comment")
6868
ENTRY("ex2", "ex2", true, NO_FLAG, P1, "Successful")
6969
ENTRY("exit", "exit", false, NO_FLAG, P1, "Comment")
70-
ENTRY("fence", "fence", false, NO_FLAG, P1, "Comment")
70+
ENTRY("fence", "true", false, NO_FLAG, P1,
71+
"Only support thread fence on scope '.cta', '.gpu' and '.sys'")
7172
ENTRY("fma", "fma", true, NO_FLAG, P1, "Partial")
7273
ENTRY("fns", "fns", false, NO_FLAG, P1, "Comment")
7374
ENTRY("getctarank", "getctarank", false, NO_FLAG, P1, "Comment")
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2
2+
// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2
3+
// RUN: dpct --format-range=none -out-root %T/membar_fence %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only
4+
// RUN: FileCheck %s --match-full-lines --input-file %T/membar_fence/membar_fence.dp.cpp
5+
// RUN: %if build_lit %{icpx -c -fsycl %T/membar_fence/membar_fence.dp.cpp -o %T/membar_fence/membar_fence.dp.o %}
6+
7+
// clang-format off
8+
#include <cuda_runtime.h>
9+
#include <cstdint>
10+
11+
__global__ void fence() {
12+
// CHECK: sycl::atomic_fence(sycl::memory_order::seq_cst,sycl::memory_scope::work_group);
13+
asm volatile("fence.sc.cta; " : : : "memory");
14+
15+
// CHECK: sycl::atomic_fence(sycl::memory_order::seq_cst,sycl::memory_scope::device);
16+
asm volatile("fence.sc.gpu; " : : : "memory");
17+
18+
// CHECK: sycl::atomic_fence(sycl::memory_order::seq_cst,sycl::memory_scope::system);
19+
asm volatile("fence.sc.sys; " : : : "memory");
20+
21+
// CHECK: sycl::atomic_fence(sycl::memory_order::acq_rel,sycl::memory_scope::work_group);
22+
asm volatile("fence.acq_rel.cta; " : : : "memory");
23+
24+
// CHECK: sycl::atomic_fence(sycl::memory_order::acq_rel,sycl::memory_scope::device);
25+
asm volatile("fence.acq_rel.gpu; " : : : "memory");
26+
27+
// CHECK: sycl::atomic_fence(sycl::memory_order::acq_rel,sycl::memory_scope::system);
28+
asm volatile("fence.acq_rel.sys; " : : : "memory");
29+
}
30+
31+
// clang-format off

0 commit comments

Comments
 (0)