Skip to content

Commit af16fc2

Browse files
authored
[libclc] Move mem_fence and barrier to clc library (#151446)
The __clc_mem_fence and __clc_work_group_barrier functions take two parameters, memory_scope and memory_order. This design allows the clc functions to implement the SPIR-V ControlBarrier and MemoryBarrier functions in the future. The default memory ordering in clc is set to __ATOMIC_SEQ_CST, which is also the default and strongest ordering in OpenCL and C++. The OpenCL cl_mem_fence_flags parameter is converted to a combination of __MEMORY_SCOPE_DEVICE and __MEMORY_SCOPE_WRKGRP, which is passed to clc. llvm-diff shows no change to nvptx64--nvidiacl.bc. llvm-diff shows a small change to amdgcn--amdhsa.bc, and the number of LLVM IR instructions is reduced by 1: https://alive2.llvm.org/ce/z/_Uhqvt
1 parent a15b629 commit af16fc2

File tree

14 files changed

+165
-30
lines changed

14 files changed

+165
-30
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
10+
#define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
11+
12+
#include <clc/internal/clc.h>
13+
14+
_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope,
15+
int memory_order);
16+
17+
#endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
10+
#define __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
11+
12+
#include <clc/internal/clc.h>
13+
14+
_CLC_OVERLOAD _CLC_DECL void __clc_work_group_barrier(int memory_scope,
15+
int memory_order);
16+
17+
#endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__

libclc/clc/lib/amdgcn/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
math/clc_ldexp_override.cl
2+
mem_fence/clc_mem_fence.cl
3+
synchronization/clc_work_group_barrier.cl
24
workitem/clc_get_global_offset.cl
35
workitem/clc_get_global_size.cl
46
workitem/clc_get_group_id.cl
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/mem_fence/clc_mem_fence.h>
10+
11+
// Fallback entry point for s_waitcnt; it is only given a definition (bound to
// the LLVM intrinsic name) in the #else branch further down.
void __clc_amdgcn_s_waitcnt(unsigned flags);
12+
13+
// s_waitcnt takes a 16-bit argument that packs, per counter, the maximum
14+
// number of operations that may still be pending when the wait completes:
15+
// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
16+
// [7] -- undefined
17+
// [6:4] -- exports, GDS, and mem write
18+
// [3:0] -- vector memory operations
19+
20+
// Clang 5 and newer provide __builtin_amdgcn_s_waitcnt; older compilers fall
// back to a function bound to the "llvm.amdgcn.s.waitcnt" symbol via __asm.
21+
#if __clang_major__ >= 5
22+
#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
23+
#else
24+
#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
25+
_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
26+
#endif
27+
28+
_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
29+
int memory_order) {
30+
if (memory_scope & __MEMORY_SCOPE_DEVICE) {
31+
// scalar loads are counted with LGKM but we don't know whether
32+
// the compiler turned any loads to scalar
33+
__waitcnt(0);
34+
} else if (memory_scope & __MEMORY_SCOPE_WRKGRP)
35+
__waitcnt(0xff); // LGKM is [12:8]
36+
}
37+
#undef __waitcnt
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/mem_fence/clc_mem_fence.h>
10+
#include <clc/synchronization/clc_work_group_barrier.h>
11+
12+
_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
13+
int memory_order) {
14+
__clc_mem_fence(memory_scope, memory_order);
15+
__builtin_amdgcn_s_barrier();
16+
}

libclc/clc/lib/ptx-nvidiacl/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
mem_fence/clc_mem_fence.cl
2+
synchronization/clc_work_group_barrier.cl
13
workitem/clc_get_global_id.cl
24
workitem/clc_get_group_id.cl
35
workitem/clc_get_local_id.cl
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/mem_fence/clc_mem_fence.h>
10+
11+
_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
12+
int memory_order) {
13+
if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP))
14+
__nvvm_membar_cta();
15+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/synchronization/clc_work_group_barrier.h>
10+
11+
_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
12+
int memory_order) {
13+
__syncthreads();
14+
}

libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,6 @@ typedef uint cl_mem_fence_flags;
1313

1414
#define CLK_LOCAL_MEM_FENCE 1
1515
#define CLK_GLOBAL_MEM_FENCE 2
16+
#define CLK_IMAGE_MEM_FENCE 4
1617

1718
#endif // __CLC_OPENCL_SYNCHRONIZATION_CL_MEM_FENCE_FLAGS_H__
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
10+
#define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
11+
12+
#include <clc/internal/clc.h>
13+
#include <clc/opencl/synchronization/cl_mem_fence_flags.h>
14+
15+
_CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) {
16+
int memory_scope = 0;
17+
if (flag & CLK_GLOBAL_MEM_FENCE)
18+
memory_scope |= __MEMORY_SCOPE_DEVICE;
19+
if (flag & CLK_LOCAL_MEM_FENCE)
20+
memory_scope |= __MEMORY_SCOPE_WRKGRP;
21+
return memory_scope;
22+
}
23+
24+
#endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__

0 commit comments

Comments
 (0)