Skip to content

Commit d5a9468

Browse files
committed
Merge branch 'sycl' of https://github.com/intel/llvm into benchmarking-workflow
2 parents 5173a0d + 38e6e1b commit d5a9468

File tree

28 files changed

+1384
-242
lines changed

28 files changed

+1384
-242
lines changed

.github/workflows/sycl-linux-run-tests.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,9 @@ jobs:
363363
cts_exclude_filter=""
364364
# If CTS_TESTS_TO_BUILD is null - use filter
365365
if [ -z "$CTS_TESTS_TO_BUILD" ]; then
366-
if [ "${{ contains(inputs.target_devices, 'opencl:cpu') }}" = "true" ]; then
366+
if [ "${{ contains(inputs.cts_testing_mode, 'build-only') }}" = "true" ]; then
367+
cts_exclude_filter=$PWD/devops/cts_exclude_filter_compfails
368+
elif [ "${{ contains(inputs.target_devices, 'opencl:cpu') }}" = "true" ]; then
367369
cts_exclude_filter=$PWD/devops/cts_exclude_filter_OCL_CPU
368370
elif [ "${{ contains(inputs.target_devices, 'level_zero:gpu') }}" = "true" ]; then
369371
cts_exclude_filter=$PWD/devops/cts_exclude_filter_L0_GPU

.github/workflows/sycl-nightly.yml

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ jobs:
242242
body: "Daily build ${{ steps.tag.outputs.TAG }}"
243243
target_commitish: ${{ github.sha }}
244244

245-
ubuntu2204_docker_build_push:
245+
docker_build_push:
246246
if: github.repository == 'intel/llvm'
247247
runs-on: [Linux, build]
248248
permissions:
@@ -254,42 +254,16 @@ jobs:
254254
with:
255255
name: sycl_linux_default
256256
path: devops/
257-
- name: Build and Push Container (with drivers)
257+
- name: Build and Push Container
258258
uses: ./devops/actions/build_container
259259
with:
260260
push: ${{ github.ref_name == 'sycl' }}
261-
file: ubuntu2204_preinstalled
261+
file: nightly
262262
username: ${{ github.repository_owner }}
263263
password: ${{ secrets.GITHUB_TOKEN }}
264264
build-args: |
265265
base_image=ghcr.io/intel/llvm/ubuntu2404_intel_drivers
266-
base_tag=latest
266+
base_tag=alldeps
267267
tags: |
268-
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:${{ github.sha }}
269-
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:latest
270-
- name: Build and Push Container (no drivers)
271-
uses: ./devops/actions/build_container
272-
with:
273-
push: ${{ github.ref_name == 'sycl' }}
274-
file: ubuntu2204_preinstalled
275-
username: ${{ github.repository_owner }}
276-
password: ${{ secrets.GITHUB_TOKEN }}
277-
build-args: |
278-
base_image=ghcr.io/intel/llvm/ubuntu2204_base
279-
base_tag=latest
280-
tags: |
281-
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:no-drivers-${{ github.sha }}
282-
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:no-drivers
283-
- name: Build and Push Container (Build image)
284-
uses: ./devops/actions/build_container
285-
with:
286-
push: ${{ github.ref_name == 'sycl' }}
287-
file: ubuntu2204_preinstalled
288-
username: ${{ github.repository_owner }}
289-
password: ${{ secrets.GITHUB_TOKEN }}
290-
build-args: |
291-
base_image=ghcr.io/intel/llvm/ubuntu2204_build
292-
base_tag=latest
293-
tags: |
294-
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:build-${{ github.sha }}
295-
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:build
268+
ghcr.io/${{ github.repository }}/sycl_ubuntu2404_nightly:${{ github.sha }}
269+
ghcr.io/${{ github.repository }}/sycl_ubuntu2404_nightly:latest

devops/containers/ubuntu2204_preinstalled.Dockerfile renamed to devops/containers/nightly.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
ARG base_tag=latest
2-
ARG base_image=ghcr.io/intel/llvm/ubuntu2204_intel_drivers
1+
ARG base_tag=alldeps
2+
ARG base_image=ghcr.io/intel/llvm/ubuntu2404_intel_drivers
33

44
FROM $base_image:$base_tag
55

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Please use "#" to add comments here.
2+
# Do not delete the file even if it's empty.

libdevice/nativecpu_utils.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,7 @@ using __nativecpu_state = native_cpu::state;
3131

3232
#define OCL_LOCAL __attribute__((opencl_local))
3333
#define OCL_GLOBAL __attribute__((opencl_global))
34-
35-
DEVICE_EXTERNAL OCL_LOCAL void *
36-
__spirv_GenericCastToPtrExplicit_ToLocal(void *p, int) {
37-
return (OCL_LOCAL void *)p;
38-
}
39-
40-
DEVICE_EXTERNAL OCL_GLOBAL void *
41-
__spirv_GenericCastToPtrExplicit_ToGlobal(void *p, int) {
42-
return (OCL_GLOBAL void *)p;
43-
}
34+
#define OCL_PRIVATE __attribute__((opencl_private))
4435

4536
DEVICE_EXTERN_C void __mux_work_group_barrier(uint32_t id, uint32_t scope,
4637
uint32_t semantics);
@@ -61,6 +52,23 @@ __spirv_MemoryBarrier(uint32_t Memory, uint32_t Semantics) {
6152
// Turning clang format off here because it reorders macro invocations
6253
// making the following code very difficult to read.
6354
// clang-format off
55+
56+
// Emits one __spirv_GenericCastToPtrExplicit_<Suffix> definition that casts a
// (possibly cv-qualified) generic pointer to the address space AddrSpace.
// Quals is the cv-qualifier sequence applied to both the parameter and the
// result; it may be empty.
#define DefGenericCastToPtrExplImpl(Suffix, AddrSpace, Quals)                  \
  DEVICE_EXTERNAL Quals AddrSpace void *                                       \
      __spirv_GenericCastToPtrExplicit_##Suffix(Quals void *p, int) {          \
    return (Quals AddrSpace void *)p;                                          \
  }

// Emits the four cv-qualified overloads (none, const, volatile,
// const volatile) of the cast for a single target address space.
#define DefGenericCastToPtrExpl(Suffix, AddrSpace)                             \
  DefGenericCastToPtrExplImpl(Suffix, AddrSpace, )                             \
  DefGenericCastToPtrExplImpl(Suffix, AddrSpace, const)                        \
  DefGenericCastToPtrExplImpl(Suffix, AddrSpace, volatile)                     \
  DefGenericCastToPtrExplImpl(Suffix, AddrSpace, const volatile)

DefGenericCastToPtrExpl(ToPrivate, OCL_PRIVATE)
DefGenericCastToPtrExpl(ToLocal, OCL_LOCAL)
DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL)
71+
6472
#define DefSubgroupBlockINTEL1(Type, PType) \
6573
template <> \
6674
__SYCL_CONVERGENT__ DEVICE_EXTERNAL Type \

llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ namespace {
2121

2222
static constexpr char ACCESS_CHAIN[] = "_Z19__spirv_AccessChain";
2323
static constexpr char MATRIX_TYPE[] = "spirv.CooperativeMatrixKHR";
24+
static constexpr char MATRIX_LAYOUT[] = "joint_matrix_layout_to_spv";
2425

2526
Type *getInnermostType(Type *Ty) {
2627
while (auto *ArrayTy = dyn_cast<ArrayType>(Ty))
@@ -184,17 +185,99 @@ bool transformAccessChain(Function *F) {
184185
}
185186
return ModuleChanged;
186187
}
188+
189+
// Walks backwards from Load through the instructions of its basic block and
// returns the closest preceding store whose pointer operand is Ptr. Returns
// nullptr when no such store precedes Load in that block; other basic blocks
// are not inspected.
StoreInst *findLastStoreBeforeLoad(Value *Ptr, Instruction *Load) {
  const BasicBlock::iterator BlockBegin = Load->getParent()->begin();
  for (BasicBlock::iterator Cursor(Load); Cursor != BlockBegin;) {
    --Cursor;
    auto *Candidate = dyn_cast<StoreInst>(&*Cursor);
    if (Candidate && Candidate->getPointerOperand() == Ptr)
      return Candidate;
  }
  return nullptr;
}
199+
200+
// Per SPIR-V specification Layout of a matrix must be a constant instruction
201+
// aka a constexpr or specialization constant. Meanwhile in SYCL headers
202+
// layout is passed as a parameter to joint_matrix_load function, so even if
203+
// that layout is a constant expression in the user's code - it's not possible
204+
// to prove that to the compiler, so constant propagation will happen only
205+
// after inlining, not in AST. That means, that with O0 layout would remain
206+
// to be a runtime variable in LLVM IR.
207+
// SYCL matrix layout is being mapped on SPIR-V matrix layout by
208+
// joint_matrix_layout_to_spv function. The following routine finds calls to
209+
// this function and replaces them with the found constant.
210+
// This function also cleans up code, that becomes dead. Pattern of the dead
211+
// code is stable, as user's code doesn't affect it.
212+
bool propagateConstexprLayout(Function *F) {
213+
llvm::SmallVector<Instruction *, 8> ToErase;
214+
for (auto I = F->user_begin(), E = F->user_end(); I != E;) {
215+
User *U = *I++;
216+
auto *CI = dyn_cast<CallInst>(U);
217+
if (!CI)
218+
continue;
219+
auto *Op = dyn_cast<Instruction>(CI->getArgOperand(0));
220+
if (!Op || !isa<LoadInst>(Op))
221+
continue;
222+
auto *Ptr = dyn_cast<Instruction>(cast<LoadInst>(Op)->getPointerOperand());
223+
if (!Ptr)
224+
continue;
225+
226+
ConstantInt *ConstLayout = nullptr;
227+
StoreInst *SI = findLastStoreBeforeLoad(Ptr, Op);
228+
if (!SI)
229+
continue;
230+
ConstLayout = dyn_cast<ConstantInt>(SI->getValueOperand());
231+
if (ConstLayout) {
232+
CI->replaceAllUsesWith(ConstLayout);
233+
ToErase.push_back(CI);
234+
ToErase.push_back(SI);
235+
ToErase.push_back(Op);
236+
ToErase.push_back(Ptr);
237+
if (auto *Cast = dyn_cast<AddrSpaceCastInst>(Ptr)) {
238+
auto *OrigPtr = Cast->getPointerOperand();
239+
if (auto *AI = dyn_cast<AllocaInst>(OrigPtr))
240+
ToErase.push_back(AI);
241+
}
242+
}
243+
}
244+
245+
// There are possible cases, when a single instruction result is used multiple
246+
// times. For this case we have to use a vector to store such instructions
247+
// and keep track if we have removed them before to avoid double free().
248+
SmallPtrSet<Instruction *, 8> Erased;
249+
for (Instruction *II : ToErase) {
250+
if (!II->use_empty())
251+
continue;
252+
if (Erased.contains(II))
253+
continue;
254+
II->dropAllReferences();
255+
II->eraseFromParent();
256+
Erased.insert(II);
257+
}
258+
return !ToErase.empty();
259+
}
187260
} // namespace
188261

189262
PreservedAnalyses
190263
SYCLJointMatrixTransformPass::run(Module &M, ModuleAnalysisManager &MAM) {
191264
bool ModuleChanged = false;
265+
llvm::SmallVector<Function *, 1> ToErase;
192266
for (Function &F : M) {
193-
if (!F.isDeclaration())
194-
continue;
267+
if (!F.isDeclaration()) {
268+
if (F.getName() == MATRIX_LAYOUT) {
269+
ModuleChanged |= propagateConstexprLayout(&F);
270+
ToErase.push_back(&F);
271+
} else
272+
continue;
273+
}
195274
if (F.getName().starts_with(ACCESS_CHAIN))
196275
ModuleChanged |= transformAccessChain(&F);
197276
}
198277

278+
for (auto *F : ToErase)
279+
if (F->users().empty())
280+
F->eraseFromParent();
281+
199282
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
200283
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; The test checks that uses of the result of joint_matrix_layout_to_spv calls
2+
; are replaced with the layout constant.
3+
4+
; RUN: opt -passes=sycl-joint-matrix-transform < %s -S | FileCheck %s
5+
6+
; ModuleID = 'test.bc'
7+
source_filename = "test.cpp"
8+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
9+
target triple = "spir64-unknown-unknown"
10+
11+
$joint_matrix_layout_to_spv = comdat any
12+
13+
; CHECK: define weak_odr dso_local spir_kernel void @test
14+
; CHECK-NEXT: entry:
15+
; CHECK-NEXT: %{{.*}} = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHR{{.*}}(ptr addrspace(1){{.*}}, i32 noundef 0, i64 noundef{{.*}}
16+
; CHECK-NEXT: %{{.*}} = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHR{{.*}}(ptr addrspace(1){{.*}}, i32 noundef 1, i64 noundef{{.*}}
17+
; CHECK-NEXT: %{{.*}} = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHR{{.*}}(ptr addrspace(1){{.*}}, i32 noundef 1, i64 noundef{{.*}}
18+
; CHECK-NEXT: %{{.*}} = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHR{{.*}}(ptr addrspace(1){{.*}}, i32 noundef 2, i64 noundef{{.*}}
19+
; CHECK-NEXT: ret void
20+
21+
; CHECK-NOT: joint_matrix_layout_to_spv
22+
23+
define weak_odr dso_local spir_kernel void @test(ptr addrspace(1) %matrix.1, ptr addrspace(1) %matrix.2, i64 noundef %stride) {
24+
entry:
25+
%layout.1 = alloca i32, align 4
26+
%layout.2 = alloca i32, align 4
27+
%layout.ascast.1 = addrspacecast ptr %layout.1 to ptr addrspace(4)
28+
%layout.ascast.2 = addrspacecast ptr %layout.2 to ptr addrspace(4)
29+
store i32 0, ptr addrspace(4) %layout.ascast.1, align 4
30+
store i32 1, ptr addrspace(4) %layout.ascast.2, align 4
31+
32+
%layout.val.1 = load i32, ptr addrspace(4) %layout.ascast.1, align 4
33+
%layout.spv.1 = call spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %layout.val.1)
34+
%mload.1 = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef %matrix.1, i32 noundef %layout.spv.1, i64 noundef %stride, i32 noundef 0)
35+
36+
%layout.val.2 = load i32, ptr addrspace(4) %layout.ascast.2, align 4
37+
%layout.spv.2 = call spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %layout.val.2)
38+
%mload.2 = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef %matrix.2, i32 noundef %layout.spv.2, i64 noundef %stride, i32 noundef 0)
39+
40+
%layout.spv.3 = call spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %layout.val.2)
41+
%mload.3 = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef %matrix.2, i32 noundef %layout.spv.3, i64 noundef %stride, i32 noundef 0)
42+
43+
store i32 2, ptr addrspace(4) %layout.ascast.2, align 4
44+
%layout.val.4 = load i32, ptr addrspace(4) %layout.ascast.2, align 4
45+
%layout.spv.4 = call spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %layout.val.4)
46+
%mload.4 = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef %matrix.2, i32 noundef %layout.spv.4, i64 noundef %stride, i32 noundef 0)
47+
ret void
48+
}
49+
50+
declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef, i32 noundef, i64 noundef, i32 noundef)
51+
52+
define linkonce_odr dso_local spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %Layout) comdat {
53+
entry:
54+
%retval = alloca i32, align 4
55+
%Layout.addr = alloca i32, align 4
56+
%retval.ascast = addrspacecast ptr %retval to ptr addrspace(4)
57+
%Layout.addr.ascast = addrspacecast ptr %Layout.addr to ptr addrspace(4)
58+
store i32 %Layout, ptr addrspace(4) %Layout.addr.ascast, align 4
59+
%0 = load i32, ptr addrspace(4) %Layout.addr.ascast, align 4
60+
switch i32 %0, label %sw.epilog [
61+
i32 0, label %sw.bb
62+
i32 1, label %sw.bb1
63+
i32 2, label %sw.bb2
64+
i32 3, label %sw.bb3
65+
]
66+
67+
sw.bb: ; preds = %entry
68+
store i32 0, ptr addrspace(4) %retval.ascast, align 4
69+
br label %return
70+
71+
sw.bb1: ; preds = %entry
72+
store i32 1, ptr addrspace(4) %retval.ascast, align 4
73+
br label %return
74+
75+
sw.bb2: ; preds = %entry
76+
store i32 2, ptr addrspace(4) %retval.ascast, align 4
77+
br label %return
78+
79+
sw.bb3: ; preds = %entry
80+
store i32 3, ptr addrspace(4) %retval.ascast, align 4
81+
br label %return
82+
83+
sw.epilog: ; preds = %entry
84+
call void @llvm.trap()
85+
unreachable
86+
87+
return: ; preds = %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
88+
%1 = load i32, ptr addrspace(4) %retval.ascast, align 4
89+
ret i32 %1
90+
}
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# commit 222e4b1d51536bb38e03e2000a79679af0a44a6d
2-
# Merge: 30d183a0 28108a7e
3-
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
4-
# Date: Fri Jan 17 10:28:34 2025 +0000
5-
# Merge pull request #2561 from Bensuo/ben/cmd-buffer-l0-fence
6-
# [L0][CMDBUF] Optimize fence/event waits during update
7-
set(UNIFIED_RUNTIME_TAG 222e4b1d51536bb38e03e2000a79679af0a44a6d)
1+
# commit 8b7a99578966eb691a961d9620ea38d235196b2f
2+
# Merge: ed095412 7b0e3b19
3+
# Author: Martin Grant <martin.morrisongrant@codeplay.com>
4+
# Date: Mon Jan 20 09:27:22 2025 +0000
5+
# Merge pull request #2582 from przemektmalon/przemek/intel-host-usm-support
6+
# Enable Host USM backed images on Level Zero
7+
set(UNIFIED_RUNTIME_TAG 8b7a99578966eb691a961d9620ea38d235196b2f)

sycl/doc/developer/DockerBKMs.md

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,6 @@ development containers:
5353
NVidia/AMD and can be used for building DPC++
5454
compiler from source with all backends enabled or for end-to-end testing
5555
with HIP/CUDA on machines with corresponding GPUs available.
56-
- `devops/containers/sycl_ubuntu2204_nightly`: contains the latest successfully
57-
built nightly build of DPC++ compiler. The Dockerfile comes in three flavors:
58-
with pre-installed Intel drivers (`latest`), without them (`no-drivers`) and
59-
with development kits installed (`build`).
6056

6157
### Ubuntu 24.04-based Dockerfiles
6258

@@ -78,7 +74,8 @@ development containers:
7874
NVidia/AMD and can be used for building DPC++
7975
compiler from source with all backends enabled or for end-to-end testing
8076
with HIP/CUDA on machines with corresponding GPUs available.
81-
77+
- `devops/containers/nightly`: contains the latest successfully
78+
built nightly build of the DPC++ compiler.
8279

8380
## Running Docker container interactively
8481

@@ -199,7 +196,7 @@ Docker containers can be built with the following command:
199196
docker build -f path/to/devops/containers/file.Dockerfile path/to/devops/
200197
```
201198

202-
The `ubuntu2204_preinstalled.Dockerfile` script expects `llvm_sycl.tar.xz` file
199+
The `nightly.Dockerfile` script expects the `llvm_sycl.tar.xz` file
203200
to be present in the `devops/` directory.
204201

205202
Containers other than base provide several configurable arguments, the most

0 commit comments

Comments
 (0)