Skip to content

Commit 7a2e290

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (10 commits)
2 parents f48b6da + 3ee3f1f commit 7a2e290

32 files changed

+2994
-192
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,8 +1093,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
10931093
ThinOrFullLTOPhase) {
10941094
MPM.addPass(createModuleToFunctionPassAdaptor(
10951095
InferAddressSpacesPass(clang::targets::SPIR_GENERIC_AS)));
1096-
MPM.addPass(
1097-
createModuleToFunctionPassAdaptor(SYCLOptimizeBarriersPass()));
1096+
if (Level != OptimizationLevel::O0)
1097+
MPM.addPass(createModuleToFunctionPassAdaptor(
1098+
SYCLOptimizeBarriersPass()));
10981099
});
10991100
}
11001101

clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// CHECK: InferFunctionAttrsPass
1313
// CHECK: AlwaysInlinerPass
1414
// CHECK: ModuleInlinerWrapperPass
15+
// CHECK: SYCLOptimizeBarriersPass
1516
// CHECK: ConstantMergePass
1617
// CHECK: SYCLMutatePrintfAddrspacePass
1718
// CHECK: SYCLPropagateAspectsUsagePass
@@ -22,4 +23,13 @@
2223
//
2324
// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -fno-sycl-early-optimizations -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-NOEARLYOPT
2425
// CHECK-NOEARLYOPT-NOT: ConstantMergePass
26+
// CHECK-NOEARLYOPT-NOT: SYCLOptimizeBarriersPass
2527
// CHECK-NOEARLYOPT: SYCLMutatePrintfAddrspacePass
28+
29+
// RUN: %clang_cc1 -O0 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
30+
// CHECK-O0-NOT: SYCLOptimizeBarriersPass
31+
32+
// Passes registered via registerOptimizerLastEPCallback don't run on empty
33+
// code
34+
__attribute__((sycl_device)) void bar() {
35+
}

devops/scripts/benchmarks/benches/compute.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -218,19 +218,6 @@ def benchmarks(self) -> list[Benchmark]:
218218

219219
# Add GraphApiSubmitGraph benchmarks
220220
for in_order_queue in [0, 1]:
221-
for profiler_type in profiler_types:
222-
benches.append(
223-
GraphApiSubmitGraph(
224-
self,
225-
runtime,
226-
in_order_queue,
227-
self.submit_graph_num_kernels[-1],
228-
0,
229-
profiler_type,
230-
useEvents=0,
231-
useHostTasks=1,
232-
)
233-
)
234221
for num_kernels in self.submit_graph_num_kernels:
235222
for measure_completion_time in [0, 1]:
236223
for use_events in [0, 1]:
@@ -1009,6 +996,9 @@ def __init__(
1009996
profiler_type,
1010997
)
1011998

999+
def supported_runtimes(self) -> list[RUNTIMES]:
1000+
return super().supported_runtimes() + [RUNTIMES.SYCL_PREVIEW]
1001+
10121002
def explicit_group(self):
10131003
return f"SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels"
10141004

libdevice/sanitizer/msan_rtl.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,4 +814,42 @@ __msan_unpoison_strided_copy(uptr dest, uint32_t dest_as, uptr src,
814814
"__msan_unpoison_strided_copy"));
815815
}
816816

817+
static __SYCL_CONSTANT__ const char __msan_print_copy_unsupport_type[] =
818+
"[kernel] __msan_unpoison_copy: unsupported type(%d <- %d)\n";
819+
820+
// Copies shadow between the shadow locations of `src` and `dst`.
//
// The shadow addresses are obtained with MemToShadow(addr, address_space).
// Unless the destination shadow equals GetMsanLaunchInfo->CleanShadow, the
// source shadow is copied onto the destination shadow with Memcpy, using
// pointer element types selected from the (dst_element_size,
// src_element_size) pair:
//   (1, 1) -> int8_t  destination, int8_t  source
//   (4, 2) -> int32_t destination, int16_t source
//   (2, 4) -> int16_t destination, int32_t source
// Any other pair copies nothing and prints
// __msan_print_copy_unsupport_type.
//
// dst, dst_as:  destination address and its address-space id.
// src, src_as:  source address and its address-space id.
// counts:       forwarded unchanged as the last Memcpy argument.
//               NOTE(review): presumably a number of elements rather than
//               bytes; confirm against the Memcpy template.
//
// Returns immediately, without printing anything, when GetMsanLaunchInfo
// evaluates to false.
DEVICE_EXTERN_C_NOINLINE void __msan_unpoison_copy(uptr dst, uint32_t dst_as,
821+
uptr src, uint32_t src_as,
822+
uint32_t dst_element_size,
823+
uint32_t src_element_size,
824+
uptr counts) {
825+
// Nothing to do without launch info.
if (!GetMsanLaunchInfo)
826+
return;
827+
828+
MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_unpoison_copy"));
829+
830+
uptr shadow_dst = MemToShadow(dst, dst_as);
831+
// Skip the copy when the destination resolves to the CleanShadow address.
if (shadow_dst != GetMsanLaunchInfo->CleanShadow) {
832+
uptr shadow_src = MemToShadow(src, src_as);
833+
834+
// Pick the Memcpy element types from the two element sizes.
if (dst_element_size == 1 && src_element_size == 1) {
835+
Memcpy<__SYCL_GLOBAL__ int8_t *, __SYCL_GLOBAL__ int8_t *>(
836+
(__SYCL_GLOBAL__ int8_t *)shadow_dst,
837+
(__SYCL_GLOBAL__ int8_t *)shadow_src, counts);
838+
} else if (dst_element_size == 4 && src_element_size == 2) {
839+
Memcpy<__SYCL_GLOBAL__ int32_t *, __SYCL_GLOBAL__ int16_t *>(
840+
(__SYCL_GLOBAL__ int32_t *)shadow_dst,
841+
(__SYCL_GLOBAL__ int16_t *)shadow_src, counts);
842+
} else if (dst_element_size == 2 && src_element_size == 4) {
843+
Memcpy<__SYCL_GLOBAL__ int16_t *, __SYCL_GLOBAL__ int32_t *>(
844+
(__SYCL_GLOBAL__ int16_t *)shadow_dst,
845+
(__SYCL_GLOBAL__ int32_t *)shadow_src, counts);
846+
} else {
847+
// Unsupported size pair: report it (not wrapped in MSAN_DEBUG) and leave
// the destination shadow untouched.
__spirv_ocl_printf(__msan_print_copy_unsupport_type, dst_element_size,
848+
src_element_size);
849+
}
850+
}
851+
852+
MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_end, "__msan_unpoison_copy"));
853+
}
854+
817855
#endif // __SPIR__ || __SPIRV__

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,7 @@ class MemorySanitizerOnSpirv {
858858
FunctionCallee MsanUnpoisonStackFunc;
859859
FunctionCallee MsanUnpoisonShadowFunc;
860860
FunctionCallee MsanSetPrivateBaseFunc;
861+
FunctionCallee MsanUnpoisonCopyFunc;
861862
FunctionCallee MsanUnpoisonStridedCopyFunc;
862863
};
863864

@@ -965,6 +966,18 @@ void MemorySanitizerOnSpirv::initializeCallbacks() {
965966
M.getOrInsertFunction("__msan_set_private_base", IRB.getVoidTy(),
966967
PointerType::get(C, kSpirOffloadPrivateAS));
967968

969+
// __msan_unpoison_copy(
970+
// uptr dest, uint32_t dest_as,
971+
// uptr src, uint32_t src_as,
972+
// uint32_t dst_element_size,
973+
// uint32_t src_element_size,
974+
// uptr counts,
975+
// )
976+
MsanUnpoisonCopyFunc = M.getOrInsertFunction(
977+
"__msan_unpoison_copy", IRB.getVoidTy(), IntptrTy, IRB.getInt32Ty(),
978+
IntptrTy, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32Ty(),
979+
IRB.getInt64Ty());
980+
968981
// __msan_unpoison_strided_copy(
969982
// uptr dest, uint32_t dest_as,
970983
// uptr src, uint32_t src_as,
@@ -7721,24 +7734,53 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
77217734
IRB.getInt32(Src->getType()->getPointerAddressSpace()),
77227735
IRB.getInt32(ElementSize), NumElements, Stride});
77237736
} else if (FuncName.contains(
7724-
"__sycl_getComposite2020SpecConstantValue")) {
7737+
"__sycl_getComposite2020SpecConstantValue") ||
7738+
FuncName.contains("clog")) {
77257739
// clang-format off
7726-
// Handle builtin functions like "_Z40__sycl_getComposite2020SpecConstantValue"
7740+
// Handle builtin functions which have sret arguments.
77277741
// Structs which are larger than 64b will be returned via sret arguments
77287742
// and will be initialized inside the function. So we need to unpoison
77297743
// the sret arguments.
77307744
// clang-format on
77317745
if (Func->hasStructRetAttr()) {
77327746
Type *SCTy = Func->getParamStructRetType(0);
77337747
unsigned Size = Func->getDataLayout().getTypeStoreSize(SCTy);
7734-
auto *Addr = CB.getArgOperand(0);
7735-
IRB.CreateCall(
7736-
MS.Spirv.MsanUnpoisonShadowFunc,
7737-
{IRB.CreatePointerCast(Addr, MS.Spirv.IntptrTy),
7738-
ConstantInt::get(MS.Spirv.Int32Ty,
7739-
Addr->getType()->getPointerAddressSpace()),
7740-
ConstantInt::get(MS.Spirv.IntptrTy, Size)});
7748+
if (FuncName.contains("clog")) {
7749+
auto *Dest = CB.getArgOperand(0);
7750+
auto *Src = CB.getArgOperand(1);
7751+
IRB.CreateCall(
7752+
MS.Spirv.MsanUnpoisonCopyFunc,
7753+
{IRB.CreatePointerCast(Dest, MS.Spirv.IntptrTy),
7754+
IRB.getInt32(Dest->getType()->getPointerAddressSpace()),
7755+
IRB.CreatePointerCast(Src, MS.Spirv.IntptrTy),
7756+
IRB.getInt32(Src->getType()->getPointerAddressSpace()),
7757+
IRB.getInt32(1), IRB.getInt32(1),
7758+
ConstantInt::get(MS.Spirv.IntptrTy, Size)});
7759+
} else {
7760+
auto *Addr = CB.getArgOperand(0);
7761+
IRB.CreateCall(
7762+
MS.Spirv.MsanUnpoisonShadowFunc,
7763+
{IRB.CreatePointerCast(Addr, MS.Spirv.IntptrTy),
7764+
ConstantInt::get(MS.Spirv.Int32Ty,
7765+
Addr->getType()->getPointerAddressSpace()),
7766+
ConstantInt::get(MS.Spirv.IntptrTy, Size)});
7767+
}
77417768
}
7769+
} else if (FuncName.contains("__devicelib_ConvertBF16ToFINTELVec") ||
7770+
FuncName.contains("__devicelib_ConvertFToBF16INTELVec")) {
7771+
size_t NumElements;
7772+
bool IsBF16ToF = FuncName.contains("BF16ToF");
7773+
FuncName.take_back().getAsInteger(10, NumElements);
7774+
auto *Src = CB.getArgOperand(0);
7775+
auto *Dest = CB.getArgOperand(1);
7776+
IRB.CreateCall(
7777+
MS.Spirv.MsanUnpoisonCopyFunc,
7778+
{IRB.CreatePointerCast(Dest, MS.Spirv.IntptrTy),
7779+
IRB.getInt32(Dest->getType()->getPointerAddressSpace()),
7780+
IRB.CreatePointerCast(Src, MS.Spirv.IntptrTy),
7781+
IRB.getInt32(Src->getType()->getPointerAddressSpace()),
7782+
IRB.getInt32(IsBF16ToF ? 4 : 2), IRB.getInt32(IsBF16ToF ? 2 : 4),
7783+
ConstantInt::get(MS.Spirv.IntptrTy, NumElements)});
77427784
}
77437785
}
77447786
}

llvm/test/Instrumentation/MemorySanitizer/SPIRV/spirv_groupasynccopy.ll renamed to llvm/test/Instrumentation/MemorySanitizer/SPIRV/spirv_builtins.ll

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ declare spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3iPU3AS
77
declare dso_local spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU3AS1iPU3AS3Kimm9ocl_event(i32, ptr addrspace(1), ptr addrspace(3), i64, i64, target("spirv.Event"))
88
declare dso_local spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU3AS3Dv4_aPU3AS1KS_mm9ocl_event(i32, ptr addrspace(3), ptr addrspace(1), i64, i64, target("spirv.Event"))
99

10-
define spir_kernel void @kernel(ptr addrspace(3) %_arg_localAcc, ptr addrspace(1) %_arg_globalAcc) sanitize_memory {
10+
define spir_kernel void @kernel1(ptr addrspace(3) %_arg_localAcc, ptr addrspace(1) %_arg_globalAcc) sanitize_memory {
1111
entry:
12+
; CHECK-LABEL: define spir_kernel void @kernel1
1213
; CHECK: @__msan_barrier()
1314
; CHECK: [[REG1:%[0-9]+]] = ptrtoint ptr addrspace(3) %_arg_localAcc to i64
1415
; CHECK-NEXT: [[REG2:%[0-9]+]] = ptrtoint ptr addrspace(1) %_arg_globalAcc to i64
@@ -21,3 +22,28 @@ entry:
2122
%copy3 = call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU3AS3Dv4_aPU3AS1KS_mm9ocl_event(i32 2, ptr addrspace(3) %_arg_localAcc, ptr addrspace(1) %_arg_globalAcc, i64 512, i64 1, target("spirv.Event") zeroinitializer)
2223
ret void
2324
}
25+
26+
define spir_kernel void @kernel2(ptr addrspace(4) %tmp.ascast.i.i.i, ptr %byval-temp.i.i.i) {
27+
entry:
28+
; CHECK-LABEL: define spir_kernel void @kernel2
29+
; CHECK: [[REG3:%.*]] = ptrtoint ptr addrspace(4) [[REG4:%.*]] to i64
30+
; CHECK-NEXT: [[REG5:%.*]] = ptrtoint ptr [[REG6:%.*]] to i64
31+
; CHECK-NEXT: call void @__msan_unpoison_copy(i64 [[REG3]], i32 4, i64 [[REG5]], i32 0, i32 1, i32 1, i64 8)
32+
; CHECK-NEXT: call spir_func void @clogf(ptr addrspace(4) dead_on_unwind writable sret({ float, float }) align 4 [[REG4]], ptr noundef nonnull byval({ float, float }) align 4 [[REG6]])
33+
call spir_func void @clogf(ptr addrspace(4) dead_on_unwind writable sret({ float, float }) align 4 %tmp.ascast.i.i.i, ptr noundef nonnull byval({ float, float }) align 4 %byval-temp.i.i.i)
34+
ret void
35+
}
36+
37+
define spir_kernel void @kernel3(ptr addrspace(4) %0) {
38+
entry:
39+
; CHECK-LABEL: define spir_kernel void @kernel3
40+
; CHECK: [[REG7:%.*]] = ptrtoint ptr addrspace(4) [[REG8:%.*]] to i64
41+
; CHECK-NEXT: [[REG9:%.*]] = ptrtoint ptr addrspace(4) [[REG10:%.*]] to i64
42+
; CHECK-NEXT: call void @__msan_unpoison_copy(i64 [[REG7]], i32 4, i64 [[REG9]], i32 4, i32 4, i32 2, i64 4)
43+
; CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec4(ptr addrspace(4) noundef [[REG10]], ptr addrspace(4) noundef [[REG8]])
44+
call spir_func void @__devicelib_ConvertBF16ToFINTELVec4(ptr addrspace(4) noundef %0, ptr addrspace(4) noundef %0)
45+
ret void
46+
}
47+
48+
declare spir_func void @clogf(ptr addrspace(4) sret({ float, float }), ptr)
49+
declare spir_func void @__devicelib_ConvertBF16ToFINTELVec4(ptr addrspace(4), ptr addrspace(4))

0 commit comments

Comments
 (0)