Skip to content

Commit 3adcdd0

Browse files
authored
Merge branch 'main' into UnifAB
2 parents cbab59c + 4ff986a commit 3adcdd0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+878
-219
lines changed

clang/test/Driver/linker-wrapper-image.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// REQUIRES: x86-registered-target
22
// REQUIRES: nvptx-registered-target
33
// REQUIRES: amdgpu-registered-target
4+
// REQUIRES: spirv-registered-target
45

56
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
67

@@ -263,3 +264,36 @@
263264
// HIP: while.end:
264265
// HIP-NEXT: ret void
265266
// HIP-NEXT: }
267+
268+
// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
269+
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
270+
// RUN: -fembed-offload-object=%t.out
271+
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
272+
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=SYCL
273+
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu -r \
274+
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=SYCL
275+
276+
// SYCL: %__sycl.tgt_device_image = type { i16, i8, i8, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
277+
// SYCL-NEXT: %__sycl.tgt_bin_desc = type { i16, i16, ptr, ptr, ptr }
278+
279+
// SYCL: @.sycl_offloading.target.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
280+
// SYCL-NEXT: @.sycl_offloading.opts.compile.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
281+
// SYCL-NEXT: @.sycl_offloading.opts.link.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
282+
// SYCL-NEXT: @.sycl_offloading.0.data = internal unnamed_addr constant [0 x i8] zeroinitializer, section ".llvm.offloading"
283+
// SYCL-NEXT: @.offloading.entry_name = internal unnamed_addr constant [5 x i8] c"stub\00", section ".llvm.rodata.offloading", align 1
284+
// SYCL-NEXT: @.offloading.entry.stub = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 8, i32 0, ptr null, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
285+
// SYCL-NEXT: @.sycl_offloading.entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { i64 0, i16 1, i16 8, i32 0, ptr null, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }]
286+
// SYCL-NEXT: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 3, i8 8, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr @.sycl_offloading.0.data, ptr @.sycl_offloading.0.data, ptr @.sycl_offloading.entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @.sycl_offloading.entries_arr, i64 0, i64 1), ptr null, ptr null }]
287+
// SYCL-NEXT: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null }
288+
289+
// SYCL: define internal void @sycl.descriptor_reg() section ".text.startup" {
290+
// SYCL-NEXT: entry:
291+
// SYCL-NEXT: call void @__sycl_register_lib(ptr @.sycl_offloading.descriptor)
292+
// SYCL-NEXT: ret void
293+
// SYCL-NEXT: }
294+
295+
// SYCL: define internal void @sycl.descriptor_unreg() section ".text.startup" {
296+
// SYCL-NEXT: entry:
297+
// SYCL-NEXT: call void @__sycl_unregister_lib(ptr @.sycl_offloading.descriptor)
298+
// SYCL-NEXT: ret void
299+
// SYCL-NEXT: }

clang/test/Driver/linker-wrapper.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ __attribute__((visibility("protected"), used)) int x;
5454
// RUN: clang-offload-packager -o %t.out \
5555
// RUN: --image=file=%t.spirv.bc,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
5656
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
57-
// RUN: not clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
57+
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
5858
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=SPIRV-LINK
5959

6060
// SPIRV-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.spirv64..img. --target=spirv64-unknown-unknown {{.*}}.o --sycl-link -Xlinker -triple=spirv64-unknown-unknown -Xlinker -arch=

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,14 @@ wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
717717
M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M)))
718718
return std::move(Err);
719719
break;
720+
case OFK_SYCL: {
721+
// TODO: fill these options once the Driver supports them.
722+
offloading::SYCLJITOptions Options;
723+
if (Error Err =
724+
offloading::wrapSYCLBinaries(M, BuffersToWrap.front(), Options))
725+
return std::move(Err);
726+
break;
727+
}
720728
default:
721729
return createStringError(getOffloadKindName(Kind) +
722730
" wrapping is not supported");
@@ -754,6 +762,32 @@ bundleOpenMP(ArrayRef<OffloadingImage> Images) {
754762
return std::move(Buffers);
755763
}
756764

765+
Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
766+
bundleSYCL(ArrayRef<OffloadingImage> Images) {
767+
SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
768+
if (DryRun) {
769+
// In dry-run mode the input is empty, which is insufficient for testing.
770+
// Therefore, we return a stub image here.
771+
OffloadingImage Image;
772+
Image.TheImageKind = IMG_None;
773+
Image.TheOffloadKind = OffloadKind::OFK_SYCL;
774+
Image.StringData["symbols"] = "stub";
775+
Image.Image = MemoryBuffer::getMemBufferCopy("");
776+
SmallString<0> SerializedImage = OffloadBinary::write(Image);
777+
Buffers.emplace_back(MemoryBuffer::getMemBufferCopy(SerializedImage));
778+
return std::move(Buffers);
779+
}
780+
781+
for (const OffloadingImage &Image : Images) {
782+
// clang-sycl-linker packs outputs into one binary blob. Therefore, it is
783+
// passed to Offload Wrapper as is.
784+
StringRef S(Image.Image->getBufferStart(), Image.Image->getBufferSize());
785+
Buffers.emplace_back(MemoryBuffer::getMemBufferCopy(S));
786+
}
787+
788+
return std::move(Buffers);
789+
}
790+
757791
Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
758792
bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
759793
SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
@@ -806,8 +840,9 @@ bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
806840
llvm::TimeTraceScope TimeScope("Bundle linked output");
807841
switch (Kind) {
808842
case OFK_OpenMP:
809-
case OFK_SYCL:
810843
return bundleOpenMP(Images);
844+
case OFK_SYCL:
845+
return bundleSYCL(Images);
811846
case OFK_Cuda:
812847
return bundleCuda(Images, Args);
813848
case OFK_HIP:

flang/lib/Lower/Bridge.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2544,7 +2544,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
25442544
auto loopOp = fir::DoLoopOp::create(
25452545
*builder, loc, lowerValue, upperValue, stepValue,
25462546
/*unordered=*/false,
2547-
/*finalCountValue=*/true,
2547+
/*finalCountValue=*/false,
25482548
builder->createConvert(loc, loopVarType, lowerValue));
25492549
info.loopOp = loopOp;
25502550
builder->setInsertionPointToStart(loopOp.getBody());
@@ -2696,22 +2696,18 @@ class FirConverter : public Fortran::lower::AbstractConverter {
26962696
// Decrement tripVariable.
26972697
auto doLoopOp = mlir::cast<fir::DoLoopOp>(info.loopOp);
26982698
builder->setInsertionPointToEnd(doLoopOp.getBody());
2699-
llvm::SmallVector<mlir::Value, 2> results;
2700-
results.push_back(mlir::arith::AddIOp::create(
2701-
*builder, loc, doLoopOp.getInductionVar(), doLoopOp.getStep(),
2702-
iofAttr));
27032699
// Step loopVariable to help optimizations such as vectorization.
27042700
// Induction variable elimination will clean up as necessary.
27052701
mlir::Value step = builder->createConvert(
27062702
loc, info.getLoopVariableType(), doLoopOp.getStep());
27072703
mlir::Value loopVar =
27082704
fir::LoadOp::create(*builder, loc, info.loopVariable);
2709-
results.push_back(
2710-
mlir::arith::AddIOp::create(*builder, loc, loopVar, step, iofAttr));
2711-
fir::ResultOp::create(*builder, loc, results);
2705+
mlir::Value loopVarInc =
2706+
mlir::arith::AddIOp::create(*builder, loc, loopVar, step, iofAttr);
2707+
fir::ResultOp::create(*builder, loc, loopVarInc);
27122708
builder->setInsertionPointAfter(doLoopOp);
27132709
// The loop control variable may be used after the loop.
2714-
fir::StoreOp::create(*builder, loc, doLoopOp.getResult(1),
2710+
fir::StoreOp::create(*builder, loc, doLoopOp.getResult(0),
27152711
info.loopVariable);
27162712
continue;
27172713
}

flang/lib/Lower/IO.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -977,9 +977,9 @@ static void genIoLoop(Fortran::lower::AbstractConverter &converter,
977977
fir::StoreOp::create(builder, loc, lcv, loopVar);
978978
genItemList(ioImpliedDo);
979979
builder.setInsertionPointToEnd(doLoopOp.getBody());
980-
mlir::Value result = mlir::arith::AddIOp::create(
981-
builder, loc, doLoopOp.getInductionVar(), doLoopOp.getStep(), iofAttr);
982-
fir::ResultOp::create(builder, loc, result);
980+
// fir.do_loop's induction variable's increment is implied,
981+
// so we do not need to increment it explicitly.
982+
fir::ResultOp::create(builder, loc, doLoopOp.getInductionVar());
983983
builder.setInsertionPointAfter(doLoopOp);
984984
// The loop control variable may be used after the loop.
985985
lcv = builder.createConvert(loc, fir::unwrapRefType(loopVar.getType()),

flang/test/Fir/convert-to-llvm-openmp-and-fir.fir

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ func.func @_QPopenmp_target_data_region() {
349349
%3 = fir.convert %c1024_i32 : (i32) -> index
350350
%c1 = arith.constant 1 : index
351351
%4 = fir.convert %2 : (index) -> i32
352-
%5:2 = fir.do_loop %arg0 = %2 to %3 step %c1 iter_args(%arg1 = %4) -> (index, i32) {
352+
%5 = fir.do_loop %arg0 = %2 to %3 step %c1 iter_args(%arg1 = %4) -> (i32) {
353353
fir.store %arg1 to %1 : !fir.ref<i32>
354354
%6 = fir.load %1 : !fir.ref<i32>
355355
%7 = fir.load %1 : !fir.ref<i32>
@@ -358,13 +358,12 @@ func.func @_QPopenmp_target_data_region() {
358358
%9 = arith.subi %8, %c1_i64 : i64
359359
%10 = fir.coordinate_of %0, %9 : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
360360
fir.store %6 to %10 : !fir.ref<i32>
361-
%11 = arith.addi %arg0, %c1 overflow<nsw> : index
362361
%12 = fir.convert %c1 : (index) -> i32
363362
%13 = fir.load %1 : !fir.ref<i32>
364363
%14 = arith.addi %13, %12 overflow<nsw> : i32
365-
fir.result %11, %14 : index, i32
364+
fir.result %14 : i32
366365
}
367-
fir.store %5#1 to %1 : !fir.ref<i32>
366+
fir.store %5 to %1 : !fir.ref<i32>
368367
omp.terminator
369368
}
370369
return
@@ -404,7 +403,6 @@ func.func @_QPopenmp_target_data_region() {
404403
// CHECK: %[[VAL_21:.*]] = llvm.sub %[[VAL_19]], %[[VAL_20]] : i64
405404
// CHECK: %[[VAL_22:.*]] = llvm.getelementptr %[[VAL_1]][0, %[[VAL_21]]] : (!llvm.ptr, i64) -> !llvm.ptr
406405
// CHECK: llvm.store %[[VAL_17]], %[[VAL_22]] : i32, !llvm.ptr
407-
// CHECK: %[[VAL_23:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]] overflow<nsw> : i64
408406
// CHECK: %[[VAL_24:.*]] = llvm.trunc %[[VAL_8]] : i64 to i32
409407
// CHECK: %[[VAL_25:.*]] = llvm.load %[[VAL_3]] : !llvm.ptr -> i32
410408
// CHECK: %[[VAL_26:.*]] = llvm.add %[[VAL_25]], %[[VAL_24]] overflow<nsw> : i32
@@ -653,18 +651,17 @@ func.func @_QPsb() {
653651
omp.sections {
654652
omp.section {
655653
%2 = fir.convert %c1 : (index) -> i32
656-
%3:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %2) -> (index, i32) {
654+
%3 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %2) -> (i32) {
657655
fir.store %arg1 to %0 : !fir.ref<i32>
658656
%4 = fir.load %1 : !fir.ref<i32>
659657
%5 = arith.addi %4, %c1_i32 : i32
660658
fir.store %5 to %1 : !fir.ref<i32>
661-
%6 = arith.addi %arg0, %c1 : index
662659
%7 = fir.convert %c1 : (index) -> i32
663660
%8 = fir.load %0 : !fir.ref<i32>
664661
%9 = arith.addi %8, %7 : i32
665-
fir.result %6, %9 : index, i32
662+
fir.result %9 : i32
666663
}
667-
fir.store %3#1 to %0 : !fir.ref<i32>
664+
fir.store %3 to %0 : !fir.ref<i32>
668665
omp.terminator
669666
}
670667
omp.terminator

0 commit comments

Comments
 (0)