Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2985,10 +2985,11 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_loop_merge: {
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpLoopMerge));
for (unsigned i = 1; i < I.getNumExplicitOperands(); ++i) {
assert(I.getOperand(i).isMBB());
MIB.addMBB(I.getOperand(i).getMBB());
if (I.getOperand(i).isMBB())
MIB.addMBB(I.getOperand(i).getMBB());
else
MIB.addImm(foldImm(I.getOperand(i), MRI));
}
MIB.addImm(SPIRV::SelectionControl::None);
return MIB.constrainAllUses(TII, TRI, RBI);
}
case Intrinsic::spv_selection_merge: {
Expand Down
34 changes: 34 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,40 @@ class SPIRVStructurizer : public FunctionPass {
auto MergeAddress = BlockAddress::get(Merge->getParent(), Merge);
auto ContinueAddress = BlockAddress::get(Continue->getParent(), Continue);
SmallVector<Value *, 2> Args = {MergeAddress, ContinueAddress};
unsigned LC = SPIRV::LoopControl::None;
// Currently used only to store PartialCount value. Later when other
// LoopControls are added - this map should be sorted before making
// them loop_merge operands to satisfy 3.23. Loop Control requirements.
std::vector<std::pair<unsigned, unsigned>> MaskToValueMap;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall this be wrapped in a utility function?

SmallVector<unsigned, 1> getSpirvLoopControlOperandsFromLoopMetadata(Loop *L);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Applied, thanks!

if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) {
LC |= SPIRV::LoopControl::DontUnroll;
} else {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) {
LC |= SPIRV::LoopControl::Unroll;
}
std::optional<int> Count =
getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
if (Count && Count != 1) {
LC |= SPIRV::LoopControl::PartialCount;
MaskToValueMap.emplace_back(
std::make_pair(SPIRV::LoopControl::PartialCount, *Count));
}
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) {
// llvm.loop.unroll.full doesn't have a direct counterpart in SPIR-V,
// the closest thing we can do is to add Unroll mask and if the trip
// count is not known at compile time - either disable unrolling by
// setting PartialCount to 1 or reuse already available PartialCount.
LC |= SPIRV::LoopControl::Unroll;
if ((LC & SPIRV::LoopControl::PartialCount) == 0) {
LC |= SPIRV::LoopControl::PartialCount;
MaskToValueMap.emplace_back(
std::make_pair(SPIRV::LoopControl::PartialCount, 1));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand:
Why would unroll.full imply unroll.count = 1?
Isn't unroll.full the same as LoopControl::Unroll?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic was that llvm.loop.unroll.enable implies that, if the trip count is not known at compile time, the loop can still be partially unrolled. Meanwhile, llvm.loop.unroll.full doesn't hint at any unrolling if the trip count is not known at compile time. So (as the SPIR-V spec doesn't distinguish between Unroll(enable) and Unroll(full)) Unroll + PartialCount(1) would mean to the backend: "please evaluate whether it's possible to unroll the loop, and if not - do nothing with it". I followed this logic long ago in KhronosGroup/SPIRV-LLVM-Translator#1664.

Now, after some thinking, this logic seems incorrect to me: if the optimizer faces Unroll + PartialCount(1) loop controls, it should actually pick the conservative approach and not even try to unroll the loop. So I've removed this logic from the PR.

}
}
Args.emplace_back(llvm::ConstantInt::get(Builder.getInt32Ty(), LC));
for (auto &[Mask, Val] : MaskToValueMap)
Args.emplace_back(llvm::ConstantInt::get(Builder.getInt32Ty(), Val));

Builder.CreateIntrinsic(Intrinsic::spv_loop_merge, {}, {Args});
Modified = true;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
// Create our unconditional branch.
BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
NewBI->setDebugLoc(TI->getDebugLoc());
if (auto *LoopMD = TI->getMetadata(LLVMContext::MD_loop))
Copy link
Contributor Author

@MrSidims MrSidims Mar 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally this should go into a separate PR, but the issue of not copying llvm.loop metadata is exposed during a call to the reg2mem pass, which (if I understood correctly) is in a semi-deprecated state, so it's not trivial to test it in a stand-alone pull request.

NewBI->setMetadata(LLVMContext::MD_loop, LoopMD);

// Insert the block into the function... right after the block TI lives in.
Function &F = *TIBB->getParent();
Expand Down
228 changes: 228 additions & 0 deletions llvm/test/CodeGen/SPIRV/structurizer/loop-unroll.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 -verify-machineinstrs %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %}

; CHECK-DAG: OpName %[[#For:]] "for_loop"
; CHECK-DAG: OpName %[[#While:]] "while_loop"
; CHECK-DAG: OpName %[[#DoWhile:]] "do_while_loop"
; CHECK-DAG: OpName %[[#Disable:]] "unroll_disable"
; CHECK-DAG: OpName %[[#Count:]] "unroll_count"
; CHECK-DAG: OpName %[[#Full:]] "unroll_full"
; CHECK-DAG: OpName %[[#FullCount:]] "unroll_full_count"

; CHECK: %[[#For]] = OpFunction
; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll

; CHECK: %[[#While]] = OpFunction
; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll

; CHECK: %[[#DoWhile]] = OpFunction
; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll

; CHECK: %[[#Disable]] = OpFunction
; CHECK: OpLoopMerge %[[#]] %[[#]] DontUnroll

; CHECK: %[[#Count]] = OpFunction
; CHECK: OpLoopMerge %[[#]] %[[#]] PartialCount 4

; CHECK: %[[#Full]] = OpFunction
; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll|PartialCount 1

; CHECK: %[[#FullCount]] = OpFunction
; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll|PartialCount 4

; @for_loop(ptr %0, i32 %1): for-shaped loop with a separate condition block
; (%6), body (%10) and increment block (%15). The back-edge in %15 carries
; !llvm.loop !1, whose only property is "llvm.loop.unroll.enable".
define dso_local void @for_loop(ptr noundef %0, i32 noundef %1) {
; %3 holds the pointer argument, %4 the bound, %5 the loop counter (starts at 0).
%3 = alloca ptr, align 8
%4 = alloca i32, align 4
%5 = alloca i32, align 4
store ptr %0, ptr %3, align 8
store i32 %1, ptr %4, align 4
store i32 0, ptr %5, align 4
br label %6

; Loop condition: continue while counter < bound.
6: ; preds = %15, %2
%7 = load i32, ptr %5, align 4
%8 = load i32, ptr %4, align 4
%9 = icmp slt i32 %7, %8
br i1 %9, label %10, label %18

; Loop body: add the counter to the i32 the pointer argument refers to.
10: ; preds = %6
%11 = load i32, ptr %5, align 4
%12 = load ptr, ptr %3, align 8
%13 = load i32, ptr %12, align 4
%14 = add nsw i32 %13, %11
store i32 %14, ptr %12, align 4
br label %15

; Increment block: counter += 1, then take the annotated back-edge.
15: ; preds = %10
%16 = load i32, ptr %5, align 4
%17 = add nsw i32 %16, 1
store i32 %17, ptr %5, align 4
br label %6, !llvm.loop !1

18: ; preds = %6
ret void
}

; @while_loop(ptr %0, i32 %1): while-shaped loop with a condition block (%6)
; and a single body block (%10) that also performs the increment. The back-edge
; in %10 carries !llvm.loop !3, which references "llvm.loop.unroll.enable" (!2).
define dso_local void @while_loop(ptr noundef %0, i32 noundef %1) {
; %3 holds the pointer argument, %4 the bound, %5 the loop counter (starts at 0).
%3 = alloca ptr, align 8
%4 = alloca i32, align 4
%5 = alloca i32, align 4
store ptr %0, ptr %3, align 8
store i32 %1, ptr %4, align 4
store i32 0, ptr %5, align 4
br label %6

; Loop condition: continue while counter < bound.
6: ; preds = %10, %2
%7 = load i32, ptr %5, align 4
%8 = load i32, ptr %4, align 4
%9 = icmp slt i32 %7, %8
br i1 %9, label %10, label %17

; Body and increment in one block: *ptr += counter; counter += 1.
10: ; preds = %6
%11 = load i32, ptr %5, align 4
%12 = load ptr, ptr %3, align 8
%13 = load i32, ptr %12, align 4
%14 = add nsw i32 %13, %11
store i32 %14, ptr %12, align 4
%15 = load i32, ptr %5, align 4
%16 = add nsw i32 %15, 1
store i32 %16, ptr %5, align 4
br label %6, !llvm.loop !3

17: ; preds = %6
ret void
}

; @do_while_loop(ptr %0, i32 %1): do-while-shaped loop. The body (%6) runs
; before the condition block (%13); the conditional back-edge in %13 carries
; !llvm.loop !4, which references "llvm.loop.unroll.enable" (!2).
define dso_local void @do_while_loop(ptr noundef %0, i32 noundef %1) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for reference: this case has exposed issue in reg2mem pass

%3 = alloca ptr, align 8
%4 = alloca i32, align 4
%5 = alloca i32, align 4
store ptr %0, ptr %3, align 8
store i32 %1, ptr %4, align 4
store i32 0, ptr %5, align 4
br label %6

; Body and increment: *ptr += counter; counter += 1.
6: ; preds = %13, %2
%7 = load i32, ptr %5, align 4
%8 = load ptr, ptr %3, align 8
%9 = load i32, ptr %8, align 4
%10 = add nsw i32 %9, %7
store i32 %10, ptr %8, align 4
%11 = load i32, ptr %5, align 4
%12 = add nsw i32 %11, 1
store i32 %12, ptr %5, align 4
br label %13

; Condition: loop back to %6 while counter < bound.
13: ; preds = %6
%14 = load i32, ptr %5, align 4
%15 = load i32, ptr %4, align 4
%16 = icmp slt i32 %14, %15
br i1 %16, label %6, label %17, !llvm.loop !4

17: ; preds = %13
ret void
}

; @unroll_disable(i32 %0): do-while-shaped counting loop whose conditional
; back-edge carries !llvm.loop !5, i.e. "llvm.loop.unroll.disable" (!6).
define dso_local void @unroll_disable(i32 noundef %0) {
; %2 holds the bound, %3 the loop counter (starts at 0).
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 %0, ptr %2, align 4
store i32 0, ptr %3, align 4
br label %4

; Body: counter += 1.
4: ; preds = %7, %1
%5 = load i32, ptr %3, align 4
%6 = add nsw i32 %5, 1
store i32 %6, ptr %3, align 4
br label %7

; Condition: loop back to %4 while counter < bound.
7: ; preds = %4
%8 = load i32, ptr %3, align 4
%9 = load i32, ptr %2, align 4
%10 = icmp slt i32 %8, %9
br i1 %10, label %4, label %11, !llvm.loop !5

11: ; preds = %7
ret void
}

; @unroll_count(i32 %0): do-while-shaped counting loop whose conditional
; back-edge carries !llvm.loop !7, i.e. "llvm.loop.unroll.count" with i32 4 (!8).
define dso_local void @unroll_count(i32 noundef %0) {
; %2 holds the bound, %3 the loop counter (starts at 0).
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 %0, ptr %2, align 4
store i32 0, ptr %3, align 4
br label %4

; Body: counter += 1.
4: ; preds = %7, %1
%5 = load i32, ptr %3, align 4
%6 = add nsw i32 %5, 1
store i32 %6, ptr %3, align 4
br label %7

; Condition: loop back to %4 while counter < bound.
7: ; preds = %4
%8 = load i32, ptr %3, align 4
%9 = load i32, ptr %2, align 4
%10 = icmp slt i32 %8, %9
br i1 %10, label %4, label %11, !llvm.loop !7

11: ; preds = %7
ret void
}

; @unroll_full(i32 %0): do-while-shaped counting loop whose conditional
; back-edge carries !llvm.loop !9, i.e. "llvm.loop.unroll.full" (!10) with no
; explicit unroll count.
define dso_local void @unroll_full(i32 noundef %0) {
; %2 holds the bound, %3 the loop counter (starts at 0).
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 %0, ptr %2, align 4
store i32 0, ptr %3, align 4
br label %4

; Body: counter += 1.
4: ; preds = %7, %1
%5 = load i32, ptr %3, align 4
%6 = add nsw i32 %5, 1
store i32 %6, ptr %3, align 4
br label %7

; Condition: loop back to %4 while counter < bound.
7: ; preds = %4
%8 = load i32, ptr %3, align 4
%9 = load i32, ptr %2, align 4
%10 = icmp slt i32 %8, %9
br i1 %10, label %4, label %11, !llvm.loop !9

11: ; preds = %7
ret void
}

; @unroll_full_count(i32 %0): do-while-shaped counting loop whose conditional
; back-edge carries !llvm.loop !11, which combines "llvm.loop.unroll.full" (!10)
; with "llvm.loop.unroll.count" i32 4 (!8).
define dso_local void @unroll_full_count(i32 noundef %0) {
; %2 holds the bound, %3 the loop counter (starts at 0).
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 %0, ptr %2, align 4
store i32 0, ptr %3, align 4
br label %4

; Body: counter += 1.
4: ; preds = %7, %1
%5 = load i32, ptr %3, align 4
%6 = add nsw i32 %5, 1
store i32 %6, ptr %3, align 4
br label %7

; Condition: loop back to %4 while counter < bound.
7: ; preds = %4
%8 = load i32, ptr %3, align 4
%9 = load i32, ptr %2, align 4
%10 = icmp slt i32 %8, %9
br i1 %10, label %4, label %11, !llvm.loop !11

11: ; preds = %7
ret void
}

; Loop metadata. Each "distinct" node is a self-referencing loop ID attached to
; one back-edge above; the non-distinct nodes are the unroll properties they share.
; !1, !3, !4: loop IDs for @for_loop, @while_loop and @do_while_loop (unroll.enable).
!1 = distinct !{!1, !2}
!2 = !{!"llvm.loop.unroll.enable"}
!3 = distinct !{!3, !2}
!4 = distinct !{!4, !2}
; !5: loop ID for @unroll_disable.
!5 = distinct !{!5, !6}
!6 = !{!"llvm.loop.unroll.disable"}
; !7: loop ID for @unroll_count (count = 4).
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.unroll.count", i32 4}
; !9: loop ID for @unroll_full.
!9 = distinct !{!9, !10}
!10 = !{!"llvm.loop.unroll.full"}
; !11: loop ID for @unroll_full_count (unroll.full together with count = 4).
!11 = distinct !{!11, !10, !8}
Comment on lines +247 to +257
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it legal for LLVM to have both unroll.disable and unroll.enable set?
If yes, we should probably test that no matter what, we pick one and emit valid SPIR-V.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's legal. Added a test case.

Loading