Skip to content

Commit 72cb554

Browse files
agrabezhigcbot
authored and committed
Set unroll-max-upperbound flag to 16
This fix is an alternative to the LLVM-16 patch alter-unroll-max-upperbound-command-line-option-value.patch.
1 parent c1c6278 commit 72cb554

File tree

2 files changed

+98
-0
lines changed

2 files changed

+98
-0
lines changed

IGC/AdaptorOCL/dllInterfaceCompute.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,18 @@ bool TranslateBuildSPMD(const STB_TranslateInputArgs *pInputArgs, STB_TranslateO
995995
args.push_back("igc");
996996
auto optionsMap = llvm::cl::getRegisteredOptions();
997997

998+
// The default maximum trip-count upper bound considered for unrolling (8)
999+
// is not enough for some important compute workloads, so we set it to 16.
1000+
// Once the UnrollMaxUpperBound parameter can be set through UnrollingPreferences,
1001+
// this code will be removed.
1002+
llvm::StringRef unrollMaxUpperBoundFlag = "-unroll-max-upperbound=16";
1003+
auto unrollMaxUpperBoundSwitch = optionsMap.find(unrollMaxUpperBoundFlag.trim("-=16"));
1004+
if (unrollMaxUpperBoundSwitch != optionsMap.end()) {
1005+
if (unrollMaxUpperBoundSwitch->getValue()->getNumOccurrences() == 0) {
1006+
args.push_back(unrollMaxUpperBoundFlag.data());
1007+
}
1008+
}
1009+
9981010
// Disable code sinking in instruction combining.
9991011
// This is a workaround for a performance issue caused by code sinking
10001012
// that is being done in LLVM's instcombine pass.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-spirv, regkeys, mtl-supported, llvm-16-plus
10+
11+
; RUN: llvm-as -opaque-pointers=1 %s -o %t.bc
12+
; RUN: llvm-spirv -opaque-pointers=1 %t.bc -o %t.spv
13+
; RUN: ocloc compile -spirv_input -file %t.spv -device mtl -options " -igc_opts 'EnableOpaquePointersBackend=1,PrintToConsole=1,PrintAfter=Splitstructurephis'" 2>&1 | FileCheck %s
14+
15+
; This test checks that the 2 inner loops are fully unrolled by 12 and that SROA is applied to both allocas
16+
17+
; CHECK-LABEL: spir_kernel void @quux(
18+
; CHECK-NOT: = alloca
19+
20+
target triple = "spir64-unknown-unknown"
21+
22+
%struct.widget = type <{ %struct.baz, %struct.snork, i32, i32, i32, i32, [12 x i8] }>
23+
%struct.baz = type { [2 x ptr addrspace(4)] }
24+
%struct.snork = type { i32, [12 x %struct.spam], [12 x [1 x i32]] }
25+
%struct.spam = type { i32, i32, i32 }
26+
%struct.zot = type <{ %struct.baz, i32, %struct.snork, %struct.wombat, %struct.wombat, %struct.wombat, i8, i32, i32, i32, i32, [4 x i8] }>
27+
%struct.wombat = type { i8 }
28+
%struct.wombat.0 = type { i16 }
29+
30+
define spir_kernel void @quux(i1 %arg, ptr %arg1, i1 %arg2, ptr %arg3) {
31+
bb:
32+
%tmp = alloca %struct.widget, align 16
33+
%tmp3 = alloca %struct.zot, align 16
34+
br i1 %arg, label %bb4, label %bb22
35+
36+
bb4: ; preds = %bb4, %bb
37+
br i1 %arg2, label %bb5, label %bb4
38+
39+
bb5: ; preds = %bb18, %bb4
40+
br i1 %arg, label %bb7, label %bb6
41+
42+
bb6: ; preds = %bb33, %bb5
43+
ret void
44+
45+
bb7: ; preds = %bb12, %bb5
46+
%tmp8 = phi i32 [ %tmp16, %bb12 ], [ 0, %bb5 ]
47+
%tmp9 = phi i32 [ %tmp17, %bb12 ], [ 0, %bb5 ]
48+
%tmp10 = icmp ugt i32 %tmp9, 11
49+
%tmp11 = or i1 %tmp10, %arg
50+
br i1 %tmp11, label %bb18, label %bb12
51+
52+
bb12: ; preds = %bb7
53+
%tmp13 = zext i32 %tmp9 to i64
54+
%tmp14 = getelementptr %struct.zot, ptr %tmp3, i64 0, i32 2, i32 2, i64 %tmp13, i64 0
55+
%tmp15 = load i32, ptr %tmp14, align 4
56+
%tmp16 = or i32 %tmp8, %tmp15
57+
%tmp17 = add i32 %tmp9, 1
58+
br label %bb7
59+
60+
bb18: ; preds = %bb7
61+
%tmp19 = zext i32 %tmp8 to i64
62+
%tmp20 = getelementptr %struct.wombat.0, ptr %arg3, i64 %tmp19, i32 0
63+
%tmp21 = load i16, ptr %tmp20, align 2
64+
store i16 %tmp21, ptr %arg1, align 2
65+
br label %bb5
66+
67+
bb22: ; preds = %bb27, %bb
68+
%tmp23 = phi i32 [ %tmp31, %bb27 ], [ 0, %bb ]
69+
%tmp24 = phi i32 [ %tmp32, %bb27 ], [ 0, %bb ]
70+
%tmp25 = icmp ugt i32 %tmp24, 11
71+
%tmp26 = or i1 %tmp25, %arg
72+
br i1 %tmp26, label %bb33, label %bb27
73+
74+
bb27: ; preds = %bb22
75+
%tmp28 = zext i32 %tmp24 to i64
76+
%tmp29 = getelementptr %struct.widget, ptr %tmp, i64 0, i32 1, i32 2, i64 %tmp28, i64 0
77+
%tmp30 = load i32, ptr %tmp29, align 4
78+
%tmp31 = or i32 %tmp23, %tmp30
79+
%tmp32 = add i32 %tmp24, 1
80+
br label %bb22
81+
82+
bb33: ; preds = %bb22
83+
%tmp34 = zext i32 %tmp23 to i64
84+
store i64 %tmp34, ptr %arg1, align 8
85+
br label %bb6
86+
}

0 commit comments

Comments
 (0)