Skip to content

Commit 8239cba

Browse files
KanclerzPiotr authored and igcbot committed
Replace i64 llvm min max intrinsics with simple comp+select
LLVM 15 introduced common usage of the llvm.smax, llvm.smin, llvm.umax, and llvm.umin intrinsics. Their i64 variants were not emulated on HW that has only partial i64 support or doesn't support i64 at all. This commit replaces them with icmp + select, which is then properly emulated.
1 parent da8e392 commit 8239cba

File tree

5 files changed

+95
-300
lines changed

5 files changed

+95
-300
lines changed

IGC/Compiler/CISACodeGen/Emu64OpsPass.cpp

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2021 Intel Corporation
3+
Copyright (C) 2017-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -1933,53 +1933,6 @@ bool InstExpander::visitCall(CallInst& Call) {
19331933
Emu->setExpandedValues(&Call, SelectLo, SelectHo);
19341934
return true;
19351935
}
1936-
// emulate LLVM min/max intrinsics
1937-
case Intrinsic::smax:
1938-
case Intrinsic::smin:
1939-
case Intrinsic::umax:
1940-
case Intrinsic::umin:
1941-
{
1942-
// The least significant halves' comparison is dependent on that
1943-
// for the most significant halves, so we gain nothing by lowering
1944-
// this into i32 min/max calls. Basic cmp/sel sequence should
1945-
// suffice
1946-
const DenseMap<Intrinsic::ID, CmpInst::Predicate> CmpPredMap {
1947-
{Intrinsic::smax, CmpInst::Predicate::ICMP_SGT},
1948-
{Intrinsic::smin, CmpInst::Predicate::ICMP_SLT},
1949-
{Intrinsic::umax, CmpInst::Predicate::ICMP_UGT},
1950-
{Intrinsic::umin, CmpInst::Predicate::ICMP_ULT}
1951-
};
1952-
Value* LHS = Call.getArgOperand(0), * RHS = Call.getArgOperand(1);
1953-
// FIXME: Note that we aren't producing expanded/emulated values
1954-
// here, but rather replacing the call uses with the result of a
1955-
// newly generated i64 instruction. To make that work, 2 criteria
1956-
// should be satisfied from the perspective of Emu64Ops::expandInsts
1957-
// algorithm:
1958-
// 1. Inst-over-BB iterators cannot be invalidated
1959-
// 2. Due to averse inst-over-BB iteration order, the cmp/sel
1960-
// sequence must be inserted after the current min/max call,
1961-
// before its first use - regardless of the fact that the call
1962-
// itself will be unlinked from those uses and marked for
1963-
// deletion.
1964-
// For 1, we're entirely relying on IRBuilder's internal validation
1965-
// of instruction numbering within the BB. For 2, we're basically
1966-
// exploiting the knowledge that the inst-over-BB iteration in the
1967-
// parent method strictly heeds the averse order.
1968-
// TODO: Instead of hacking the iteration logic from within the
1969-
// helper InstExpander method, we should encapsulate this use-case
1970-
// (inserting new i64 insts into the emulation queue) at the
1971-
// Emu64Ops class level. One of the options is implementing a util
1972-
// akin to LLVM's InstructionWorklist, which would support averse
1973-
// iteration order and handle the deferred instructions upon their
1974-
// creation. Such a worklist class might have its use in a broader
1975-
// set of IGC passes, hence implementing this a "global" IGC util
1976-
// could be an idea.
1977-
IRB->SetInsertPoint(&*std::next(BasicBlock::iterator(Call)));
1978-
auto* Cmp = cast<Instruction>(
1979-
IRB->CreateICmp(CmpPredMap.lookup(IntrID), LHS, RHS));
1980-
Call.replaceAllUsesWith(IRB->CreateSelect(Cmp, LHS, RHS));
1981-
return true;
1982-
}
19831936
}
19841937
}
19851938

IGC/Compiler/Optimizer/OpenCLPasses/ReplaceUnsupportedIntrinsics/ReplaceUnsupportedIntrinsics.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2023 Intel Corporation
3+
Copyright (C) 2017-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -109,6 +109,7 @@ namespace
109109
void replaceLRound(IntrinsicInst* I);
110110
void replaceLRint(IntrinsicInst* I);
111111
void replaceCountTheLeadingZeros(IntrinsicInst* I);
112+
void replaceI64MinMax(IntrinsicInst* I);
112113

113114
static const std::map< Intrinsic::ID, MemFuncPtr_t > m_intrinsicToFunc;
114115
};
@@ -137,7 +138,11 @@ const std::map< Intrinsic::ID, ReplaceUnsupportedIntrinsics::MemFuncPtr_t > Repl
137138
{ Intrinsic::llround, &ReplaceUnsupportedIntrinsics::replaceLRound },
138139
{ Intrinsic::lrint, &ReplaceUnsupportedIntrinsics::replaceLRint },
139140
{ Intrinsic::llrint, &ReplaceUnsupportedIntrinsics::replaceLRint },
140-
{ Intrinsic::ctlz, &ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros }
141+
{ Intrinsic::ctlz, &ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros },
142+
{ Intrinsic::smax, &ReplaceUnsupportedIntrinsics::replaceI64MinMax },
143+
{ Intrinsic::smin, &ReplaceUnsupportedIntrinsics::replaceI64MinMax },
144+
{ Intrinsic::umax, &ReplaceUnsupportedIntrinsics::replaceI64MinMax },
145+
{ Intrinsic::umin, &ReplaceUnsupportedIntrinsics::replaceI64MinMax }
141146
};
142147

143148
ReplaceUnsupportedIntrinsics::ReplaceUnsupportedIntrinsics() : FunctionPass(ID)
@@ -1030,6 +1035,30 @@ void ReplaceUnsupportedIntrinsics::replaceLRint(IntrinsicInst* I) {
10301035
I->eraseFromParent();
10311036
}
10321037

1038+
/*
1039+
Replaces i64 calls to llvm.smax, llvm.smin, llvm.umax, llvm.umin with
1040+
icmp + select instructions that can be emulated.
1041+
*/
1042+
void ReplaceUnsupportedIntrinsics::replaceI64MinMax(IntrinsicInst* I)
1043+
{
1044+
if(!I->getType()->isIntegerTy(64))
1045+
return;
1046+
1047+
const SmallDenseMap<Intrinsic::ID, CmpInst::Predicate, 4> CmpPredMap {
1048+
{Intrinsic::smax, CmpInst::Predicate::ICMP_SGT},
1049+
{Intrinsic::smin, CmpInst::Predicate::ICMP_SLT},
1050+
{Intrinsic::umax, CmpInst::Predicate::ICMP_UGT},
1051+
{Intrinsic::umin, CmpInst::Predicate::ICMP_ULT}
1052+
};
1053+
1054+
IGCLLVM::IRBuilder<> Builder(I);
1055+
1056+
Value* LHS = I->getArgOperand(0), * RHS = I->getArgOperand(1);
1057+
auto Cmp = cast<Instruction>(
1058+
Builder.CreateICmp(CmpPredMap.lookup(I->getIntrinsicID()), LHS, RHS));
1059+
I->replaceAllUsesWith(Builder.CreateSelect(Cmp, LHS, RHS));
1060+
}
1061+
10331062
/*
10341063
Replaces llvm.ctlz.* intrinsics (count the leading zeros)
10351064
to llvm.ctlz.i32 because we support llvm.ctlz intrinsic

IGC/Compiler/tests/Emu64Ops/calls-typed-pointers.ll

Lines changed: 1 addition & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -41,135 +41,11 @@ define void @test_abs(i64 %arg) {
4141
ret void
4242
}
4343

44-
; CHECK-LABEL: @test_smax(
45-
; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
46-
; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
47-
; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
48-
; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
49-
; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
50-
; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
51-
52-
; COM: Comparing LSBs in case MSB halves are equal
53-
; CHECK: %[[CMP_LO:.+]] = icmp ugt i32 %[[LHS_LO]], %[[RHS_LO]]
54-
; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
55-
; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
56-
; COM: Comparing signed MSBs - sgt
57-
; CHECK: %[[COND_HI:.+]] = icmp sgt i32 %[[LHS_HI]], %[[RHS_HI]]
58-
; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
59-
60-
; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
61-
; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
62-
; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
63-
; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
64-
; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
65-
; CHECK: call void @use.i64(i64 %[[RES_CAST]])
66-
; CHECK: ret void
67-
define void @test_smax(i64 %argL, i64 %argR) {
68-
%1 = call i64 @llvm.smax.i64(i64 %argL, i64 %argR)
69-
call void @use.i64(i64 %1)
70-
ret void
71-
}
72-
73-
; CHECK-LABEL: @test_smin(
74-
; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
75-
; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
76-
; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
77-
; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
78-
; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
79-
; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
80-
81-
; COM: Comparing LSBs in case MSB halves are equal
82-
; CHECK: %[[CMP_LO:.+]] = icmp ult i32 %[[LHS_LO]], %[[RHS_LO]]
83-
; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
84-
; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
85-
; COM: Comparing signed MSBs - slt
86-
; CHECK: %[[COND_HI:.+]] = icmp slt i32 %[[LHS_HI]], %[[RHS_HI]]
87-
; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
88-
89-
; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
90-
; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
91-
; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
92-
; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
93-
; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
94-
; CHECK: call void @use.i64(i64 %[[RES_CAST]])
95-
; CHECK: ret void
96-
define void @test_smin(i64 %argL, i64 %argR) {
97-
%1 = call i64 @llvm.smin.i64(i64 %argL, i64 %argR)
98-
call void @use.i64(i64 %1)
99-
ret void
100-
}
101-
102-
; CHECK-LABEL: @test_umax(
103-
; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
104-
; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
105-
; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
106-
; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
107-
; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
108-
; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
109-
110-
; COM: Comparing LSBs in case MSB halves are equal
111-
; CHECK: %[[CMP_LO:.+]] = icmp ugt i32 %[[LHS_LO]], %[[RHS_LO]]
112-
; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
113-
; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
114-
; COM: Comparing unsigned MSBs - ugt
115-
; CHECK: %[[COND_HI:.+]] = icmp ugt i32 %[[LHS_HI]], %[[RHS_HI]]
116-
; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
117-
118-
; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
119-
; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
120-
; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
121-
; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
122-
; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
123-
; CHECK: call void @use.i64(i64 %[[RES_CAST]])
124-
; CHECK: ret void
125-
define void @test_umax(i64 %argL, i64 %argR) {
126-
%1 = call i64 @llvm.umax.i64(i64 %argL, i64 %argR)
127-
call void @use.i64(i64 %1)
128-
ret void
129-
}
130-
131-
; CHECK-LABEL: @test_umin(
132-
; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
133-
; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
134-
; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
135-
; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
136-
; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
137-
; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
138-
139-
; COM: Comparing LSBs in case MSB halves are equal
140-
; CHECK: %[[CMP_LO:.+]] = icmp ult i32 %[[LHS_LO]], %[[RHS_LO]]
141-
; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
142-
; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
143-
; COM: Comparing unsigned MSBs - ult
144-
; CHECK: %[[COND_HI:.+]] = icmp ult i32 %[[LHS_HI]], %[[RHS_HI]]
145-
; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
146-
147-
; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
148-
; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
149-
; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
150-
; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
151-
; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
152-
; CHECK: call void @use.i64(i64 %[[RES_CAST]])
153-
; CHECK: ret void
154-
define void @test_umin(i64 %argL, i64 %argR) {
155-
%1 = call i64 @llvm.umin.i64(i64 %argL, i64 %argR)
156-
call void @use.i64(i64 %1)
157-
ret void
158-
}
159-
16044
declare i64 @llvm.abs.i64(i64, i1)
161-
declare i64 @llvm.smax.i64(i64, i64)
162-
declare i64 @llvm.smin.i64(i64, i64)
163-
declare i64 @llvm.umax.i64(i64, i64)
164-
declare i64 @llvm.umin.i64(i64, i64)
16545
declare void @use.i64(i64)
16646

167-
!igc.functions = !{!0, !3, !4, !5, !6}
47+
!igc.functions = !{!0}
16848

16949
!0 = !{void (i64)* @test_abs, !1}
17050
!1 = !{!2}
17151
!2 = !{!"function_type", i32 0}
172-
!3 = !{void (i64, i64)* @test_smax, !1}
173-
!4 = !{void (i64, i64)* @test_smin, !1}
174-
!5 = !{void (i64, i64)* @test_umax, !1}
175-
!6 = !{void (i64, i64)* @test_umin, !1}

IGC/Compiler/tests/Emu64Ops/calls.ll

Lines changed: 1 addition & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -41,135 +41,11 @@ define void @test_abs(i64 %arg) {
4141
ret void
4242
}
4343

44-
; CHECK-LABEL: @test_smax(
45-
; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
46-
; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
47-
; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
48-
; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
49-
; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
50-
; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
51-
52-
; COM: Comparing LSBs in case MSB halves are equal
53-
; CHECK: %[[CMP_LO:.+]] = icmp ugt i32 %[[LHS_LO]], %[[RHS_LO]]
54-
; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
55-
; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
56-
; COM: Comparing signed MSBs - sgt
57-
; CHECK: %[[COND_HI:.+]] = icmp sgt i32 %[[LHS_HI]], %[[RHS_HI]]
58-
; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
59-
60-
; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
61-
; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
62-
; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
63-
; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
64-
; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
65-
; CHECK: call void @use.i64(i64 %[[RES_CAST]])
66-
; CHECK: ret void
67-
define void @test_smax(i64 %argL, i64 %argR) {
68-
%1 = call i64 @llvm.smax.i64(i64 %argL, i64 %argR)
69-
call void @use.i64(i64 %1)
70-
ret void
71-
}
72-
73-
; CHECK-LABEL: @test_smin(
74-
; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
75-
; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
76-
; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
77-
; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
78-
; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
79-
; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
80-
81-
; COM: Comparing LSBs in case MSB halves are equal
82-
; CHECK: %[[CMP_LO:.+]] = icmp ult i32 %[[LHS_LO]], %[[RHS_LO]]
83-
; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
84-
; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
85-
; COM: Comparing signed MSBs - slt
86-
; CHECK: %[[COND_HI:.+]] = icmp slt i32 %[[LHS_HI]], %[[RHS_HI]]
87-
; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
88-
89-
; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
90-
; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
91-
; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
92-
; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
93-
; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
94-
; CHECK: call void @use.i64(i64 %[[RES_CAST]])
95-
; CHECK: ret void
96-
define void @test_smin(i64 %argL, i64 %argR) {
97-
%1 = call i64 @llvm.smin.i64(i64 %argL, i64 %argR)
98-
call void @use.i64(i64 %1)
99-
ret void
100-
}
101-
102-
; CHECK-LABEL: @test_umax(
103-
; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
104-
; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
105-
; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
106-
; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
107-
; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
108-
; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
109-
110-
; COM: Comparing LSBs in case MSB halves are equal
111-
; CHECK: %[[CMP_LO:.+]] = icmp ugt i32 %[[LHS_LO]], %[[RHS_LO]]
112-
; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
113-
; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
114-
; COM: Comparing unsigned MSBs - ugt
115-
; CHECK: %[[COND_HI:.+]] = icmp ugt i32 %[[LHS_HI]], %[[RHS_HI]]
116-
; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
117-
118-
; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
119-
; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
120-
; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
121-
; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
122-
; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
123-
; CHECK: call void @use.i64(i64 %[[RES_CAST]])
124-
; CHECK: ret void
125-
define void @test_umax(i64 %argL, i64 %argR) {
126-
%1 = call i64 @llvm.umax.i64(i64 %argL, i64 %argR)
127-
call void @use.i64(i64 %1)
128-
ret void
129-
}
130-
131-
; CHECK-LABEL: @test_umin(
132-
; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
133-
; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
134-
; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
135-
; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
136-
; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
137-
; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
138-
139-
; COM: Comparing LSBs in case MSB halves are equal
140-
; CHECK: %[[CMP_LO:.+]] = icmp ult i32 %[[LHS_LO]], %[[RHS_LO]]
141-
; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
142-
; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
143-
; COM: Comparing unsigned MSBs - ult
144-
; CHECK: %[[COND_HI:.+]] = icmp ult i32 %[[LHS_HI]], %[[RHS_HI]]
145-
; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
146-
147-
; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
148-
; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
149-
; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
150-
; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
151-
; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
152-
; CHECK: call void @use.i64(i64 %[[RES_CAST]])
153-
; CHECK: ret void
154-
define void @test_umin(i64 %argL, i64 %argR) {
155-
%1 = call i64 @llvm.umin.i64(i64 %argL, i64 %argR)
156-
call void @use.i64(i64 %1)
157-
ret void
158-
}
159-
16044
declare i64 @llvm.abs.i64(i64, i1)
161-
declare i64 @llvm.smax.i64(i64, i64)
162-
declare i64 @llvm.smin.i64(i64, i64)
163-
declare i64 @llvm.umax.i64(i64, i64)
164-
declare i64 @llvm.umin.i64(i64, i64)
16545
declare void @use.i64(i64)
16646

167-
!igc.functions = !{!0, !3, !4, !5, !6}
47+
!igc.functions = !{!0}
16848

16949
!0 = !{void (i64)* @test_abs, !1}
17050
!1 = !{!2}
17151
!2 = !{!"function_type", i32 0}
172-
!3 = !{void (i64, i64)* @test_smax, !1}
173-
!4 = !{void (i64, i64)* @test_smin, !1}
174-
!5 = !{void (i64, i64)* @test_umax, !1}
175-
!6 = !{void (i64, i64)* @test_umin, !1}

0 commit comments

Comments
 (0)