Replace i64 llvm min max intrinsics with simple comp+select

KanclerzPiotr · igcbot · commit 8239cbae4bf7 · 2024-11-22T11:35:31.000+01:00
LLVM 15 introduced common usage of
llvm.smax
llvm.smin
llvm.umax
llvm.umin
The i64 variants of these intrinsics were not emulated on HW that has only partial or no i64 support.
This commit replaces them with icmp + select, which is subsequently emulated properly.
diff --git a/IGC/Compiler/CISACodeGen/Emu64OpsPass.cpp b/IGC/Compiler/CISACodeGen/Emu64OpsPass.cpp
@@ -1,6 +1,6 @@
 /*========================== begin_copyright_notice ============================
 
-Copyright (C) 2017-2021 Intel Corporation
+Copyright (C) 2017-2024 Intel Corporation
 
 SPDX-License-Identifier: MIT
 
@@ -1933,53 +1933,6 @@ bool InstExpander::visitCall(CallInst& Call) {
             Emu->setExpandedValues(&Call, SelectLo, SelectHo);
             return true;
         }
-        // emulate LLVM min/max intrinsics
-        case Intrinsic::smax:
-        case Intrinsic::smin:
-        case Intrinsic::umax:
-        case Intrinsic::umin:
-        {
-            // The least significant halves' comparison is dependent on that
-            // for the most significant halves, so we gain nothing by lowering
-            // this into i32 min/max calls. Basic cmp/sel sequence should
-            // suffice
-            const DenseMap<Intrinsic::ID, CmpInst::Predicate> CmpPredMap {
-                {Intrinsic::smax, CmpInst::Predicate::ICMP_SGT},
-                {Intrinsic::smin, CmpInst::Predicate::ICMP_SLT},
-                {Intrinsic::umax, CmpInst::Predicate::ICMP_UGT},
-                {Intrinsic::umin, CmpInst::Predicate::ICMP_ULT}
-            };
-            Value* LHS = Call.getArgOperand(0), * RHS = Call.getArgOperand(1);
-            // FIXME: Note that we aren't producing expanded/emulated values
-            // here, but rather replacing the call uses with the result of a
-            // newly generated i64 instruction. To make that work, 2 criteria
-            // should be satisfied from the perspective of Emu64Ops::expandInsts
-            // algorithm:
-            // 1. Inst-over-BB iterators cannot be invalidated
-            // 2. Due to averse inst-over-BB iteration order, the cmp/sel
-            //    sequence must be inserted after the current min/max call,
-            //    before its first use - regardless of the fact that the call
-            //    itself will be unlinked from those uses and marked for
-            //    deletion.
-            // For 1, we're entirely relying on IRBuilder's internal validation
-            // of instruction numbering within the BB. For 2, we're basically
-            // exploiting the knowledge that the inst-over-BB iteration in the
-            // parent method strictly heeds the averse order.
-            // TODO: Instead of hacking the iteration logic from within the
-            // helper InstExpander method, we should encapsulate this use-case
-            // (inserting new i64 insts into the emulation queue) at the
-            // Emu64Ops class level. One of the options is implementing a util
-            // akin to LLVM's InstructionWorklist, which would support averse
-            // iteration order and handle the deferred instructions upon their
-            // creation. Such a worklist class might have its use in a broader
-            // set of IGC passes, hence implementing this a "global" IGC util
-            // could be an idea.
-            IRB->SetInsertPoint(&*std::next(BasicBlock::iterator(Call)));
-            auto* Cmp = cast<Instruction>(
-                IRB->CreateICmp(CmpPredMap.lookup(IntrID), LHS, RHS));
-            Call.replaceAllUsesWith(IRB->CreateSelect(Cmp, LHS, RHS));
-            return true;
-        }
         }
     }
 
diff --git a/IGC/Compiler/Optimizer/OpenCLPasses/ReplaceUnsupportedIntrinsics/ReplaceUnsupportedIntrinsics.cpp b/IGC/Compiler/Optimizer/OpenCLPasses/ReplaceUnsupportedIntrinsics/ReplaceUnsupportedIntrinsics.cpp
@@ -1,6 +1,6 @@
 /*========================== begin_copyright_notice ============================
 
-Copyright (C) 2017-2023 Intel Corporation
+Copyright (C) 2017-2024 Intel Corporation
 
 SPDX-License-Identifier: MIT
 
@@ -109,6 +109,7 @@ namespace
         void replaceLRound(IntrinsicInst* I);
         void replaceLRint(IntrinsicInst* I);
         void replaceCountTheLeadingZeros(IntrinsicInst* I);
+        void replaceI64MinMax(IntrinsicInst* I);
 
         static const std::map< Intrinsic::ID, MemFuncPtr_t > m_intrinsicToFunc;
     };
@@ -137,7 +138,11 @@ const std::map< Intrinsic::ID, ReplaceUnsupportedIntrinsics::MemFuncPtr_t > Repl
     { Intrinsic::llround,    &ReplaceUnsupportedIntrinsics::replaceLRound },
     { Intrinsic::lrint,      &ReplaceUnsupportedIntrinsics::replaceLRint },
     { Intrinsic::llrint,     &ReplaceUnsupportedIntrinsics::replaceLRint },
-    { Intrinsic::ctlz,       &ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros }
+    { Intrinsic::ctlz,       &ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros },
+    { Intrinsic::smax,       &ReplaceUnsupportedIntrinsics::replaceI64MinMax },
+    { Intrinsic::smin,       &ReplaceUnsupportedIntrinsics::replaceI64MinMax },
+    { Intrinsic::umax,       &ReplaceUnsupportedIntrinsics::replaceI64MinMax },
+    { Intrinsic::umin,       &ReplaceUnsupportedIntrinsics::replaceI64MinMax }
 };
 
 ReplaceUnsupportedIntrinsics::ReplaceUnsupportedIntrinsics() : FunctionPass(ID)
@@ -1030,6 +1035,30 @@ void ReplaceUnsupportedIntrinsics::replaceLRint(IntrinsicInst* I) {
     I->eraseFromParent();
 }
 
+/*
+    Replaces i64 calls to llvm.smax, llvm.smin, llvm.umax and llvm.umin with
+    icmp + select instructions that can be emulated, then erases the call.
+    Calls of any other type are left untouched.
+*/
+void ReplaceUnsupportedIntrinsics::replaceI64MinMax(IntrinsicInst* I)
+{
+    if (!I->getType()->isIntegerTy(64))
+        return;
+
+    const SmallDenseMap<Intrinsic::ID, CmpInst::Predicate, 4> CmpPredMap {
+        {Intrinsic::smax, CmpInst::Predicate::ICMP_SGT},
+        {Intrinsic::smin, CmpInst::Predicate::ICMP_SLT},
+        {Intrinsic::umax, CmpInst::Predicate::ICMP_UGT},
+        {Intrinsic::umin, CmpInst::Predicate::ICMP_ULT}
+    };
+
+    IGCLLVM::IRBuilder<> Builder(I); // builder is anchored at the call being replaced
+    Value* LHS = I->getArgOperand(0), * RHS = I->getArgOperand(1);
+    Value* Cmp = Builder.CreateICmp(CmpPredMap.lookup(I->getIntrinsicID()), LHS, RHS); // may fold to a Constant, so no cast<Instruction>
+    I->replaceAllUsesWith(Builder.CreateSelect(Cmp, LHS, RHS));
+    I->eraseFromParent(); // drop the now-dead call, as replaceLRint does
+}
+
 /*
   Replaces llvm.ctlz.* intrinsics (count the leading zeros)
   to llvm.ctlz.i32 because we support llvm.ctlz intrinsic
diff --git a/IGC/Compiler/tests/Emu64Ops/calls-typed-pointers.ll b/IGC/Compiler/tests/Emu64Ops/calls-typed-pointers.ll
@@ -41,135 +41,11 @@ define void @test_abs(i64 %arg) {
   ret void
 }
 
-; CHECK-LABEL: @test_smax(
-; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
-; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
-; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
-; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
-; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
-; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
-
-; COM: Comparing LSBs in case MSB halves are equal
-; CHECK: %[[CMP_LO:.+]] = icmp ugt i32 %[[LHS_LO]], %[[RHS_LO]]
-; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
-; COM: Comparing signed MSBs - sgt
-; CHECK: %[[COND_HI:.+]] = icmp sgt i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
-
-; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
-; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
-; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
-; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
-; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
-; CHECK: call void @use.i64(i64 %[[RES_CAST]])
-; CHECK: ret void
-define void @test_smax(i64 %argL, i64 %argR) {
-  %1 = call i64 @llvm.smax.i64(i64 %argL, i64 %argR)
-  call void @use.i64(i64 %1)
-  ret void
-}
-
-; CHECK-LABEL: @test_smin(
-; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
-; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
-; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
-; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
-; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
-; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
-
-; COM: Comparing LSBs in case MSB halves are equal
-; CHECK: %[[CMP_LO:.+]] = icmp ult i32 %[[LHS_LO]], %[[RHS_LO]]
-; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
-; COM: Comparing signed MSBs - slt
-; CHECK: %[[COND_HI:.+]] = icmp slt i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
-
-; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
-; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
-; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
-; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
-; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
-; CHECK: call void @use.i64(i64 %[[RES_CAST]])
-; CHECK: ret void
-define void @test_smin(i64 %argL, i64 %argR) {
-  %1 = call i64 @llvm.smin.i64(i64 %argL, i64 %argR)
-  call void @use.i64(i64 %1)
-  ret void
-}
-
-; CHECK-LABEL: @test_umax(
-; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
-; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
-; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
-; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
-; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
-; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
-
-; COM: Comparing LSBs in case MSB halves are equal
-; CHECK: %[[CMP_LO:.+]] = icmp ugt i32 %[[LHS_LO]], %[[RHS_LO]]
-; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
-; COM: Comparing unsigned MSBs - ugt
-; CHECK: %[[COND_HI:.+]] = icmp ugt i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
-
-; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
-; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
-; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
-; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
-; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
-; CHECK: call void @use.i64(i64 %[[RES_CAST]])
-; CHECK: ret void
-define void @test_umax(i64 %argL, i64 %argR) {
-  %1 = call i64 @llvm.umax.i64(i64 %argL, i64 %argR)
-  call void @use.i64(i64 %1)
-  ret void
-}
-
-; CHECK-LABEL: @test_umin(
-; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
-; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
-; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
-; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
-; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
-; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
-
-; COM: Comparing LSBs in case MSB halves are equal
-; CHECK: %[[CMP_LO:.+]] = icmp ult i32 %[[LHS_LO]], %[[RHS_LO]]
-; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
-; COM: Comparing unsigned MSBs - ult
-; CHECK: %[[COND_HI:.+]] = icmp ult i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
-
-; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
-; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
-; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
-; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
-; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
-; CHECK: call void @use.i64(i64 %[[RES_CAST]])
-; CHECK: ret void
-define void @test_umin(i64 %argL, i64 %argR) {
-  %1 = call i64 @llvm.umin.i64(i64 %argL, i64 %argR)
-  call void @use.i64(i64 %1)
-  ret void
-}
-
 declare i64 @llvm.abs.i64(i64, i1)
-declare i64 @llvm.smax.i64(i64, i64)
-declare i64 @llvm.smin.i64(i64, i64)
-declare i64 @llvm.umax.i64(i64, i64)
-declare i64 @llvm.umin.i64(i64, i64)
 declare void @use.i64(i64)
 
-!igc.functions = !{!0, !3, !4, !5, !6}
+!igc.functions = !{!0}
 
 !0 = !{void (i64)* @test_abs, !1}
 !1 = !{!2}
 !2 = !{!"function_type", i32 0}
-!3 = !{void (i64, i64)* @test_smax, !1}
-!4 = !{void (i64, i64)* @test_smin, !1}
-!5 = !{void (i64, i64)* @test_umax, !1}
-!6 = !{void (i64, i64)* @test_umin, !1}
diff --git a/IGC/Compiler/tests/Emu64Ops/calls.ll b/IGC/Compiler/tests/Emu64Ops/calls.ll
@@ -41,135 +41,11 @@ define void @test_abs(i64 %arg) {
   ret void
 }
 
-; CHECK-LABEL: @test_smax(
-; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
-; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
-; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
-; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
-; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
-; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
-
-; COM: Comparing LSBs in case MSB halves are equal
-; CHECK: %[[CMP_LO:.+]] = icmp ugt i32 %[[LHS_LO]], %[[RHS_LO]]
-; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
-; COM: Comparing signed MSBs - sgt
-; CHECK: %[[COND_HI:.+]] = icmp sgt i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
-
-; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
-; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
-; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
-; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
-; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
-; CHECK: call void @use.i64(i64 %[[RES_CAST]])
-; CHECK: ret void
-define void @test_smax(i64 %argL, i64 %argR) {
-  %1 = call i64 @llvm.smax.i64(i64 %argL, i64 %argR)
-  call void @use.i64(i64 %1)
-  ret void
-}
-
-; CHECK-LABEL: @test_smin(
-; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
-; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
-; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
-; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
-; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
-; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
-
-; COM: Comparing LSBs in case MSB halves are equal
-; CHECK: %[[CMP_LO:.+]] = icmp ult i32 %[[LHS_LO]], %[[RHS_LO]]
-; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
-; COM: Comparing signed MSBs - slt
-; CHECK: %[[COND_HI:.+]] = icmp slt i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
-
-; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
-; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
-; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
-; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
-; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
-; CHECK: call void @use.i64(i64 %[[RES_CAST]])
-; CHECK: ret void
-define void @test_smin(i64 %argL, i64 %argR) {
-  %1 = call i64 @llvm.smin.i64(i64 %argL, i64 %argR)
-  call void @use.i64(i64 %1)
-  ret void
-}
-
-; CHECK-LABEL: @test_umax(
-; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
-; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
-; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
-; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
-; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
-; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
-
-; COM: Comparing LSBs in case MSB halves are equal
-; CHECK: %[[CMP_LO:.+]] = icmp ugt i32 %[[LHS_LO]], %[[RHS_LO]]
-; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
-; COM: Comparing unsigned MSBs - ugt
-; CHECK: %[[COND_HI:.+]] = icmp ugt i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
-
-; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
-; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
-; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
-; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
-; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
-; CHECK: call void @use.i64(i64 %[[RES_CAST]])
-; CHECK: ret void
-define void @test_umax(i64 %argL, i64 %argR) {
-  %1 = call i64 @llvm.umax.i64(i64 %argL, i64 %argR)
-  call void @use.i64(i64 %1)
-  ret void
-}
-
-; CHECK-LABEL: @test_umin(
-; CHECK: %[[CAST_LHS:.+]] = bitcast i64 %argL to <2 x i32>
-; CHECK: %[[LHS_LO:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 0
-; CHECK: %[[LHS_HI:.+]] = extractelement <2 x i32> %[[CAST_LHS]], i32 1
-; CHECK: %[[CAST_RHS:.+]] = bitcast i64 %argR to <2 x i32>
-; CHECK: %[[RHS_LO:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 0
-; CHECK: %[[RHS_HI:.+]] = extractelement <2 x i32> %[[CAST_RHS]], i32 1
-
-; COM: Comparing LSBs in case MSB halves are equal
-; CHECK: %[[CMP_LO:.+]] = icmp ult i32 %[[LHS_LO]], %[[RHS_LO]]
-; CHECK: %[[CMP_EQ_HI:.+]] = icmp eq i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[COND_LO:.+]] = and i1 %[[CMP_EQ_HI]], %[[CMP_LO]]
-; COM: Comparing unsigned MSBs - ult
-; CHECK: %[[COND_HI:.+]] = icmp ult i32 %[[LHS_HI]], %[[RHS_HI]]
-; CHECK: %[[RES_COND:.+]] = or i1 %[[COND_LO]], %[[COND_HI]]
-
-; CHECK: %[[SEL_LO:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_LO]], i32 %[[RHS_LO]]
-; CHECK: %[[SEL_HI:.+]] = select i1 %[[RES_COND]], i32 %[[LHS_HI]], i32 %[[RHS_HI]]
-; CHECK: %[[RES_LO:.+]] = insertelement <2 x i32> undef, i32 %[[SEL_LO]], i32 0
-; CHECK: %[[RES_VEC:.+]] = insertelement <2 x i32> %[[RES_LO]], i32 %[[SEL_HI]], i32 1
-; CHECK: %[[RES_CAST:.+]] = bitcast <2 x i32> %[[RES_VEC]] to i64
-; CHECK: call void @use.i64(i64 %[[RES_CAST]])
-; CHECK: ret void
-define void @test_umin(i64 %argL, i64 %argR) {
-  %1 = call i64 @llvm.umin.i64(i64 %argL, i64 %argR)
-  call void @use.i64(i64 %1)
-  ret void
-}
-
 declare i64 @llvm.abs.i64(i64, i1)
-declare i64 @llvm.smax.i64(i64, i64)
-declare i64 @llvm.smin.i64(i64, i64)
-declare i64 @llvm.umax.i64(i64, i64)
-declare i64 @llvm.umin.i64(i64, i64)
 declare void @use.i64(i64)
 
-!igc.functions = !{!0, !3, !4, !5, !6}
+!igc.functions = !{!0}
 
 !0 = !{void (i64)* @test_abs, !1}
 !1 = !{!2}
 !2 = !{!"function_type", i32 0}
-!3 = !{void (i64, i64)* @test_smax, !1}
-!4 = !{void (i64, i64)* @test_smin, !1}
-!5 = !{void (i64, i64)* @test_umax, !1}
-!6 = !{void (i64, i64)* @test_umin, !1}
diff --git a/IGC/Compiler/tests/ReplaceUnsupportedIntrinsics/minmax.ll b/IGC/Compiler/tests/ReplaceUnsupportedIntrinsics/minmax.ll