From ea7b616ae0907d73123aff26235c58492e4953c4 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Thu, 10 Apr 2025 20:43:49 -0400 Subject: [PATCH] [DirectX] legalize usub.sat fixes #135285 This change implements the usub.sat intrinsic to perform an unsigned saturating subtraction on the 2 arguments. The minimum value this operation is clamped to is 0. --- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 18 ++++++ llvm/test/CodeGen/DirectX/usub_sat.ll | 60 +++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/usub_sat.ll diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 84acf4d536d0c..70f284e08b250 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -65,12 +65,27 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_sign: case Intrinsic::dx_step: case Intrinsic::dx_radians: + case Intrinsic::usub_sat: case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_fadd: return true; } return false; } + +static Value *expandUsubSat(CallInst *Orig) { + Value *A = Orig->getArgOperand(0); + Value *B = Orig->getArgOperand(1); + Type *Ty = A->getType(); + + IRBuilder<> Builder(Orig); + + Value *Cmp = Builder.CreateICmpULT(A, B, "usub.cmp"); + Value *Sub = Builder.CreateSub(A, B, "usub.sub"); + Value *Zero = ConstantInt::get(Ty, 0); + return Builder.CreateSelect(Cmp, Zero, Sub, "usub.sat"); +} + static Value *expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId) { assert(IntrinsicId == Intrinsic::vector_reduce_add || IntrinsicId == Intrinsic::vector_reduce_fadd); @@ -586,6 +601,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::dx_radians: Result = expandRadiansIntrinsic(Orig); break; + case Intrinsic::usub_sat: + Result = expandUsubSat(Orig); + break; case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_fadd: Result = 
expandVecReduceAdd(Orig, IntrinsicId); diff --git a/llvm/test/CodeGen/DirectX/usub_sat.ll b/llvm/test/CodeGen/DirectX/usub_sat.ll new file mode 100644 index 0000000000000..8cfb1a1fe9bd1 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/usub_sat.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure llvm.usub.sat is expanded into icmp ult, sub, and select. + +define noundef i16 @usub_sat_i16(i16 noundef %a, i16 noundef %b) { +; CHECK-LABEL: define noundef i16 @usub_sat_i16( +; CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i16 [[A]], [[B]] +; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i16 [[A]], [[B]] +; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select i1 [[USUB_CMP]], i16 0, i16 [[USUB_SUB]] +; CHECK-NEXT: ret i16 [[ELT_USUB_SAT]] +; +entry: + %elt.usub_sat = call i16 @llvm.usub.sat.i16(i16 %a, i16 %b) + ret i16 %elt.usub_sat +} + +define noundef i32 @usub_sat_i32(i32 noundef %a, i32 noundef %b) { +; CHECK-LABEL: define noundef i32 @usub_sat_i32( +; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i32 [[A]], [[B]] +; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i32 [[A]], [[B]] +; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select i1 [[USUB_CMP]], i32 0, i32 [[USUB_SUB]] +; CHECK-NEXT: ret i32 [[ELT_USUB_SAT]] +; +entry: + %elt.usub_sat = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b) + ret i32 %elt.usub_sat +} + +define noundef i64 @usub_sat_i64(i64 noundef %a, i64 noundef %b) { +; CHECK-LABEL: define noundef i64 @usub_sat_i64( +; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i64 [[A]], [[B]] +; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i64 [[A]], [[B]] +; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = 
select i1 [[USUB_CMP]], i64 0, i64 [[USUB_SUB]] +; CHECK-NEXT: ret i64 [[ELT_USUB_SAT]] +; +entry: + %elt.usub_sat = call i64 @llvm.usub.sat.i64(i64 %a, i64 %b) + ret i64 %elt.usub_sat +} + +define noundef <4 x i32> @usub_sat_vec(<4 x i32> noundef %a, <4 x i32> noundef %b) { +; CHECK-LABEL: define noundef <4 x i32> @usub_sat_vec( +; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult <4 x i32> [[A]], [[B]] +; CHECK-NEXT: [[USUB_SUB:%.*]] = sub <4 x i32> [[A]], [[B]] +; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select <4 x i1> [[USUB_CMP]], <4 x i32> zeroinitializer, <4 x i32> [[USUB_SUB]] +; CHECK-NEXT: ret <4 x i32> [[ELT_USUB_SAT]] +; +entry: + %elt.usub_sat = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %elt.usub_sat +}