diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 00925ed42fcd4..d123a06cc5d9e 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1687,6 +1687,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SRL, VT, Custom);
   }
 
+  setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
+  setOperationAction(ISD::SADDSAT, MVT::i64, Legal);
+
   // Extending loads from (native) vectors of i8 into (native) vectors of i16
   // are legal.
   setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0e13dd3214da6..c8b0bf3c5270c 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -204,6 +204,8 @@ HexagonTargetLowering::initializeHVXLowering() {
     setOperationAction(ISD::CTLZ, T, Legal);
     setOperationAction(ISD::SELECT, T, Legal);
     setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
+    setOperationAction(ISD::UADDSAT, T, Legal);
+    setOperationAction(ISD::SADDSAT, T, Legal);
     if (T != ByteV) {
       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
@@ -295,6 +297,8 @@ HexagonTargetLowering::initializeHVXLowering() {
     setOperationAction(ISD::CTPOP, T, Custom);
 
     setOperationAction(ISD::ADD, T, Legal);
+    setOperationAction(ISD::UADDSAT, T, Legal);
+    setOperationAction(ISD::SADDSAT, T, Legal);
     setOperationAction(ISD::SUB, T, Legal);
     setOperationAction(ISD::MUL, T, Custom);
     setOperationAction(ISD::MULHS, T, Custom);
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 2a991bafbf148..2337f185c7b36 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -1517,6 +1517,14 @@
 def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
 def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>;
 def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>;
+class OpR_RR_pat_sat
+  : Pat<(ResType (Op RxPred:$Rs, RxPred:$Rt)),
+        (MI RxPred:$Rs, RxPred:$Rt)>;
+
+def: OpR_RR_pat_sat;
+def: OpR_RR_pat_sat;
+
 def: OpR_RR_pat;
 def: OpR_RR_pat;
 def: OpR_RR_pat;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index ba449eaeed34c..fb2ef59d99ef1 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -426,6 +426,21 @@ let Predicates = [UseHVX] in {
            (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
 }
 
+let Predicates = [UseHVX] in {
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+  def: OpR_RR_pat_sat;
+}
+
 // For now, we always deal with vector floating point in SF mode.
 class OpR_RR_pat_conv
diff --git a/llvm/test/CodeGen/Hexagon/addsat.ll b/llvm/test/CodeGen/Hexagon/addsat.ll
new file mode 100644
index 0000000000000..489c7d5a0fdff
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/addsat.ll
@@ -0,0 +1,157 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test for saturating add instructions.
+
+; CHECK-LABEL: test1
+; CHECK: v{{.*}}.ub = vadd(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub):sat
+define <128 x i8> @test1(<128 x i8>* %a0, <128 x i8>* %a1) #0 {
+entry:
+  %wide.load = load <128 x i8>, <128 x i8>* %a0, align 1
+  %wide.load62 = load <128 x i8>, <128 x i8>* %a1, align 1
+  %add = call <128 x i8> @llvm.uadd.sat.v128i8(<128 x i8> %wide.load, <128 x i8> %wide.load62)
+  ret <128 x i8> %add
+}
+
+; CHECK-LABEL: test2
+; CHECK: v{{.*}}.b = vadd(v{{[0-9]+}}.b,v{{[0-9]+}}.b):sat
+define <128 x i8> @test2(<128 x i8>* %a0, <128 x i8>* %a1) #0 {
+entry:
+  %wide.load = load <128 x i8>, <128 x i8>* %a0, align 1
+  %wide.load62 = load <128 x i8>, <128 x i8>* %a1, align 1
+  %add = call <128 x i8> @llvm.sadd.sat.v128i8(<128 x i8> %wide.load, <128 x i8> %wide.load62)
+  ret <128 x i8> %add
+}
+
+; CHECK-LABEL: test3
+; CHECK: v{{.*}}.uh = vadd(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh):sat
+define <64 x i16> @test3(<64 x i16>* %a0, <64 x i16>* %a1) #0 {
+entry:
+  %wide.load = load <64 x i16>, <64 x i16>* %a0, align 1
+  %wide.load62 = load <64 x i16>, <64 x i16>* %a1, align 1
+  %add = call <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16> %wide.load, <64 x i16> %wide.load62)
+  ret <64 x i16> %add
+}
+
+; CHECK-LABEL: test4
+; CHECK: v{{.*}}.h = vadd(v{{[0-9]+}}.h,v{{[0-9]+}}.h):sat
+define <64 x i16> @test4(<64 x i16>* %a0, <64 x i16>* %a1) #0 {
+entry:
+  %wide.load = load <64 x i16>, <64 x i16>* %a0, align 1
+  %wide.load62 = load <64 x i16>, <64 x i16>* %a1, align 1
+  %add = call <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16> %wide.load, <64 x i16> %wide.load62)
+  ret <64 x i16> %add
+}
+
+; CHECK-LABEL: test5
+; CHECK: v{{.*}}.uw = vadd(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw):sat
+define <32 x i32> @test5(<32 x i32>* %a0, <32 x i32>* %a1) #0 {
+entry:
+  %wide.load = load <32 x i32>, <32 x i32>* %a0, align 1
+  %wide.load62 = load <32 x i32>, <32 x i32>* %a1, align 1
+  %add = call <32 x i32> @llvm.uadd.sat.v32i32(<32 x i32> %wide.load, <32 x i32> %wide.load62)
+  ret <32 x i32> %add
+}
+
+; CHECK-LABEL: test6
+; CHECK: v{{.*}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w):sat
+define <32 x i32> @test6(<32 x i32>* %a0, <32 x i32>* %a1) #0 {
+entry:
+  %wide.load = load <32 x i32>, <32 x i32>* %a0, align 1
+  %wide.load62 = load <32 x i32>, <32 x i32>* %a1, align 1
+  %add = call <32 x i32> @llvm.sadd.sat.v32i32(<32 x i32> %wide.load, <32 x i32> %wide.load62)
+  ret <32 x i32> %add
+}
+
+; CHECK-LABEL: test7
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.ub = vadd(v{{[0-9]+}}:{{[0-9]+}}.ub,v{{[0-9]+}}:{{[0-9]+}}.ub):sat
+define <256 x i8> @test7(<256 x i8>* %a0, <256 x i8>* %a1) #0 {
+entry:
+  %wide.load = load <256 x i8>, <256 x i8>* %a0, align 1
+  %wide.load62 = load <256 x i8>, <256 x i8>* %a1, align 1
+  %add = call <256 x i8> @llvm.uadd.sat.v256i8(<256 x i8> %wide.load, <256 x i8> %wide.load62)
+  ret <256 x i8> %add
+}
+
+; CHECK-LABEL: test8
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.b = vadd(v{{[0-9]+}}:{{[0-9]+}}.b,v{{[0-9]+}}:{{[0-9]+}}.b):sat
+define <256 x i8> @test8(<256 x i8>* %a0, <256 x i8>* %a1) #0 {
+entry:
+  %wide.load = load <256 x i8>, <256 x i8>* %a0, align 1
+  %wide.load62 = load <256 x i8>, <256 x i8>* %a1, align 1
+  %add = call <256 x i8> @llvm.sadd.sat.v256i8(<256 x i8> %wide.load, <256 x i8> %wide.load62)
+  ret <256 x i8> %add
+}
+
+; CHECK-LABEL: test9
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uh = vadd(v{{[0-9]+}}:{{[0-9]+}}.uh,v{{[0-9]+}}:{{[0-9]+}}.uh):sat
+define <128 x i16> @test9(<128 x i16>* %a0, <128 x i16>* %a1) #0 {
+entry:
+  %wide.load = load <128 x i16>, <128 x i16>* %a0, align 1
+  %wide.load62 = load <128 x i16>, <128 x i16>* %a1, align 1
+  %add = call <128 x i16> @llvm.uadd.sat.v128i16(<128 x i16> %wide.load, <128 x i16> %wide.load62)
+  ret <128 x i16> %add
+}
+
+; CHECK-LABEL: test10
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h = vadd(v{{[0-9]+}}:{{[0-9]+}}.h,v{{[0-9]+}}:{{[0-9]+}}.h):sat
+define <128 x i16> @test10(<128 x i16>* %a0, <128 x i16>* %a1) #0 {
+entry:
+  %wide.load = load <128 x i16>, <128 x i16>* %a0, align 1
+  %wide.load62 = load <128 x i16>, <128 x i16>* %a1, align 1
+  %add = call <128 x i16> @llvm.sadd.sat.v128i16(<128 x i16> %wide.load, <128 x i16> %wide.load62)
+  ret <128 x i16> %add
+}
+
+; CHECK-LABEL: test11
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uw = vadd(v{{[0-9]+}}:{{[0-9]+}}.uw,v{{[0-9]+}}:{{[0-9]+}}.uw):sat
+define <64 x i32> @test11(<64 x i32>* %a0, <64 x i32>* %a1) #0 {
+entry:
+  %wide.load = load <64 x i32>, <64 x i32>* %a0, align 1
+  %wide.load62 = load <64 x i32>, <64 x i32>* %a1, align 1
+  %add = call <64 x i32> @llvm.uadd.sat.v64i32(<64 x i32> %wide.load, <64 x i32> %wide.load62)
+  ret <64 x i32> %add
+}
+
+; CHECK-LABEL: test12
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.w = vadd(v{{[0-9]+}}:{{[0-9]+}}.w,v{{[0-9]+}}:{{[0-9]+}}.w):sat
+define <64 x i32> @test12(<64 x i32>* %a0, <64 x i32>* %a1) #0 {
+entry:
+  %wide.load = load <64 x i32>, <64 x i32>* %a0, align 1
+  %wide.load62 = load <64 x i32>, <64 x i32>* %a1, align 1
+  %add = call <64 x i32> @llvm.sadd.sat.v64i32(<64 x i32> %wide.load, <64 x i32> %wide.load62)
+  ret <64 x i32> %add
+}
+
+; CHECK-LABEL: test13
+; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},r{{[0-9]+}}):sat
+define i32 @test13(i32 %a0, i32 %a1) #0 {
+entry:
+  %add = call i32 @llvm.sadd.sat.i32(i32 %a0, i32 %a1)
+  ret i32 %add
+}
+
+; CHECK-LABEL: test14
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = add(r{{[0-9]+}}:{{[0-9]+}},r{{[0-9]+}}:{{[0-9]+}}):sat
+define i64 @test14(i64 %a0, i64 %a1) #0 {
+entry:
+  %add = call i64 @llvm.sadd.sat.i64(i64 %a0, i64 %a1)
+  ret i64 %add
+}
+
+declare <128 x i8> @llvm.uadd.sat.v128i8(<128 x i8>, <128 x i8>) #1
+declare <128 x i8> @llvm.sadd.sat.v128i8(<128 x i8>, <128 x i8>) #1
+declare <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16>, <64 x i16>) #1
+declare <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16>, <64 x i16>) #1
+declare <32 x i32> @llvm.uadd.sat.v32i32(<32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.sadd.sat.v32i32(<32 x i32>, <32 x i32>) #1
+declare <256 x i8> @llvm.uadd.sat.v256i8(<256 x i8>, <256 x i8>) #1
+declare <256 x i8> @llvm.sadd.sat.v256i8(<256 x i8>, <256 x i8>) #1
+declare <128 x i16> @llvm.uadd.sat.v128i16(<128 x i16>, <128 x i16>) #1
+declare <128 x i16> @llvm.sadd.sat.v128i16(<128 x i16>, <128 x i16>) #1
+declare <64 x i32> @llvm.uadd.sat.v64i32(<64 x i32>, <64 x i32>) #1
+declare <64 x i32> @llvm.sadd.sat.v64i32(<64 x i32>, <64 x i32>) #1
+declare i32 @llvm.sadd.sat.i32(i32, i32)
+declare i64 @llvm.sadd.sat.i64(i64, i64)
+
+attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
+attributes #1 = { nounwind readnone speculatable willreturn }
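
Note on the TableGen hunks above: the angle-bracket template arguments of OpR_RR_pat_sat, and of the neighboring OpR_RR_pat and OpR_RR_pat_conv context lines, are not visible, so the class and its instantiations appear without the instruction, operation, type, and register-predicate arguments they need. The sketch below is a plausible reconstruction, not the patch author's exact text: the instruction records (A2_addsat, A2_addpsat, V6_vaddubsat and the other V6_vadd*sat/_dv records) are inferred from the assembly the new test checks for, and the HVX value types and pattern leaves (VecI8/VecPI8, HVI8/HWI8, and so on) are assumed to follow the conventions already used by OpR_RR_pat in these files.

// Reconstruction sketch; the record, type, and leaf names below are
// assumptions inferred from the CHECK lines in addsat.ll.
class OpR_RR_pat_sat<InstHexagon MI, SDPatternOperator Op, ValueType ResType,
                     PatFrag RxPred>
  : Pat<(ResType (Op RxPred:$Rs, RxPred:$Rt)),
        (MI RxPred:$Rs, RxPred:$Rt)>;

// Scalar signed saturating add: 32-bit and 64-bit register forms.
def: OpR_RR_pat_sat<A2_addsat,  saddsat, i32, I32>;
def: OpR_RR_pat_sat<A2_addpsat, saddsat, i64, I64>;

// HVX: unsigned/signed byte, half, and word saturating add, in
// single-vector and vector-pair (_dv) forms.
let Predicates = [UseHVX] in {
  def: OpR_RR_pat_sat<V6_vaddubsat,    uaddsat, VecI8,   HVI8>;
  def: OpR_RR_pat_sat<V6_vaddubsat_dv, uaddsat, VecPI8,  HWI8>;
  def: OpR_RR_pat_sat<V6_vaddbsat,     saddsat, VecI8,   HVI8>;
  def: OpR_RR_pat_sat<V6_vaddbsat_dv,  saddsat, VecPI8,  HWI8>;
  def: OpR_RR_pat_sat<V6_vadduhsat,    uaddsat, VecI16,  HVI16>;
  def: OpR_RR_pat_sat<V6_vadduhsat_dv, uaddsat, VecPI16, HWI16>;
  def: OpR_RR_pat_sat<V6_vaddhsat,     saddsat, VecI16,  HVI16>;
  def: OpR_RR_pat_sat<V6_vaddhsat_dv,  saddsat, VecPI16, HWI16>;
  def: OpR_RR_pat_sat<V6_vadduwsat,    uaddsat, VecI32,  HVI32>;
  def: OpR_RR_pat_sat<V6_vadduwsat_dv, uaddsat, VecPI32, HWI32>;
  def: OpR_RR_pat_sat<V6_vaddwsat,     saddsat, VecI32,  HVI32>;
  def: OpR_RR_pat_sat<V6_vaddwsat_dv,  saddsat, VecPI32, HWI32>;
}

The parameter order here mirrors the existing OpR_RR_pat class; the original patch may name or order these arguments differently.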