diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index f339396f3a411..90d3d92d6bbf5 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5211,34 +5211,34 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, // XOR: llvm/test/CodeGen/AArch64/reduce-xor.ll // AND: llvm/test/CodeGen/AArch64/reduce-and.ll static const CostTblEntry CostTblNoPairwise[]{ - {ISD::ADD, MVT::v8i8, 2}, - {ISD::ADD, MVT::v16i8, 2}, - {ISD::ADD, MVT::v4i16, 2}, - {ISD::ADD, MVT::v8i16, 2}, - {ISD::ADD, MVT::v2i32, 2}, - {ISD::ADD, MVT::v4i32, 2}, - {ISD::ADD, MVT::v2i64, 2}, - {ISD::OR, MVT::v8i8, 15}, - {ISD::OR, MVT::v16i8, 17}, - {ISD::OR, MVT::v4i16, 7}, - {ISD::OR, MVT::v8i16, 9}, - {ISD::OR, MVT::v2i32, 3}, - {ISD::OR, MVT::v4i32, 5}, - {ISD::OR, MVT::v2i64, 3}, - {ISD::XOR, MVT::v8i8, 15}, - {ISD::XOR, MVT::v16i8, 17}, - {ISD::XOR, MVT::v4i16, 7}, - {ISD::XOR, MVT::v8i16, 9}, - {ISD::XOR, MVT::v2i32, 3}, - {ISD::XOR, MVT::v4i32, 5}, - {ISD::XOR, MVT::v2i64, 3}, - {ISD::AND, MVT::v8i8, 15}, - {ISD::AND, MVT::v16i8, 17}, - {ISD::AND, MVT::v4i16, 7}, - {ISD::AND, MVT::v8i16, 9}, - {ISD::AND, MVT::v2i32, 3}, - {ISD::AND, MVT::v4i32, 5}, - {ISD::AND, MVT::v2i64, 3}, + {ISD::ADD, MVT::v8i8, 2}, + {ISD::ADD, MVT::v16i8, 2}, + {ISD::ADD, MVT::v4i16, 2}, + {ISD::ADD, MVT::v8i16, 2}, + {ISD::ADD, MVT::v2i32, 2}, + {ISD::ADD, MVT::v4i32, 2}, + {ISD::ADD, MVT::v2i64, 2}, + {ISD::OR, MVT::v8i8, 5}, // fmov + orr_lsr + orr_lsr + lsr + orr + {ISD::OR, MVT::v16i8, 7}, // ext + orr + same as v8i8 + {ISD::OR, MVT::v4i16, 4}, // fmov + orr_lsr + lsr + orr + {ISD::OR, MVT::v8i16, 6}, // ext + orr + same as v4i16 + {ISD::OR, MVT::v2i32, 3}, // fmov + lsr + orr + {ISD::OR, MVT::v4i32, 5}, // ext + orr + same as v2i32 + {ISD::OR, MVT::v2i64, 3}, // ext + orr + fmov + {ISD::XOR, MVT::v8i8, 5}, // Same as above for or... + {ISD::XOR, MVT::v16i8, 7}, + {ISD::XOR, MVT::v4i16, 4}, + {ISD::XOR, MVT::v8i16, 6}, + {ISD::XOR, MVT::v2i32, 3}, + {ISD::XOR, MVT::v4i32, 5}, + {ISD::XOR, MVT::v2i64, 3}, + {ISD::AND, MVT::v8i8, 5}, // Same as above for or... + {ISD::AND, MVT::v16i8, 7}, + {ISD::AND, MVT::v4i16, 4}, + {ISD::AND, MVT::v8i16, 6}, + {ISD::AND, MVT::v2i32, 3}, + {ISD::AND, MVT::v4i32, 5}, + {ISD::AND, MVT::v2i64, 3}, }; switch (ISD) { default: diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll index 21e0356fd7321..b221fc8a35ab3 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll @@ -15,14 +15,14 @@ define void @reduce() { ; CHECK-NEXT: Cost Model: Found costs of 9 for: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 15 for: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 17 for: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 18 for: %V32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 20 for: %V64i8 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 9 for: %V8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 10 for: %V16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 5 for: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 8 for: %V32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %V64i8 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %V2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 5 for: %V4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll index 27dd42297bfab..4bb59e3a09b7a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll @@ -15,14 +15,14 @@ define void @reduce() { ; CHECK-NEXT: Cost Model: Found costs of 9 for: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 15 for: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 17 for: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 18 for: %V32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 20 for: %V64i8 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 9 for: %V8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 10 for: %V16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 5 for: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 8 for: %V32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %V64i8 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %V2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 5 for: %V4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll index 826605450a2d8..8e81aadbb9934 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll @@ -15,14 +15,14 @@ define void @reduce() { ; CHECK-NEXT: Cost Model: Found costs of 9 for: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V1i8 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 15 for: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 17 for: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 18 for: %V32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 20 for: %V64i8 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 9 for: %V8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 10 for: %V16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 5 for: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 8 for: %V32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %V64i8 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %V2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 5 for: %V4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll index 09a59de44c745..d55559d632019 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll @@ -62,12 +62,11 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: store <8 x i16> [[PREDPHI]], ptr [[DCT]], align 2, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: store <8 x i16> [[PREDPHI34]], ptr [[TMP0]], align 2, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[BIN_RDX35:%.*]] = or <8 x i16> [[PREDPHI34]], [[PREDPHI]] -; CHECK-NEXT: [[BIN_RDX:%.*]] = sext <8 x i16> [[BIN_RDX35]] to <8 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[TMP29:%.*]] = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[BIN_RDX35]]) ; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[TMP29]], [[VECTOR_BODY]] ], [ [[OR_15:%.*]], [[IF_END_15:%.*]] ] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR_LCSSA]], 0 +; CHECK-NEXT: [[OR_LCSSA_IN:%.*]] = phi i16 [ [[TMP29]], [[VECTOR_BODY]] ], [ [[OR_1551:%.*]], [[IF_END_15:%.*]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[OR_LCSSA_IN]], 0 ; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL]] to i32 ; CHECK-NEXT: ret i32 [[LNOT_EXT]] ; CHECK: for.body: @@ -514,8 +513,7 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK: if.end.15: ; CHECK-NEXT: [[STOREMERGE_15:%.*]] = phi i16 [ [[CONV28_15]], [[IF_ELSE_15]] ], [ [[CONV12_15]], [[IF_THEN_15]] ] ; CHECK-NEXT: store i16 [[STOREMERGE_15]], ptr [[ARRAYIDX_15]], align 2 -; CHECK-NEXT: [[OR_1551:%.*]] = or i16 [[OR_1450]], [[STOREMERGE_15]] -; CHECK-NEXT: [[OR_15]] = sext i16 [[OR_1551]] to i32 +; CHECK-NEXT: [[OR_1551]] = or i16 [[OR_1450]], [[STOREMERGE_15]] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; entry: