@@ -2222,64 +2222,6 @@ defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8, avgceilu>;
 defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16, avgceilu>;
 defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32, avgceilu>;
 
-// Rounding Halving Add perform the arithemtic operation with an extra bit of
-// precision, before performing the shift, to void clipping errors. We're not
-// modelling that here with these patterns, but we're using no wrap forms of
-// add to ensure that the extra bit of information is not needed for the
-// arithmetic or the rounding.
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
-                                        (v16i8 (ARMvmovImm (i32 3585)))),
-                                (i32 1))),
-            (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
-                                        (v8i16 (ARMvmovImm (i32 2049)))),
-                                (i32 1))),
-            (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
-                                        (v4i32 (ARMvmovImm (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
-                                        (v16i8 (ARMvmovImm (i32 3585)))),
-                                (i32 1))),
-            (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
-                                        (v8i16 (ARMvmovImm (i32 2049)))),
-                                (i32 1))),
-            (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
-                                        (v4i32 (ARMvmovImm (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
-
-  def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
-                                        (v16i8 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
-                                        (v8i16 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
-                                        (v4i32 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
-                                        (v16i8 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
-                                        (v8i16 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
-                                        (v4i32 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
-}
-
-
 class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
                    bits<2> size, list<dag> pattern=[]>
   : MVE_int<iname, suffix, size, pattern> {
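
The block deleted in this hunk matched an explicit "no-wrap add of a splat of 1, then shift right by one" DAG and selected VRHADD for it; the surviving `defm MVE_VRHADD*` context lines show the instructions now being selected from the generic `avgceils`/`avgceilu` nodes instead. The `ARMvmovImm` payloads 3585 (0xE01), 2049 (0x801) and 1 look like the VMOV-immediate encodings of a lane-wide splat of 1 for i8, i16 and i32 lanes respectively. As a minimal scalar sketch of what the instruction computes and why the no-wrap flags mattered (function names are illustrative, not from the LLVM tree):

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the VRHADD (rounding halving add) semantics for one unsigned
// 8-bit lane: (a + b + 1) >> 1 with an extra bit of intermediate
// precision, modelled here by widening to 16 bits.
static uint8_t rhadd_u8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((uint16_t(a) + uint16_t(b) + 1) >> 1);
}

// The deleted patterns instead matched the narrow form "add, add splat of
// 1, shift right by one". The nuw/nsw flags on the adds are what
// guaranteed the 8-bit intermediate could not clip.
static uint8_t rhadd_u8_narrow(uint8_t a, uint8_t b) {
  uint8_t sum = a + b + 1; // only valid when the adds do not wrap
  return sum >> 1;
}

int main() {
  // The two forms agree exactly on the inputs where the narrow adds do
  // not wrap, which is the precondition the patterns encoded.
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; a + b + 1 < 256; ++b)
      assert(rhadd_u8(a, b) == rhadd_u8_narrow(a, b));
  return 0;
}
```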
@@ -2303,8 +2245,7 @@ class MVE_VHSUB_<string suffix, bit U, bits<2> size,
   : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
 
 multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, SDNode Op,
-                       SDPatternOperator unpred_op, Intrinsic PredInt, PatFrag add_op,
-                       SDNode shift_op> {
+                       SDPatternOperator unpred_op, Intrinsic PredInt> {
   def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
   defvar Inst = !cast<Instruction>(NAME);
   defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
@@ -2313,26 +2254,18 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, SDNode Op,
     // Unpredicated add-and-divide-by-two
     def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))),
               (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
-    def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
-              (Inst MQPR:$Qm, MQPR:$Qn)>;
   }
 }
 
-multiclass MVE_VHADD<MVEVectorVTInfo VTI, SDNode Op, PatFrag add_op, SDNode shift_op>
-  : MVE_VHADD_m<VTI, Op, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
-                shift_op>;
+multiclass MVE_VHADD<MVEVectorVTInfo VTI, SDNode Op>
+  : MVE_VHADD_m<VTI, Op, int_arm_mve_vhadd, int_arm_mve_hadd_predicated>;
 
-// Halving add/sub perform the arithemtic operation with an extra bit of
-// precision, before performing the shift, to void clipping errors. We're not
-// modelling that here with these patterns, but we're using no wrap forms of
-// add/sub to ensure that the extra bit of information is not needed.
-defm MVE_VHADDs8  : MVE_VHADD<MVE_v16s8, avgfloors, addnsw, ARMvshrsImm>;
-defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, avgfloors, addnsw, ARMvshrsImm>;
-defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, avgfloors, addnsw, ARMvshrsImm>;
-defm MVE_VHADDu8  : MVE_VHADD<MVE_v16u8, avgflooru, addnuw, ARMvshruImm>;
-defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, avgflooru, addnuw, ARMvshruImm>;
-defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, avgflooru, addnuw, ARMvshruImm>;
+defm MVE_VHADDs8  : MVE_VHADD<MVE_v16s8, avgfloors>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, avgfloors>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, avgfloors>;
+defm MVE_VHADDu8  : MVE_VHADD<MVE_v16u8, avgflooru>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, avgflooru>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, avgflooru>;
 
 multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
                        SDPatternOperator unpred_op, Intrinsic pred_int, PatFrag sub_op,
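
The VHADD half of the change is the same story: `MVE_VHADD_m` loses its `add_op`/`shift_op` parameters and the "add, then shift right by one" pattern, leaving selection to the generic `avgfloors`/`avgflooru` nodes. A minimal scalar sketch of the halving-add semantics, again with illustrative names; the bit-twiddling form is a well-known identity, not necessarily what the backend emits:

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the VHADD (halving add) semantics for one unsigned 8-bit
// lane: floor((a + b) / 2) as if computed with 9-bit precision, modelled
// here by widening to 16 bits.
static uint8_t hadd_u8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((uint16_t(a) + uint16_t(b)) >> 1);
}

// An overflow-free identity for the same floor average: bits shared by
// both inputs contribute whole units, differing bits contribute halves.
static uint8_t hadd_u8_no_widen(uint8_t a, uint8_t b) {
  return (a & b) + ((a ^ b) >> 1);
}

int main() {
  // Unlike the rounding case gated on no-wrap adds, this identity holds
  // for every input pair.
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      assert(hadd_u8(a, b) == hadd_u8_no_widen(a, b));
  return 0;
}
```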