@@ -2431,6 +2431,47 @@ def DblwdCmp {
24312431 (v2i64 (XXSPLTW EQWSHAND, 2)), 0));
24322432}
24332433
// Build a dag of `Total` operands under the operator `op`, where every
// operand is a copy of `splat` except the one at position `Index`, which is
// `assign`. E.g. SplatAndAssignIndexed<build_vector, 2, (i64 0), 1, X>.DAG
// yields (build_vector (i64 0), X) — a build_vector of zeros with one
// element assigned. Used below to match BUILD_VECTORs of a single load
// plus zeros.
class SplatAndAssignIndexed<
    SDPatternOperator op,
    int Total, dag splat,
    int Index, dag assign> {
  // `Index` splat copies before the assigned element.
  defvar head = !listsplat(splat, Index);
  // The single assigned element, as a one-element list.
  defvar x = [assign];
  // The remaining Total - Index - 1 splat copies after it.
  defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
  list<dag> Ops = !listconcat(head, x, tail);
  // Fold the operand list into one dag rooted at `op`: start from the
  // bare (op) node and !con-append each operand in order.
  dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
}
2444+
// v2f64 BUILD_VECTOR of f64 zeros with an f32-to-f64 extending load at
// element `Index` (Index is 0 or 1).
class BVExtLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
    build_vector,
    2, (f64 fpimm0),
    Index, (f64 (extloadf32 ForceXForm:$src))>;
2449+
// v2i64 BUILD_VECTOR of i64 zeros with a zero-extending i32 load at
// element `Index` (Index is 0 or 1).
class BVZExtLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
    build_vector,
    2, (i64 0),
    Index, (i64 (zextloadi32 ForceXForm:$src))>;
2454+
// v4i32 BUILD_VECTOR of i32 zeros with a plain i32 load at element
// `Index` (Index in 0..3).
class BVLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
    build_vector,
    4, (i32 0),
    Index, (i32 (load ForceXForm:$src))>;
2459+
// v4f32 BUILD_VECTOR of f32 zeros with a plain f32 load at element
// `Index` (Index in 0..3).
class BVLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
    build_vector,
    4, (f32 fpimm0),
    Index, (f32 (load ForceXForm:$src))>;
2464+
// v2f64 BUILD_VECTOR of f64 zeros with a plain f64 load at element
// `Index` (Index is 0 or 1).
class BVLoadAndZerosDbl<int Index> : SplatAndAssignIndexed<
    build_vector,
    2, (f64 fpimm0),
    Index, (f64 (load ForceXForm:$src))>;
2469+
// v2i64 BUILD_VECTOR of i64 zeros with a plain i64 load at element
// `Index` (Index is 0 or 1).
class BVLoadAndZerosLong<int Index> : SplatAndAssignIndexed<
    build_vector,
    2, (i64 0),
    Index, (i64 (load ForceXForm:$src))>;
2474+
24342475//---------------------------- Anonymous Patterns ----------------------------//
24352476// Predicate combinations are kept in roughly chronological order in terms of
24362477// instruction availability in the architecture. For example, VSX came in with
@@ -3449,6 +3490,53 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
34493490 (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
34503491 sub_64), ForceXForm:$src)>;
34513492}
3493+
// BUILD_VECTOR via single load and zeros (big-endian).
// Extension load.
// Loaded value in element 0: the scalar load's result is already in the
// right doubleword for BE, so only a register-class copy is needed.
def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
// Loaded value in element 1: swap the doublewords with XXPERMDI (sel 2).
def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
          (v2f64 (XXPERMDIs
                  (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;

// Same two placements for a zero-extended i32 load into v2i64.
def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
          (v2i64 (XXPERMDIs
                  (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
3507+
// Normal (non-extending) 32-bit load into any element of a v4i32/v4f32,
// remaining elements zero (big-endian).
// Offset is the XXSLDWI rotate count that moves the loaded word from
// where LXSIWZX leaves it to BE word element `Index`; the arithmetic
// makes Offset 0 exactly when Index == 1, i.e. that is where the load
// already places the word (NOTE(review): placement inferred from this
// mapping — confirm against the lxsiwzx ISA description).
foreach Index = !range(4) in {
  defvar Temp = !sub(5, Index);
  // Reduce the rotate count modulo 4 (valid XXSLDWI shifts are 0..3).
  defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
  if !ne(Offset, 0) then {
    // Word must be rotated into place.
    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
              (v4i32 (XXSLDWIs
                      (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
                      Offset))>;
    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
              (v4f32 (XXSLDWIs
                      (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
                      Offset))>;
  } else {
    // Word is already in the target element; no shift needed.
    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
              (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
              (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
  }
}
3528+
// 64-bit load into one element of a v2f64/v2i64, other element zero
// (big-endian). Element 0 needs no permute; element 1 needs a
// doubleword swap (XXPERMDI sel 2).
def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
          (v2f64 (XXPERMDIs
                  (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;

def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
          (v2i64 (XXPERMDIs
                  (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
34523540} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
34533541
34543542// Little endian Power8 VSX subtarget.
@@ -3542,6 +3630,54 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
35423630 (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
35433631 sub_64), ForceXForm:$src)>;
35443632}
3633+
// BUILD_VECTOR via single load and zeros (little-endian).
// Extension load. Mirror image of the big-endian patterns above: on LE
// the no-permute case is element 1, and element 0 needs the doubleword
// swap (XXPERMDI sel 2).
def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
          (v2f64 (XXPERMDIs
                  (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;

// Same two placements for a zero-extended i32 load into v2i64.
def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
          (v2i64 (XXPERMDIs
                  (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
3647+
// Normal (non-extending) 32-bit load into any element of a v4i32/v4f32,
// remaining elements zero (little-endian).
// Offset is the XXSLDWI rotate count for LE element numbering; here the
// arithmetic makes Offset 0 exactly when Index == 2, the LE counterpart
// of the BE Index == 1 case above (NOTE(review): placement inferred from
// this mapping — confirm against the lxsiwzx ISA description).
foreach Index = !range(4) in {
  defvar Temp = !sub(!add(Index, 4), 2);
  // Reduce the rotate count modulo 4 (valid XXSLDWI shifts are 0..3).
  defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
  if !ne(Offset, 0) then {
    // Word must be rotated into place.
    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
              (v4i32 (XXSLDWIs
                      (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
                      Offset))>;
    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
              (v4f32 (XXSLDWIs
                      (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
                      Offset))>;
  } else {
    // Word is already in the target element; no shift needed.
    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
              (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
              (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
  }
}
3668+
// 64-bit load into one element of a v2f64/v2i64, other element zero
// (little-endian). Element 1 needs no permute; element 0 needs a
// doubleword swap (XXPERMDI sel 2) — the reverse of the BE block above.
def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
          (v2f64 (XXPERMDIs
                  (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;

def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
          (v2i64 (XXPERMDIs
                  (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
3680+
35453681} // HasVSX, HasP8Vector, IsLittleEndian
35463682
35473683// Big endian pre-Power9 VSX subtarget.
0 commit comments