You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll
+67Lines changed: 67 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -72,3 +72,70 @@ entry:
72
72
%partial.reduce = tailcall <vscale x 8 x i16> @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i16(<vscale x 8 x i16> %acc, <vscale x 16 x i16> %input.wide)
73
73
ret <vscale x 8 x i16> %partial.reduce
74
74
}
75
+
76
+
; Partial reduction of a sign-extended nxv4i16 into an nxv2i32 accumulator.
; The i16 lanes are sign-extended in-register (sxth), unpacked to .d, and
; added pairwise into the accumulator.
define <vscale x 2 x i32> @signed_wide_add_nxv4i16(<vscale x 2 x i32> %acc, <vscale x 4 x i16> %input){
; CHECK-LABEL: signed_wide_add_nxv4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
; CHECK-NEXT:    uunpklo z2.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    add z0.d, z0.d, z2.d
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
entry:
  ; FIX: "%inputto" -> "%input to" and "tailcall" -> "tail call";
  ; the scraped text fused these tokens, which is invalid LLVM IR syntax.
  %input.wide = sext <vscale x 4 x i16> %input to <vscale x 4 x i32>
  %partial.reduce = tail call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv4i32(<vscale x 2 x i32> %acc, <vscale x 4 x i32> %input.wide)
  ret <vscale x 2 x i32> %partial.reduce
}
91
+
92
+
; Partial reduction of a zero-extended nxv4i16 into an nxv2i32 accumulator.
; Zero-extension is done with an AND mask (#0xffff) instead of a predicated
; uxth, then the lanes are unpacked to .d and accumulated.
define <vscale x 2 x i32> @unsigned_wide_add_nxv4i16(<vscale x 2 x i32> %acc, <vscale x 4 x i16> %input){
; CHECK-LABEL: unsigned_wide_add_nxv4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z1.s, z1.s, #0xffff
; CHECK-NEXT:    uunpklo z2.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    add z0.d, z0.d, z2.d
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
entry:
  ; FIX: "%inputto" -> "%input to" and "tailcall" -> "tail call";
  ; the scraped text fused these tokens, which is invalid LLVM IR syntax.
  %input.wide = zext <vscale x 4 x i16> %input to <vscale x 4 x i32>
  %partial.reduce = tail call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv4i32(<vscale x 2 x i32> %acc, <vscale x 4 x i32> %input.wide)
  ret <vscale x 2 x i32> %partial.reduce
}
106
+
107
+
; Partial reduction of a sign-extended nxv8i32 into an nxv4i64 accumulator
; (two Z-register pairs): each input half is sunpk'd hi/lo to .d and added
; into the corresponding accumulator register.
define <vscale x 4 x i64> @signed_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vscale x 8 x i32> %input){
; CHECK-LABEL: signed_wide_add_nxv8i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sunpkhi z4.d, z2.s
; CHECK-NEXT:    sunpklo z2.d, z2.s
; CHECK-NEXT:    sunpkhi z5.d, z3.s
; CHECK-NEXT:    sunpklo z3.d, z3.s
; CHECK-NEXT:    add z0.d, z0.d, z2.d
; CHECK-NEXT:    add z1.d, z1.d, z4.d
; CHECK-NEXT:    add z0.d, z3.d, z0.d
; CHECK-NEXT:    add z1.d, z5.d, z1.d
; CHECK-NEXT:    ret
entry:
  ; FIX: "%inputto" -> "%input to" and "tailcall" -> "tail call";
  ; the scraped text fused these tokens, which is invalid LLVM IR syntax.
  %input.wide = sext <vscale x 8 x i32> %input to <vscale x 8 x i64>
  %partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64(<vscale x 4 x i64> %acc, <vscale x 8 x i64> %input.wide)
  ret <vscale x 4 x i64> %partial.reduce
}
124
+
125
+
define <vscale x 4 x i64> @unsigned_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vscale x 8 x i32> %input){
126
+
; CHECK-LABEL: unsigned_wide_add_nxv8i32:
127
+
; CHECK: // %bb.0: // %entry
128
+
; CHECK-NEXT: uunpkhi z4.d, z2.s
129
+
; CHECK-NEXT: uunpklo z2.d, z2.s
130
+
; CHECK-NEXT: uunpkhi z5.d, z3.s
131
+
; CHECK-NEXT: uunpklo z3.d, z3.s
132
+
; CHECK-NEXT: add z0.d, z0.d, z2.d
133
+
; CHECK-NEXT: add z1.d, z1.d, z4.d
134
+
; CHECK-NEXT: add z0.d, z3.d, z0.d
135
+
; CHECK-NEXT: add z1.d, z5.d, z1.d
136
+
; CHECK-NEXT: ret
137
+
entry:
138
+
%input.wide = zext <vscale x 8 x i32> %inputto <vscale x 8 x i64>
139
+
%partial.reduce = tailcall <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64(<vscale x 4 x i64> %acc, <vscale x 8 x i64> %input.wide)
0 commit comments