@@ -50,4 +50,123 @@ define void @histogram_i32_literal_noscale(ptr %base, <vscale x 4 x i32> %indice
5050 ret void
5151}
5252
; Histogram add on i32 buckets addressed through <vscale x 2 x i64> indices,
; with a run-time increment %inc. The expected assembly works in .d lanes
; throughout (histcnt z1.d, ld1w { z2.d }, st1w { z1.d }) and takes the
; increment from x1 rather than w1, so the 32-bit bucket data is handled in
; 64-bit elements. Offsets are scaled with lsl #2.
53+ define void @histogram_i32_promote (ptr %base , <vscale x 2 x i64 > %indices , <vscale x 2 x i1 > %mask , i32 %inc ) #0 {
54+ ; CHECK-LABEL: histogram_i32_promote:
55+ ; CHECK: // %bb.0:
56+ ; CHECK-NEXT: histcnt z1.d, p0/z, z0.d, z0.d
57+ ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
58+ ; CHECK-NEXT: mov z3.d, x1
59+ ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x0, z0.d, lsl #2]
60+ ; CHECK-NEXT: ptrue p1.d
61+ ; CHECK-NEXT: mad z1.d, p1/m, z3.d, z2.d
62+ ; CHECK-NEXT: st1w { z1.d }, p0, [x0, z0.d, lsl #2]
63+ ; CHECK-NEXT: ret
64+ %buckets = getelementptr i32 , ptr %base , <vscale x 2 x i64 > %indices
65+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i32 (<vscale x 2 x ptr > %buckets , i32 %inc , <vscale x 2 x i1 > %mask )
66+ ret void
67+ }
68+
; Histogram add on i16 buckets addressed through <vscale x 4 x i32> indices,
; with a run-time i16 increment %inc. The expected assembly uses .s lanes with
; halfword memory accesses (ld1h / st1h), sign-extended offsets scaled by the
; element size (sxtw #1), and a mad that combines the histcnt result with the
; increment moved in from w1.
69+ define void @histogram_i16 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask , i16 %inc ) #0 {
70+ ; CHECK-LABEL: histogram_i16:
71+ ; CHECK: // %bb.0:
72+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
73+ ; CHECK-NEXT: mov z3.s, w1
74+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
75+ ; CHECK-NEXT: ptrue p1.s
76+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
77+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
78+ ; CHECK-NEXT: ret
79+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i32 > %indices
80+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 %inc , <vscale x 4 x i1 > %mask )
81+ ret void
82+ }
83+
; Histogram add on i8 buckets addressed through <vscale x 4 x i32> indices,
; with a run-time i8 increment %inc. The expected assembly uses .s lanes with
; byte memory accesses (ld1b / st1b); the offsets are sign-extended (sxtw) with
; no shift amount, unlike the i16 variant.
84+ define void @histogram_i8 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask , i8 %inc ) #0 {
85+ ; CHECK-LABEL: histogram_i8:
86+ ; CHECK: // %bb.0:
87+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
88+ ; CHECK-NEXT: mov z3.s, w1
89+ ; CHECK-NEXT: ld1b { z2.s }, p0/z, [x0, z0.s, sxtw]
90+ ; CHECK-NEXT: ptrue p1.s
91+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
92+ ; CHECK-NEXT: st1b { z1.s }, p0, [x0, z0.s, sxtw]
93+ ; CHECK-NEXT: ret
94+ %buckets = getelementptr i8 , ptr %base , <vscale x 4 x i32 > %indices
95+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i8 (<vscale x 4 x ptr > %buckets , i8 %inc , <vscale x 4 x i1 > %mask )
96+ ret void
97+ }
98+
; Histogram add on i16 buckets addressed through <vscale x 2 x i64> indices
; (the nxv2 form of the intrinsic), with a run-time i16 increment %inc. The
; expected assembly uses .d lanes with halfword memory accesses (ld1h / st1h),
; offsets scaled by lsl #1, and the increment moved in from x1.
99+ define void @histogram_i16_2_lane (ptr %base , <vscale x 2 x i64 > %indices , <vscale x 2 x i1 > %mask , i16 %inc ) #0 {
100+ ; CHECK-LABEL: histogram_i16_2_lane:
101+ ; CHECK: // %bb.0:
102+ ; CHECK-NEXT: histcnt z1.d, p0/z, z0.d, z0.d
103+ ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
104+ ; CHECK-NEXT: mov z3.d, x1
105+ ; CHECK-NEXT: ld1h { z2.d }, p0/z, [x0, z0.d, lsl #1]
106+ ; CHECK-NEXT: ptrue p1.d
107+ ; CHECK-NEXT: mad z1.d, p1/m, z3.d, z2.d
108+ ; CHECK-NEXT: st1h { z1.d }, p0, [x0, z0.d, lsl #1]
109+ ; CHECK-NEXT: ret
110+ %buckets = getelementptr i16 , ptr %base , <vscale x 2 x i64 > %indices
111+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i16 (<vscale x 2 x ptr > %buckets , i16 %inc , <vscale x 2 x i1 > %mask )
112+ ret void
113+ }
114+
; Histogram add on i8 buckets addressed through <vscale x 2 x i64> indices
; (the nxv2 form of the intrinsic), with a run-time i8 increment %inc. The
; expected assembly uses .d lanes with byte memory accesses (ld1b / st1b) and
; unscaled vector offsets ([x0, z0.d]).
115+ define void @histogram_i8_2_lane (ptr %base , <vscale x 2 x i64 > %indices , <vscale x 2 x i1 > %mask , i8 %inc ) #0 {
116+ ; CHECK-LABEL: histogram_i8_2_lane:
117+ ; CHECK: // %bb.0:
118+ ; CHECK-NEXT: histcnt z1.d, p0/z, z0.d, z0.d
119+ ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
120+ ; CHECK-NEXT: mov z3.d, x1
121+ ; CHECK-NEXT: ld1b { z2.d }, p0/z, [x0, z0.d]
122+ ; CHECK-NEXT: ptrue p1.d
123+ ; CHECK-NEXT: mad z1.d, p1/m, z3.d, z2.d
124+ ; CHECK-NEXT: st1b { z1.d }, p0, [x0, z0.d]
125+ ; CHECK-NEXT: ret
126+ %buckets = getelementptr i8 , ptr %base , <vscale x 2 x i64 > %indices
127+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i8 (<vscale x 2 x ptr > %buckets , i8 %inc , <vscale x 2 x i1 > %mask )
128+ ret void
129+ }
130+
; Histogram add on i16 buckets with the constant increment 1. The expected
; assembly contains no increment mov, ptrue or mad: the histcnt result is
; combined with the loaded bucket values by a plain unpredicated add.
131+ define void @histogram_i16_literal_1 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
132+ ; CHECK-LABEL: histogram_i16_literal_1:
133+ ; CHECK: // %bb.0:
134+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
135+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
136+ ; CHECK-NEXT: add z1.s, z2.s, z1.s
137+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
138+ ; CHECK-NEXT: ret
139+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i32 > %indices
140+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 1 , <vscale x 4 x i1 > %mask )
141+ ret void
142+ }
143+
; Histogram add on i16 buckets with the constant increment 2. The expected
; assembly contains no increment mov, ptrue or mad: a single adr with
; lsl #1 on the histcnt result (z1) and the loaded bucket values (z2) stands in
; for the multiply-by-2-and-add.
144+ define void @histogram_i16_literal_2 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
145+ ; CHECK-LABEL: histogram_i16_literal_2:
146+ ; CHECK: // %bb.0:
147+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
148+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
149+ ; CHECK-NEXT: adr z1.s, [z2.s, z1.s, lsl #1]
150+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
151+ ; CHECK-NEXT: ret
152+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i32 > %indices
153+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 2 , <vscale x 4 x i1 > %mask )
154+ ret void
155+ }
156+
; Histogram add on i16 buckets with the constant increment 3. Unlike the
; literal 1 and 2 cases, the expected assembly materialises the constant
; (mov z3.s, #3) and uses the same ptrue + mad sequence as the run-time
; increment tests.
157+ define void @histogram_i16_literal_3 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
158+ ; CHECK-LABEL: histogram_i16_literal_3:
159+ ; CHECK: // %bb.0:
160+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
161+ ; CHECK-NEXT: mov z3.s, #3 // =0x3
162+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
163+ ; CHECK-NEXT: ptrue p1.s
164+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
165+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
166+ ; CHECK-NEXT: ret
167+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i32 > %indices
168+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 3 , <vscale x 4 x i1 > %mask )
169+ ret void
170+ }
171+
; Attribute group #0, referenced by every function definition visible in this
; hunk: enables the sve2 target feature and sets vscale_range to 1..16.
53172attributes #0 = { "target-features" ="+sve2" vscale_range(1 , 16 ) }
0 commit comments