11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2- ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTREAMING
3- ; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
4- ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
2+ ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s
3+ ; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s
4+ ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s
55
66target triple = "aarch64-unknown-linux-gnu"
77
@@ -106,18 +106,11 @@ entry:
106106}
107107
108108define void @test_str_lane_s8 (ptr %a , <vscale x 16 x i8 > %b ) {
109- ; CHECK-NONSTREAMING-LABEL: test_str_lane_s8:
110- ; CHECK-NONSTREAMING: // %bb.0: // %entry
111- ; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7]
112- ; CHECK-NONSTREAMING-NEXT: strb w8, [x0]
113- ; CHECK-NONSTREAMING-NEXT: ret
114- ;
115- ; STREAMING-COMPAT-LABEL: test_str_lane_s8:
116- ; STREAMING-COMPAT: // %bb.0: // %entry
117- ; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
118- ; STREAMING-COMPAT-NEXT: fmov w8, s0
119- ; STREAMING-COMPAT-NEXT: strb w8, [x0]
120- ; STREAMING-COMPAT-NEXT: ret
109+ ; CHECK-LABEL: test_str_lane_s8:
110+ ; CHECK: // %bb.0: // %entry
111+ ; CHECK-NEXT: mov z0.b, z0.b[7]
112+ ; CHECK-NEXT: str b0, [x0]
113+ ; CHECK-NEXT: ret
121114
122115entry:
123116 %0 = extractelement <vscale x 16 x i8 > %b , i32 7
@@ -128,8 +121,7 @@ entry:
128121define void @test_str_lane0_s8 (ptr %a , <vscale x 16 x i8 > %b ) {
129122; CHECK-LABEL: test_str_lane0_s8:
130123; CHECK: // %bb.0: // %entry
131- ; CHECK-NEXT: fmov w8, s0
132- ; CHECK-NEXT: strb w8, [x0]
124+ ; CHECK-NEXT: str b0, [x0]
133125; CHECK-NEXT: ret
134126
135127entry:
@@ -201,6 +193,19 @@ define void @test_str_reduction_i32_to_i16(ptr %ptr, <vscale x 4 x i1> %p0, <vsc
201193 ret void
202194}
203195
; Test: store the i8 truncation of the i64 result of
; @llvm.aarch64.sve.uaddv.nxv4i32 to %ptr.
; The CHECK lines expect exactly: uaddv into d0, then "str b0, [x0]", then ret.
196+ define void @test_str_reduction_i32_to_i8 (ptr %ptr , <vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v ) {
197+ ; CHECK-LABEL: test_str_reduction_i32_to_i8:
198+ ; CHECK: // %bb.0:
199+ ; CHECK-NEXT: uaddv d0, p0, z0.s
200+ ; CHECK-NEXT: str b0, [x0]
201+ ; CHECK-NEXT: ret
202+
203+ %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32 (<vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v )
204+ %trunc = trunc i64 %reduce to i8
205+ store i8 %trunc , ptr %ptr , align 1
206+ ret void
207+ }
208+
204209define void @test_str_reduction_i32_to_i32_negative_offset (ptr %ptr , <vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v ) {
205210; CHECK-LABEL: test_str_reduction_i32_to_i32_negative_offset:
206211; CHECK: // %bb.0:
@@ -242,6 +247,20 @@ define void @test_str_reduction_i32_to_i16_negative_offset(ptr %ptr, <vscale x 4
242247 ret void
243248}
244249
; Test: store the i8 truncation of the i64 result of
; @llvm.aarch64.sve.uaddv.nxv4i32 to the address 8 bytes below %ptr
; (getelementptr inbounds i8, index -8).
; The CHECK lines expect exactly: uaddv into d0, then "stur b0, [x0, #-8]", then ret.
250+ define void @test_str_reduction_i32_to_i8_negative_offset (ptr %ptr , <vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v ) {
251+ ; CHECK-LABEL: test_str_reduction_i32_to_i8_negative_offset:
252+ ; CHECK: // %bb.0:
253+ ; CHECK-NEXT: uaddv d0, p0, z0.s
254+ ; CHECK-NEXT: stur b0, [x0, #-8]
255+ ; CHECK-NEXT: ret
256+
257+ %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32 (<vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v )
258+ %trunc = trunc i64 %reduce to i8
259+ %out_ptr = getelementptr inbounds i8 , ptr %ptr , i64 -8
260+ store i8 %trunc , ptr %out_ptr , align 1
261+ ret void
262+ }
263+
245264define void @test_str_lane_s32_negative_offset (ptr %a , <vscale x 4 x i32 > %b ) {
246265; CHECK-LABEL: test_str_lane_s32_negative_offset:
247266; CHECK: // %bb.0: // %entry
@@ -297,18 +316,11 @@ entry:
297316}
298317
299318define void @test_str_lane_s8_negative_offset (ptr %a , <vscale x 16 x i8 > %b ) {
300- ; CHECK-NONSTREAMING-LABEL: test_str_lane_s8_negative_offset:
301- ; CHECK-NONSTREAMING: // %bb.0: // %entry
302- ; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7]
303- ; CHECK-NONSTREAMING-NEXT: sturb w8, [x0, #-8]
304- ; CHECK-NONSTREAMING-NEXT: ret
305- ;
306- ; STREAMING-COMPAT-LABEL: test_str_lane_s8_negative_offset:
307- ; STREAMING-COMPAT: // %bb.0: // %entry
308- ; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
309- ; STREAMING-COMPAT-NEXT: fmov w8, s0
310- ; STREAMING-COMPAT-NEXT: sturb w8, [x0, #-8]
311- ; STREAMING-COMPAT-NEXT: ret
319+ ; CHECK-LABEL: test_str_lane_s8_negative_offset:
320+ ; CHECK: // %bb.0: // %entry
321+ ; CHECK-NEXT: mov z0.b, z0.b[7]
322+ ; CHECK-NEXT: stur b0, [x0, #-8]
323+ ; CHECK-NEXT: ret
312324
313325entry:
314326 %0 = extractelement <vscale x 16 x i8 > %b , i32 7
@@ -320,8 +332,7 @@ entry:
320332define void @test_str_lane0_s8_negative_offset (ptr %a , <vscale x 16 x i8 > %b ) {
321333; CHECK-LABEL: test_str_lane0_s8_negative_offset:
322334; CHECK: // %bb.0: // %entry
323- ; CHECK-NEXT: fmov w8, s0
324- ; CHECK-NEXT: sturb w8, [x0, #-8]
335+ ; CHECK-NEXT: stur b0, [x0, #-8]
325336; CHECK-NEXT: ret
326337
327338entry:
@@ -385,6 +396,48 @@ entry:
385396 ret void
386397}
387398
399+
; Test: extract lane 3 of a <vscale x 4 x i32> vector, truncate it to i8 and
; store the byte to %a.
; The CHECK lines expect exactly: "mov z0.s, z0.s[3]", then "str b0, [x0]", then ret.
400+ define void @test_str_trunc_lane_s32_to_s8 (ptr %a , <vscale x 4 x i32 > %b ) {
401+ ; CHECK-LABEL: test_str_trunc_lane_s32_to_s8:
402+ ; CHECK: // %bb.0: // %entry
403+ ; CHECK-NEXT: mov z0.s, z0.s[3]
404+ ; CHECK-NEXT: str b0, [x0]
405+ ; CHECK-NEXT: ret
406+
407+ entry:
408+ %0 = extractelement <vscale x 4 x i32 > %b , i32 3
409+ %trunc = trunc i32 %0 to i8
410+ store i8 %trunc , ptr %a , align 1
411+ ret void
412+ }
413+
; Test: extract lane 0 of a <vscale x 4 x i32> vector, truncate it to i8 and
; store the byte to %a.
; The CHECK lines expect a single "str b0, [x0]" before ret, with no
; preceding lane-move instruction.
414+ define void @test_str_trunc_lane0_s32_to_s8 (ptr %a , <vscale x 4 x i32 > %b ) {
415+ ; CHECK-LABEL: test_str_trunc_lane0_s32_to_s8:
416+ ; CHECK: // %bb.0: // %entry
417+ ; CHECK-NEXT: str b0, [x0]
418+ ; CHECK-NEXT: ret
419+
420+ entry:
421+ %0 = extractelement <vscale x 4 x i32 > %b , i32 0
422+ %trunc = trunc i32 %0 to i8
423+ store i8 %trunc , ptr %a , align 1
424+ ret void
425+ }
426+
; Test: extract lane 3 of a <vscale x 2 x i64> vector, truncate it to i8 and
; store the byte to %a.
; The CHECK lines expect exactly: "mov z0.d, z0.d[3]", then "str b0, [x0]", then ret.
; NOTE(review): lane index 3 is beyond the 2 elements that <vscale x 2 x i64>
; guarantees when vscale is 1 -- confirm the out-of-minimum-range index is intended.
427+ define void @test_str_trunc_lane_s64_to_s8 (ptr %a , <vscale x 2 x i64 > %b ) {
428+ ; CHECK-LABEL: test_str_trunc_lane_s64_to_s8:
429+ ; CHECK: // %bb.0: // %entry
430+ ; CHECK-NEXT: mov z0.d, z0.d[3]
431+ ; CHECK-NEXT: str b0, [x0]
432+ ; CHECK-NEXT: ret
433+
434+ entry:
435+ %0 = extractelement <vscale x 2 x i64 > %b , i32 3
436+ %trunc = trunc i64 %0 to i8
437+ store i8 %trunc , ptr %a , align 1
438+ ret void
439+ }
440+
388441define void @test_str_trunc_lane_s32_to_s16_negative_offset (ptr %a , <vscale x 4 x i32 > %b ) {
389442; CHECK-LABEL: test_str_trunc_lane_s32_to_s16_negative_offset:
390443; CHECK: // %bb.0: // %entry
@@ -413,3 +466,47 @@ entry:
413466 store i16 %trunc , ptr %out_ptr , align 2
414467 ret void
415468}
469+
; Test: extract lane 3 of a <vscale x 4 x i32> vector, truncate it to i8 and
; store the byte to the address 8 bytes below %a
; (getelementptr inbounds i8, index -8).
; The CHECK lines expect exactly: "mov z0.s, z0.s[3]", then
; "stur b0, [x0, #-8]", then ret.
470+ define void @test_str_trunc_lane_s32_to_s8_negative_offset (ptr %a , <vscale x 4 x i32 > %b ) {
471+ ; CHECK-LABEL: test_str_trunc_lane_s32_to_s8_negative_offset:
472+ ; CHECK: // %bb.0: // %entry
473+ ; CHECK-NEXT: mov z0.s, z0.s[3]
474+ ; CHECK-NEXT: stur b0, [x0, #-8]
475+ ; CHECK-NEXT: ret
476+
477+ entry:
478+ %0 = extractelement <vscale x 4 x i32 > %b , i32 3
479+ %trunc = trunc i32 %0 to i8
480+ %out_ptr = getelementptr inbounds i8 , ptr %a , i64 -8
481+ store i8 %trunc , ptr %out_ptr , align 1
482+ ret void
483+ }
484+
; Test: extract lane 0 of a <vscale x 4 x i32> vector, truncate it to i8 and
; store the byte to the address 8 bytes below %a
; (getelementptr inbounds i8, index -8).
; The CHECK lines expect a single "stur b0, [x0, #-8]" before ret, with no
; preceding lane-move instruction.
485+ define void @test_str_trunc_lane0_s32_to_s8_negative_offset (ptr %a , <vscale x 4 x i32 > %b ) {
486+ ; CHECK-LABEL: test_str_trunc_lane0_s32_to_s8_negative_offset:
487+ ; CHECK: // %bb.0: // %entry
488+ ; CHECK-NEXT: stur b0, [x0, #-8]
489+ ; CHECK-NEXT: ret
490+
491+ entry:
492+ %0 = extractelement <vscale x 4 x i32 > %b , i32 0
493+ %trunc = trunc i32 %0 to i8
494+ %out_ptr = getelementptr inbounds i8 , ptr %a , i64 -8
495+ store i8 %trunc , ptr %out_ptr , align 1
496+ ret void
497+ }
498+
; Test: extract lane 3 of a <vscale x 2 x i64> vector, truncate it to i8 and
; store the byte to the address 8 bytes below %a
; (getelementptr inbounds i8, index -8).
; The CHECK lines expect exactly: "mov z0.d, z0.d[3]", then
; "stur b0, [x0, #-8]", then ret.
; NOTE(review): lane index 3 is beyond the 2 elements that <vscale x 2 x i64>
; guarantees when vscale is 1 -- confirm the out-of-minimum-range index is intended.
499+ define void @test_str_trunc_lane_s64_to_s8_negative_offset (ptr %a , <vscale x 2 x i64 > %b ) {
500+ ; CHECK-LABEL: test_str_trunc_lane_s64_to_s8_negative_offset:
501+ ; CHECK: // %bb.0: // %entry
502+ ; CHECK-NEXT: mov z0.d, z0.d[3]
503+ ; CHECK-NEXT: stur b0, [x0, #-8]
504+ ; CHECK-NEXT: ret
505+
506+ entry:
507+ %0 = extractelement <vscale x 2 x i64 > %b , i32 3
508+ %trunc = trunc i64 %0 to i8
509+ %out_ptr = getelementptr inbounds i8 , ptr %a , i64 -8
510+ store i8 %trunc , ptr %out_ptr , align 1
511+ ret void
512+ }
0 commit comments