@@ -345,6 +345,86 @@ define <3 x i32> @load_v3i32(ptr %src) {
345345 ret <3 x i32 > %l
346346}
347347
; Test: load a <3 x i8> vector from %src (align 1), zero-extend it to
; <3 x i32> and return the widened vector.
; Expected code under both check prefixes: the first two bytes are fetched
; with ldrh and bounced through a stack slot into s0, the third byte is
; inserted with a single-lane ld1 from %src + 2, the lanes are widened with
; two ushll steps, and the final and with the movi mask (0xff in every 32-bit
; lane) clears everything above the low byte of each lane.
; The BE prefix additionally expects rev32 / rev64 / ext lane reordering
; (presumably the big-endian run; the RUN lines are outside this view).
348+ define <3 x i32 > @load_v3i8_zext_to_3xi32 (ptr %src ) {
349+ ; CHECK-LABEL: load_v3i8_zext_to_3xi32:
350+ ; CHECK: ; %bb.0:
351+ ; CHECK-NEXT: sub sp, sp, #16
352+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
353+ ; CHECK-NEXT: ldrh w8, [x0]
354+ ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
355+ ; CHECK-NEXT: strh w8, [sp, #12]
356+ ; CHECK-NEXT: add x8, x0, #2
357+ ; CHECK-NEXT: ldr s0, [sp, #12]
358+ ; CHECK-NEXT: ushll.8h v0, v0, #0
359+ ; CHECK-NEXT: ld1.b { v0 }[4], [x8]
360+ ; CHECK-NEXT: ushll.4s v0, v0, #0
361+ ; CHECK-NEXT: and.16b v0, v0, v1
362+ ; CHECK-NEXT: add sp, sp, #16
363+ ; CHECK-NEXT: ret
364+ ;
365+ ; BE-LABEL: load_v3i8_zext_to_3xi32:
366+ ; BE: // %bb.0:
367+ ; BE-NEXT: sub sp, sp, #16
368+ ; BE-NEXT: .cfi_def_cfa_offset 16
369+ ; BE-NEXT: ldrh w8, [x0]
370+ ; BE-NEXT: movi v1.2d, #0x0000ff000000ff
371+ ; BE-NEXT: strh w8, [sp, #12]
372+ ; BE-NEXT: add x8, x0, #2
373+ ; BE-NEXT: ldr s0, [sp, #12]
374+ ; BE-NEXT: rev32 v0.8b, v0.8b
375+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
376+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
377+ ; BE-NEXT: ushll v0.4s, v0.4h, #0
378+ ; BE-NEXT: and v0.16b, v0.16b, v1.16b
379+ ; BE-NEXT: rev64 v0.4s, v0.4s
380+ ; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
381+ ; BE-NEXT: add sp, sp, #16
382+ ; BE-NEXT: ret
; IR under test: byte-aligned 3-element load followed by a zero-extension.
383+ %l = load <3 x i8 >, ptr %src , align 1
384+ %e = zext <3 x i8 > %l to <3 x i32 >
385+ ret <3 x i32 > %e
386+ }
387+
; Test: load a <3 x i8> vector from %src (align 1), sign-extend it to
; <3 x i32> and return the widened vector.
; Expected code under both check prefixes: the load sequence is the same
; ldrh / stack bounce / single-lane ld1 pattern as for the zero-extending
; sibling test, but instead of masking, each 32-bit lane is shifted left by 24
; (shl) and then arithmetically right by 24 (sshr), which replicates bit 7 of
; the loaded byte into the upper 24 bits of the lane.
; The BE prefix additionally expects rev32 / rev64 / ext lane reordering
; (presumably the big-endian run; the RUN lines are outside this view).
388+ define <3 x i32 > @load_v3i8_sext_to_3xi32 (ptr %src ) {
389+ ; CHECK-LABEL: load_v3i8_sext_to_3xi32:
390+ ; CHECK: ; %bb.0:
391+ ; CHECK-NEXT: sub sp, sp, #16
392+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
393+ ; CHECK-NEXT: ldrh w8, [x0]
394+ ; CHECK-NEXT: strh w8, [sp, #12]
395+ ; CHECK-NEXT: add x8, x0, #2
396+ ; CHECK-NEXT: ldr s0, [sp, #12]
397+ ; CHECK-NEXT: ushll.8h v0, v0, #0
398+ ; CHECK-NEXT: ld1.b { v0 }[4], [x8]
399+ ; CHECK-NEXT: ushll.4s v0, v0, #0
400+ ; CHECK-NEXT: shl.4s v0, v0, #24
401+ ; CHECK-NEXT: sshr.4s v0, v0, #24
402+ ; CHECK-NEXT: add sp, sp, #16
403+ ; CHECK-NEXT: ret
404+ ;
405+ ; BE-LABEL: load_v3i8_sext_to_3xi32:
406+ ; BE: // %bb.0:
407+ ; BE-NEXT: sub sp, sp, #16
408+ ; BE-NEXT: .cfi_def_cfa_offset 16
409+ ; BE-NEXT: ldrh w8, [x0]
410+ ; BE-NEXT: strh w8, [sp, #12]
411+ ; BE-NEXT: add x8, x0, #2
412+ ; BE-NEXT: ldr s0, [sp, #12]
413+ ; BE-NEXT: rev32 v0.8b, v0.8b
414+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
415+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
416+ ; BE-NEXT: ushll v0.4s, v0.4h, #0
417+ ; BE-NEXT: shl v0.4s, v0.4s, #24
418+ ; BE-NEXT: sshr v0.4s, v0.4s, #24
419+ ; BE-NEXT: rev64 v0.4s, v0.4s
420+ ; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
421+ ; BE-NEXT: add sp, sp, #16
422+ ; BE-NEXT: ret
; IR under test: byte-aligned 3-element load followed by a sign-extension.
423+ %l = load <3 x i8 >, ptr %src , align 1
424+ %e = sext <3 x i8 > %l to <3 x i32 >
425+ ret <3 x i32 > %e
426+ }
427+
348428define void @store_trunc_from_64bits (ptr %src , ptr %dst ) {
349429; CHECK-LABEL: store_trunc_from_64bits:
350430; CHECK: ; %bb.0: ; %entry
@@ -388,9 +468,9 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
388468; CHECK-NEXT: ldr s0, [x0]
389469; CHECK-NEXT: add x9, x0, #4
390470; CHECK-NEXT: Lloh0:
391- ; CHECK-NEXT: adrp x8, lCPI9_0 @PAGE
471+ ; CHECK-NEXT: adrp x8, lCPI11_0 @PAGE
392472; CHECK-NEXT: Lloh1:
393- ; CHECK-NEXT: ldr d1, [x8, lCPI9_0 @PAGEOFF]
473+ ; CHECK-NEXT: ldr d1, [x8, lCPI11_0 @PAGEOFF]
394474; CHECK-NEXT: add x8, x1, #1
395475; CHECK-NEXT: ld1.h { v0 }[2], [x9]
396476; CHECK-NEXT: add x9, x1, #2
@@ -409,8 +489,8 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
409489; BE-NEXT: add x8, x0, #4
410490; BE-NEXT: rev32 v0.4h, v0.4h
411491; BE-NEXT: ld1 { v0.h }[2], [x8]
412- ; BE-NEXT: adrp x8, .LCPI9_0
413- ; BE-NEXT: add x8, x8, :lo12:.LCPI9_0
492+ ; BE-NEXT: adrp x8, .LCPI11_0
493+ ; BE-NEXT: add x8, x8, :lo12:.LCPI11_0
414494; BE-NEXT: ld1 { v1.4h }, [x8]
415495; BE-NEXT: add v0.4h, v0.4h, v1.4h
416496; BE-NEXT: xtn v1.8b, v0.8h
@@ -538,9 +618,9 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
538618; CHECK-NEXT: .cfi_def_cfa_offset 16
539619; CHECK-NEXT: ldrh w9, [x0]
540620; CHECK-NEXT: Lloh2:
541- ; CHECK-NEXT: adrp x8, lCPI13_0 @PAGE
621+ ; CHECK-NEXT: adrp x8, lCPI15_0 @PAGE
542622; CHECK-NEXT: Lloh3:
543- ; CHECK-NEXT: ldr d1, [x8, lCPI13_0 @PAGEOFF]
623+ ; CHECK-NEXT: ldr d1, [x8, lCPI15_0 @PAGEOFF]
544624; CHECK-NEXT: add x8, x1, #4
545625; CHECK-NEXT: strh w9, [sp, #12]
546626; CHECK-NEXT: add x9, x0, #2
@@ -566,8 +646,8 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
566646; BE-NEXT: rev32 v0.8b, v0.8b
567647; BE-NEXT: ushll v0.8h, v0.8b, #0
568648; BE-NEXT: ld1 { v0.b }[4], [x8]
569- ; BE-NEXT: adrp x8, .LCPI13_0
570- ; BE-NEXT: add x8, x8, :lo12:.LCPI13_0
649+ ; BE-NEXT: adrp x8, .LCPI15_0
650+ ; BE-NEXT: add x8, x8, :lo12:.LCPI15_0
571651; BE-NEXT: ld1 { v1.4h }, [x8]
572652; BE-NEXT: add x8, x1, #4
573653; BE-NEXT: bic v0.4h, #255, lsl #8
@@ -796,3 +876,115 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
796876 store volatile <3 x i8 > %t , ptr %dst , align 1
797877 ret void
798878}
879+
; Test: round trip through a wider type. Loads <3 x i8> from %src (align 1),
; zero-extends to <3 x i32>, adds the constant <1, 2, 3>, truncates the sum
; back to <3 x i8> and stores it to the same pointer. The store carries no
; explicit alignment.
; Expected code for the CHECK prefix: the vector operand of the add comes from
; a constant-pool entry (lCPI22_0), the add is a widening uaddw, and the three
; result bytes are written individually with st1.b from byte lanes 4, 8 and 0
; to %src + 1, %src + 2 and %src.
; Expected code for the BE prefix: the add is done on 16-bit lanes (add .4h),
; the first two result bytes are narrowed with xtn and written with one strh
; via a stack slot, and the third byte is written with strb at offset 2.
; NOTE(review): the constant-pool index (22) depends on this function's
; position in the file and changes whenever earlier tests are added.
880+ define void @load_v3i8_zext_to_3xi32_add_trunc_store (ptr %src ) {
881+ ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
882+ ; CHECK: ; %bb.0:
883+ ; CHECK-NEXT: sub sp, sp, #16
884+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
885+ ; CHECK-NEXT: ldrh w9, [x0]
886+ ; CHECK-NEXT: Lloh4:
887+ ; CHECK-NEXT: adrp x8, lCPI22_0@PAGE
888+ ; CHECK-NEXT: Lloh5:
889+ ; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF]
890+ ; CHECK-NEXT: add x8, x0, #1
891+ ; CHECK-NEXT: strh w9, [sp, #12]
892+ ; CHECK-NEXT: add x9, x0, #2
893+ ; CHECK-NEXT: ldr s0, [sp, #12]
894+ ; CHECK-NEXT: ushll.8h v0, v0, #0
895+ ; CHECK-NEXT: ld1.b { v0 }[4], [x9]
896+ ; CHECK-NEXT: uaddw.4s v0, v1, v0
897+ ; CHECK-NEXT: st1.b { v0 }[4], [x8]
898+ ; CHECK-NEXT: st1.b { v0 }[8], [x9]
899+ ; CHECK-NEXT: st1.b { v0 }[0], [x0]
900+ ; CHECK-NEXT: add sp, sp, #16
901+ ; CHECK-NEXT: ret
902+ ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
903+ ;
904+ ; BE-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
905+ ; BE: // %bb.0:
906+ ; BE-NEXT: sub sp, sp, #16
907+ ; BE-NEXT: .cfi_def_cfa_offset 16
908+ ; BE-NEXT: ldrh w9, [x0]
909+ ; BE-NEXT: adrp x8, .LCPI22_0
910+ ; BE-NEXT: add x8, x8, :lo12:.LCPI22_0
911+ ; BE-NEXT: ld1 { v1.4h }, [x8]
912+ ; BE-NEXT: strh w9, [sp, #12]
913+ ; BE-NEXT: add x9, x0, #2
914+ ; BE-NEXT: ldr s0, [sp, #12]
915+ ; BE-NEXT: rev32 v0.8b, v0.8b
916+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
917+ ; BE-NEXT: ld1 { v0.b }[4], [x9]
918+ ; BE-NEXT: add v0.4h, v0.4h, v1.4h
919+ ; BE-NEXT: xtn v1.8b, v0.8h
920+ ; BE-NEXT: umov w8, v0.h[2]
921+ ; BE-NEXT: rev32 v1.16b, v1.16b
922+ ; BE-NEXT: str s1, [sp, #8]
923+ ; BE-NEXT: ldrh w9, [sp, #8]
924+ ; BE-NEXT: strb w8, [x0, #2]
925+ ; BE-NEXT: strh w9, [x0]
926+ ; BE-NEXT: add sp, sp, #16
927+ ; BE-NEXT: ret
; IR under test: load -> zext -> add <1, 2, 3> -> trunc -> store to %src.
928+ %l = load <3 x i8 >, ptr %src , align 1
929+ %e = zext <3 x i8 > %l to <3 x i32 >
930+ %add = add <3 x i32 > %e , <i32 1 , i32 2 , i32 3 >
931+ %t = trunc <3 x i32 > %add to <3 x i8 >
932+ store <3 x i8 > %t , ptr %src
933+ ret void
934+ }
935+
; Test: round trip through a wider type. Loads <3 x i8> from %src (align 1),
; sign-extends to <3 x i32>, adds the constant <1, 2, 3>, truncates the sum
; back to <3 x i8> and stores it to the same pointer. The store carries no
; explicit alignment.
; The expected instruction sequences for both prefixes are the same as for the
; zero-extending sibling test apart from the label numbers (Lloh6/Lloh7 and
; constant-pool entry 23): the add is still uaddw (CHECK prefix) or a 16-bit
; lane add (BE prefix). This is consistent with only the low 8 bits of each
; lane surviving the final truncation, so the kind of extension does not affect
; the stored bytes.
; NOTE(review): the constant-pool index (23) depends on this function's
; position in the file and changes whenever earlier tests are added.
936+ define void @load_v3i8_sext_to_3xi32_add_trunc_store (ptr %src ) {
937+ ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
938+ ; CHECK: ; %bb.0:
939+ ; CHECK-NEXT: sub sp, sp, #16
940+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
941+ ; CHECK-NEXT: ldrh w9, [x0]
942+ ; CHECK-NEXT: Lloh6:
943+ ; CHECK-NEXT: adrp x8, lCPI23_0@PAGE
944+ ; CHECK-NEXT: Lloh7:
945+ ; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF]
946+ ; CHECK-NEXT: add x8, x0, #1
947+ ; CHECK-NEXT: strh w9, [sp, #12]
948+ ; CHECK-NEXT: add x9, x0, #2
949+ ; CHECK-NEXT: ldr s0, [sp, #12]
950+ ; CHECK-NEXT: ushll.8h v0, v0, #0
951+ ; CHECK-NEXT: ld1.b { v0 }[4], [x9]
952+ ; CHECK-NEXT: uaddw.4s v0, v1, v0
953+ ; CHECK-NEXT: st1.b { v0 }[4], [x8]
954+ ; CHECK-NEXT: st1.b { v0 }[8], [x9]
955+ ; CHECK-NEXT: st1.b { v0 }[0], [x0]
956+ ; CHECK-NEXT: add sp, sp, #16
957+ ; CHECK-NEXT: ret
958+ ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
959+ ;
960+ ; BE-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
961+ ; BE: // %bb.0:
962+ ; BE-NEXT: sub sp, sp, #16
963+ ; BE-NEXT: .cfi_def_cfa_offset 16
964+ ; BE-NEXT: ldrh w9, [x0]
965+ ; BE-NEXT: adrp x8, .LCPI23_0
966+ ; BE-NEXT: add x8, x8, :lo12:.LCPI23_0
967+ ; BE-NEXT: ld1 { v1.4h }, [x8]
968+ ; BE-NEXT: strh w9, [sp, #12]
969+ ; BE-NEXT: add x9, x0, #2
970+ ; BE-NEXT: ldr s0, [sp, #12]
971+ ; BE-NEXT: rev32 v0.8b, v0.8b
972+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
973+ ; BE-NEXT: ld1 { v0.b }[4], [x9]
974+ ; BE-NEXT: add v0.4h, v0.4h, v1.4h
975+ ; BE-NEXT: xtn v1.8b, v0.8h
976+ ; BE-NEXT: umov w8, v0.h[2]
977+ ; BE-NEXT: rev32 v1.16b, v1.16b
978+ ; BE-NEXT: str s1, [sp, #8]
979+ ; BE-NEXT: ldrh w9, [sp, #8]
980+ ; BE-NEXT: strb w8, [x0, #2]
981+ ; BE-NEXT: strh w9, [x0]
982+ ; BE-NEXT: add sp, sp, #16
983+ ; BE-NEXT: ret
; IR under test: load -> sext -> add <1, 2, 3> -> trunc -> store to %src.
984+ %l = load <3 x i8 >, ptr %src , align 1
985+ %e = sext <3 x i8 > %l to <3 x i32 >
986+ %add = add <3 x i32 > %e , <i32 1 , i32 2 , i32 3 >
987+ %t = trunc <3 x i32 > %add to <3 x i8 >
988+ store <3 x i8 > %t , ptr %src
989+ ret void
990+ }
0 commit comments