@@ -393,70 +393,69 @@ entry:
393393define <32 x i1 > @whilewr_32_expand3 (ptr %a , ptr %b ) {
394394; CHECK-LABEL: whilewr_32_expand3:
395395; CHECK: // %bb.0: // %entry
396- ; CHECK-NEXT: sub x10, x1, x0
396+ ; CHECK-NEXT: subs x10, x1, x0
397397; CHECK-NEXT: index z0.d, #0, #1
398- ; CHECK-NEXT: sub x9, x10, #61
399- ; CHECK-NEXT: subs x11, x10, #64
400- ; CHECK-NEXT: add x12, x10, #3
401- ; CHECK-NEXT: csel x9, x9, x11, mi
398+ ; CHECK-NEXT: add x9, x10, #3
399+ ; CHECK-NEXT: sub x12, x10, #61
400+ ; CHECK-NEXT: csel x9, x9, x10, mi
402401; CHECK-NEXT: asr x11, x9, #2
403- ; CHECK-NEXT: mov z1.d, z0.d
404402; CHECK-NEXT: mov z2.d, z0.d
405403; CHECK-NEXT: mov z3.d, z0.d
406- ; CHECK-NEXT: cmp x11, #1
407404; CHECK-NEXT: mov z4.d, z0.d
405+ ; CHECK-NEXT: cmp x11, #1
406+ ; CHECK-NEXT: dup v1.2d, x11
408407; CHECK-NEXT: mov z5.d, z0.d
409408; CHECK-NEXT: cset w9, lt
410- ; CHECK-NEXT: cmp x10, #0
409+ ; CHECK-NEXT: subs x10, x10, #64
411410; CHECK-NEXT: mov z6.d, z0.d
412411; CHECK-NEXT: csel x10, x12, x10, mi
413- ; CHECK-NEXT: dup v7.2d, x11
414- ; CHECK-NEXT: add z1 .d, z1 .d, #12 // =0xc
412+ ; CHECK-NEXT: mov z7.d, z0.d
413+ ; CHECK-NEXT: add z2 .d, z2 .d, #12 // =0xc
415414; CHECK-NEXT: asr x10, x10, #2
416- ; CHECK-NEXT: add z2 .d, z2 .d, #10 // =0xa
417- ; CHECK-NEXT: add z3 .d, z3 .d, #8 // =0x8
418- ; CHECK-NEXT: add z4 .d, z4 .d, #6 // =0x6
419- ; CHECK-NEXT: add z5 .d, z5 .d, #4 // =0x4
420- ; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2
415+ ; CHECK-NEXT: add z3 .d, z3 .d, #10 // =0xa
416+ ; CHECK-NEXT: add z4 .d, z4 .d, #8 // =0x8
417+ ; CHECK-NEXT: add z5 .d, z5 .d, #6 // =0x6
418+ ; CHECK-NEXT: add z6 .d, z6 .d, #4 // =0x4
419+ ; CHECK-NEXT: cmhi v17.2d, v1.2d, v0.2d
421420; CHECK-NEXT: dup v16.2d, x10
422- ; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d
423- ; CHECK-NEXT: cmhi v19.2d, v7 .2d, v1 .2d
424- ; CHECK-NEXT: cmhi v20.2d, v7 .2d, v2 .2d
425- ; CHECK-NEXT: cmhi v21.2d, v7 .2d, v3 .2d
421+ ; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2
422+ ; CHECK-NEXT: cmhi v19.2d, v1 .2d, v2 .2d
423+ ; CHECK-NEXT: cmhi v20.2d, v1 .2d, v3 .2d
424+ ; CHECK-NEXT: cmhi v21.2d, v1 .2d, v4 .2d
426425; CHECK-NEXT: cmp x10, #1
427- ; CHECK-NEXT: cmhi v22.2d, v7 .2d, v4 .2d
426+ ; CHECK-NEXT: cmhi v22.2d, v1 .2d, v5 .2d
428427; CHECK-NEXT: cset w10, lt
429428; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
430429; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
431- ; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
432430; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
433431; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
434432; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
435- ; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d
436- ; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d
437- ; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d
433+ ; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
434+ ; CHECK-NEXT: cmhi v23.2d, v16.2d, v6.2d
435+ ; CHECK-NEXT: cmhi v24.2d, v16.2d, v7.2d
436+ ; CHECK-NEXT: cmhi v6.2d, v1.2d, v6.2d
438437; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d
439- ; CHECK-NEXT: cmhi v6 .2d, v7 .2d, v6 .2d
440- ; CHECK-NEXT: cmhi v0.2d, v7 .2d, v0.2d
441- ; CHECK-NEXT: uzp1 v7 .4s, v21.4s, v20.4s
442- ; CHECK-NEXT: uzp1 v2 .4s, v3 .4s, v2 .4s
443- ; CHECK-NEXT: uzp1 v3 .4s, v23.4s, v4 .4s
444- ; CHECK-NEXT: uzp1 v4 .4s, v18.4s, v24.4s
445- ; CHECK-NEXT: uzp1 v5 .4s, v5 .4s, v22.4s
446- ; CHECK-NEXT: uzp1 v1 .4s, v1 .4s, v16.4s
447- ; CHECK-NEXT: uzp1 v6 .4s, v17.4s, v6 .4s
438+ ; CHECK-NEXT: cmhi v7 .2d, v1 .2d, v7 .2d
439+ ; CHECK-NEXT: cmhi v0.2d, v1 .2d, v0.2d
440+ ; CHECK-NEXT: uzp1 v1 .4s, v21.4s, v20.4s
441+ ; CHECK-NEXT: uzp1 v3 .4s, v4 .4s, v3 .4s
442+ ; CHECK-NEXT: uzp1 v4 .4s, v23.4s, v5 .4s
443+ ; CHECK-NEXT: uzp1 v5 .4s, v18.4s, v24.4s
444+ ; CHECK-NEXT: uzp1 v6 .4s, v6 .4s, v22.4s
445+ ; CHECK-NEXT: uzp1 v2 .4s, v2 .4s, v16.4s
446+ ; CHECK-NEXT: uzp1 v7 .4s, v17.4s, v7 .4s
448447; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s
449- ; CHECK-NEXT: uzp1 v3 .8h, v4 .8h, v3 .8h
450- ; CHECK-NEXT: uzp1 v1 .8h, v2 .8h, v1 .8h
451- ; CHECK-NEXT: uzp1 v2 .8h, v6 .8h, v5 .8h
452- ; CHECK-NEXT: uzp1 v0.8h, v7 .8h, v0.8h
453- ; CHECK-NEXT: uzp1 v1.16b, v3 .16b, v1 .16b
454- ; CHECK-NEXT: uzp1 v0.16b, v2 .16b, v0.16b
455- ; CHECK-NEXT: dup v3 .16b, w10
456- ; CHECK-NEXT: dup v2 .16b, w9
448+ ; CHECK-NEXT: uzp1 v4 .8h, v5 .8h, v4 .8h
449+ ; CHECK-NEXT: uzp1 v2 .8h, v3 .8h, v2 .8h
450+ ; CHECK-NEXT: uzp1 v3 .8h, v7 .8h, v6 .8h
451+ ; CHECK-NEXT: uzp1 v0.8h, v1 .8h, v0.8h
452+ ; CHECK-NEXT: uzp1 v1.16b, v4 .16b, v2 .16b
453+ ; CHECK-NEXT: uzp1 v0.16b, v3 .16b, v0.16b
454+ ; CHECK-NEXT: dup v2 .16b, w10
455+ ; CHECK-NEXT: dup v3 .16b, w9
457456; CHECK-NEXT: adrp x9, .LCPI14_0
458- ; CHECK-NEXT: orr v1.16b, v1.16b, v3 .16b
459- ; CHECK-NEXT: orr v0.16b, v0.16b, v2 .16b
457+ ; CHECK-NEXT: orr v1.16b, v1.16b, v2 .16b
458+ ; CHECK-NEXT: orr v0.16b, v0.16b, v3 .16b
460459; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_0]
461460; CHECK-NEXT: shl v1.16b, v1.16b, #7
462461; CHECK-NEXT: shl v0.16b, v0.16b, #7
@@ -470,8 +469,8 @@ define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) {
470469; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b
471470; CHECK-NEXT: addv h1, v1.8h
472471; CHECK-NEXT: addv h0, v0.8h
473- ; CHECK-NEXT: str h1, [x8]
474- ; CHECK-NEXT: str h0, [x8, #2 ]
472+ ; CHECK-NEXT: str h1, [x8, #2 ]
473+ ; CHECK-NEXT: str h0, [x8]
475474; CHECK-NEXT: ret
476475entry:
477476 %0 = call <32 x i1 > @llvm.loop.dependence.war.mask.v32i1 (ptr %a , ptr %b , i64 4 )
@@ -587,70 +586,69 @@ entry:
587586define <32 x i1 > @whilewr_64_expand4 (ptr %a , ptr %b ) {
588587; CHECK-LABEL: whilewr_64_expand4:
589588; CHECK: // %bb.0: // %entry
590- ; CHECK-NEXT: sub x10, x1, x0
589+ ; CHECK-NEXT: subs x10, x1, x0
591590; CHECK-NEXT: index z0.d, #0, #1
592- ; CHECK-NEXT: sub x9, x10, #121
593- ; CHECK-NEXT: subs x11, x10, #128
594- ; CHECK-NEXT: add x12, x10, #7
595- ; CHECK-NEXT: csel x9, x9, x11, mi
591+ ; CHECK-NEXT: add x9, x10, #7
592+ ; CHECK-NEXT: sub x12, x10, #121
593+ ; CHECK-NEXT: csel x9, x9, x10, mi
596594; CHECK-NEXT: asr x11, x9, #3
597- ; CHECK-NEXT: mov z1.d, z0.d
598595; CHECK-NEXT: mov z2.d, z0.d
599596; CHECK-NEXT: mov z3.d, z0.d
600- ; CHECK-NEXT: cmp x11, #1
601597; CHECK-NEXT: mov z4.d, z0.d
598+ ; CHECK-NEXT: cmp x11, #1
599+ ; CHECK-NEXT: dup v1.2d, x11
602600; CHECK-NEXT: mov z5.d, z0.d
603601; CHECK-NEXT: cset w9, lt
604- ; CHECK-NEXT: cmp x10, #0
602+ ; CHECK-NEXT: subs x10, x10, #128
605603; CHECK-NEXT: mov z6.d, z0.d
606604; CHECK-NEXT: csel x10, x12, x10, mi
607- ; CHECK-NEXT: dup v7.2d, x11
608- ; CHECK-NEXT: add z1 .d, z1 .d, #12 // =0xc
605+ ; CHECK-NEXT: mov z7.d, z0.d
606+ ; CHECK-NEXT: add z2 .d, z2 .d, #12 // =0xc
609607; CHECK-NEXT: asr x10, x10, #3
610- ; CHECK-NEXT: add z2 .d, z2 .d, #10 // =0xa
611- ; CHECK-NEXT: add z3 .d, z3 .d, #8 // =0x8
612- ; CHECK-NEXT: add z4 .d, z4 .d, #6 // =0x6
613- ; CHECK-NEXT: add z5 .d, z5 .d, #4 // =0x4
614- ; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2
608+ ; CHECK-NEXT: add z3 .d, z3 .d, #10 // =0xa
609+ ; CHECK-NEXT: add z4 .d, z4 .d, #8 // =0x8
610+ ; CHECK-NEXT: add z5 .d, z5 .d, #6 // =0x6
611+ ; CHECK-NEXT: add z6 .d, z6 .d, #4 // =0x4
612+ ; CHECK-NEXT: cmhi v17.2d, v1.2d, v0.2d
615613; CHECK-NEXT: dup v16.2d, x10
616- ; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d
617- ; CHECK-NEXT: cmhi v19.2d, v7 .2d, v1 .2d
618- ; CHECK-NEXT: cmhi v20.2d, v7 .2d, v2 .2d
619- ; CHECK-NEXT: cmhi v21.2d, v7 .2d, v3 .2d
614+ ; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2
615+ ; CHECK-NEXT: cmhi v19.2d, v1 .2d, v2 .2d
616+ ; CHECK-NEXT: cmhi v20.2d, v1 .2d, v3 .2d
617+ ; CHECK-NEXT: cmhi v21.2d, v1 .2d, v4 .2d
620618; CHECK-NEXT: cmp x10, #1
621- ; CHECK-NEXT: cmhi v22.2d, v7 .2d, v4 .2d
619+ ; CHECK-NEXT: cmhi v22.2d, v1 .2d, v5 .2d
622620; CHECK-NEXT: cset w10, lt
623621; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
624622; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
625- ; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
626623; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
627624; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
628625; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
629- ; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d
630- ; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d
631- ; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d
626+ ; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
627+ ; CHECK-NEXT: cmhi v23.2d, v16.2d, v6.2d
628+ ; CHECK-NEXT: cmhi v24.2d, v16.2d, v7.2d
629+ ; CHECK-NEXT: cmhi v6.2d, v1.2d, v6.2d
632630; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d
633- ; CHECK-NEXT: cmhi v6 .2d, v7 .2d, v6 .2d
634- ; CHECK-NEXT: cmhi v0.2d, v7 .2d, v0.2d
635- ; CHECK-NEXT: uzp1 v7 .4s, v21.4s, v20.4s
636- ; CHECK-NEXT: uzp1 v2 .4s, v3 .4s, v2 .4s
637- ; CHECK-NEXT: uzp1 v3 .4s, v23.4s, v4 .4s
638- ; CHECK-NEXT: uzp1 v4 .4s, v18.4s, v24.4s
639- ; CHECK-NEXT: uzp1 v5 .4s, v5 .4s, v22.4s
640- ; CHECK-NEXT: uzp1 v1 .4s, v1 .4s, v16.4s
641- ; CHECK-NEXT: uzp1 v6 .4s, v17.4s, v6 .4s
631+ ; CHECK-NEXT: cmhi v7 .2d, v1 .2d, v7 .2d
632+ ; CHECK-NEXT: cmhi v0.2d, v1 .2d, v0.2d
633+ ; CHECK-NEXT: uzp1 v1 .4s, v21.4s, v20.4s
634+ ; CHECK-NEXT: uzp1 v3 .4s, v4 .4s, v3 .4s
635+ ; CHECK-NEXT: uzp1 v4 .4s, v23.4s, v5 .4s
636+ ; CHECK-NEXT: uzp1 v5 .4s, v18.4s, v24.4s
637+ ; CHECK-NEXT: uzp1 v6 .4s, v6 .4s, v22.4s
638+ ; CHECK-NEXT: uzp1 v2 .4s, v2 .4s, v16.4s
639+ ; CHECK-NEXT: uzp1 v7 .4s, v17.4s, v7 .4s
642640; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s
643- ; CHECK-NEXT: uzp1 v3 .8h, v4 .8h, v3 .8h
644- ; CHECK-NEXT: uzp1 v1 .8h, v2 .8h, v1 .8h
645- ; CHECK-NEXT: uzp1 v2 .8h, v6 .8h, v5 .8h
646- ; CHECK-NEXT: uzp1 v0.8h, v7 .8h, v0.8h
647- ; CHECK-NEXT: uzp1 v1.16b, v3 .16b, v1 .16b
648- ; CHECK-NEXT: uzp1 v0.16b, v2 .16b, v0.16b
649- ; CHECK-NEXT: dup v3 .16b, w10
650- ; CHECK-NEXT: dup v2 .16b, w9
641+ ; CHECK-NEXT: uzp1 v4 .8h, v5 .8h, v4 .8h
642+ ; CHECK-NEXT: uzp1 v2 .8h, v3 .8h, v2 .8h
643+ ; CHECK-NEXT: uzp1 v3 .8h, v7 .8h, v6 .8h
644+ ; CHECK-NEXT: uzp1 v0.8h, v1 .8h, v0.8h
645+ ; CHECK-NEXT: uzp1 v1.16b, v4 .16b, v2 .16b
646+ ; CHECK-NEXT: uzp1 v0.16b, v3 .16b, v0.16b
647+ ; CHECK-NEXT: dup v2 .16b, w10
648+ ; CHECK-NEXT: dup v3 .16b, w9
651649; CHECK-NEXT: adrp x9, .LCPI18_0
652- ; CHECK-NEXT: orr v1.16b, v1.16b, v3 .16b
653- ; CHECK-NEXT: orr v0.16b, v0.16b, v2 .16b
650+ ; CHECK-NEXT: orr v1.16b, v1.16b, v2 .16b
651+ ; CHECK-NEXT: orr v0.16b, v0.16b, v3 .16b
654652; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_0]
655653; CHECK-NEXT: shl v1.16b, v1.16b, #7
656654; CHECK-NEXT: shl v0.16b, v0.16b, #7
@@ -664,8 +662,8 @@ define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) {
664662; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b
665663; CHECK-NEXT: addv h1, v1.8h
666664; CHECK-NEXT: addv h0, v0.8h
667- ; CHECK-NEXT: str h1, [x8]
668- ; CHECK-NEXT: str h0, [x8, #2 ]
665+ ; CHECK-NEXT: str h1, [x8, #2 ]
666+ ; CHECK-NEXT: str h0, [x8]
669667; CHECK-NEXT: ret
670668entry:
671669 %0 = call <32 x i1 > @llvm.loop.dependence.war.mask.v32i1 (ptr %a , ptr %b , i64 8 )
0 commit comments