@@ -111,22 +111,22 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non
111111; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
112112; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x9, x10, x3; nopv
113113; CHECK-NEXT: vldb wl7, [p0], #32; vmov wh3, wl2
114- ; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh4 , x7, x0, r1
115- ; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh3 , bmh0, x3, x4, r1
114+ ; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh5 , x7, x0, r1
115+ ; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh4 , bmh0, x3, x4, r1
116116; CHECK-NEXT: vband x9, x10, x5; vmul.f bmh2, x6, x9, r1
117- ; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh5 , x7, x0, r1
118- ; CHECK-NEXT: vsub.f bml1, bmh4 , bmh1, r0
119- ; CHECK-NEXT: vmul.f bmh7 , x6, x9, r1
120- ; CHECK-NEXT: vmul.f bmh6 , x0, x7, r1
121- ; CHECK-NEXT: vmov wh5, wl2; vsub.f bml2, bmh5 , bmh1, r0
117+ ; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh6 , x7, x0, r1
118+ ; CHECK-NEXT: vsub.f bml0, bmh5 , bmh1, r0
119+ ; CHECK-NEXT: vmul.f bmh3 , x6, x9, r1
120+ ; CHECK-NEXT: vmul.f bmh7 , x0, x7, r1
121+ ; CHECK-NEXT: vmov wh5, wl2; vsub.f bml1, bmh6 , bmh1, r0
122122; CHECK-NEXT: vconv.bf16.fp32 wl7, bmh2; vmul.f bmh8, x0, x7, r1
123- ; CHECK-NEXT: vmac.f bml0, bmh0, x5, x4, r1
124- ; CHECK-NEXT: vmsc.f bml3, bmh3, x7, x3, r1
123+ ; CHECK-NEXT: vmac.f bml2, bmh0, x5, x4, r1
124+ ; CHECK-NEXT: vmsc.f bml3, bmh4, x7, x3, r1
125+ ; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh3
125126; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh7
126- ; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh6
127- ; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmsc.f bml4, bml0, x3, x5, r1
127+ ; CHECK-NEXT: vst.conv.bf16.fp32 bml0, [p1], #32; vmsc.f bml4, bml2, x3, x5, r1
128128; CHECK-NEXT: vconv.bf16.fp32 wl5, bmh8; vmin_ge.bf16 x9, r16, x3, x1
129- ; CHECK-NEXT: vst.conv.bf16.fp32 bml2 , [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
129+ ; CHECK-NEXT: vst.conv.bf16.fp32 bml1 , [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
130130; CHECK-NEXT: .L_LEnd0:
131131; CHECK-NEXT: nopb ; nopa ; vconv.bf16.fp32 wl7, bml3; nopx ; vmin_ge.bf16 x11, r16, x5, x1; nopv
132132; CHECK-NEXT: // %bb.2:
0 commit comments