@@ -2018,30 +2018,30 @@ define i32 @test_udot_v24i8(ptr %p1, ptr %p2) {
20182018;
20192019; CHECK-GI-BASE-LABEL: test_udot_v24i8:
20202020; CHECK-GI-BASE: // %bb.0: // %entry
2021- ; CHECK-GI-BASE-NEXT: fmov s0, wzr
2022- ; CHECK-GI-BASE-NEXT: fmov s1, wzr
2021+ ; CHECK-GI-BASE-NEXT: movi d0, #0000000000000000
2022+ ; CHECK-GI-BASE-NEXT: movi d1, #0000000000000000
20232023; CHECK-GI-BASE-NEXT: ldr q2, [x0]
20242024; CHECK-GI-BASE-NEXT: ldr d3, [x0, #16]
20252025; CHECK-GI-BASE-NEXT: ldr q4, [x1]
20262026; CHECK-GI-BASE-NEXT: ldr d5, [x1, #16]
20272027; CHECK-GI-BASE-NEXT: ushll v6.8h, v2.8b, #0
20282028; CHECK-GI-BASE-NEXT: ushll2 v2.8h, v2.16b, #0
2029- ; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
2030- ; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
20312029; CHECK-GI-BASE-NEXT: ushll v3.8h, v3.8b, #0
20322030; CHECK-GI-BASE-NEXT: ushll v7.8h, v4.8b, #0
20332031; CHECK-GI-BASE-NEXT: ushll2 v4.8h, v4.16b, #0
2032+ ; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
2033+ ; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
20342034; CHECK-GI-BASE-NEXT: ushll v5.8h, v5.8b, #0
2035- ; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
2036- ; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
20372035; CHECK-GI-BASE-NEXT: umull v16.4s, v7.4h, v6.4h
20382036; CHECK-GI-BASE-NEXT: umull v17.4s, v4.4h, v2.4h
20392037; CHECK-GI-BASE-NEXT: umull v18.4s, v5.4h, v3.4h
2040- ; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
2041- ; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
2038+ ; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
2039+ ; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
20422040; CHECK-GI-BASE-NEXT: umlal2 v16.4s, v7.8h, v6.8h
20432041; CHECK-GI-BASE-NEXT: umlal2 v17.4s, v4.8h, v2.8h
20442042; CHECK-GI-BASE-NEXT: umlal2 v18.4s, v5.8h, v3.8h
2043+ ; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
2044+ ; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
20452045; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
20462046; CHECK-GI-BASE-NEXT: add v1.4s, v16.4s, v17.4s
20472047; CHECK-GI-BASE-NEXT: add v0.4s, v18.4s, v0.4s
@@ -2118,55 +2118,55 @@ define i32 @test_udot_v48i8(ptr %p1, ptr %p2) {
21182118;
21192119; CHECK-GI-BASE-LABEL: test_udot_v48i8:
21202120; CHECK-GI-BASE: // %bb.0: // %entry
2121- ; CHECK-GI-BASE-NEXT: fmov s0, wzr
2122- ; CHECK-GI-BASE-NEXT: fmov s2, wzr
2121+ ; CHECK-GI-BASE-NEXT: movi d0, #0000000000000000
2122+ ; CHECK-GI-BASE-NEXT: movi d2, #0000000000000000
21232123; CHECK-GI-BASE-NEXT: ldr q16, [x0, #32]
2124- ; CHECK-GI-BASE-NEXT: fmov s1, wzr
2125- ; CHECK-GI-BASE-NEXT: fmov s3, wzr
2124+ ; CHECK-GI-BASE-NEXT: movi d1, #0000000000000000
2125+ ; CHECK-GI-BASE-NEXT: movi d3, #0000000000000000
21262126; CHECK-GI-BASE-NEXT: ldr q19, [x1, #32]
21272127; CHECK-GI-BASE-NEXT: ldp q5, q7, [x1]
21282128; CHECK-GI-BASE-NEXT: ushll v23.8h, v16.8b, #0
2129+ ; CHECK-GI-BASE-NEXT: ldp q18, q17, [x0]
2130+ ; CHECK-GI-BASE-NEXT: ushll v20.8h, v19.8b, #0
21292131; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
21302132; CHECK-GI-BASE-NEXT: mov v2.s[1], wzr
2131- ; CHECK-GI-BASE-NEXT: ushll v20.8h, v19.8b, #0
2133+ ; CHECK-GI-BASE-NEXT: ushll2 v19.8h, v19.16b, #0
21322134; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
21332135; CHECK-GI-BASE-NEXT: mov v3.s[1], wzr
2134- ; CHECK-GI-BASE-NEXT: ushll2 v19.8h, v19.16b, #0
2135- ; CHECK-GI-BASE-NEXT: ldp q18, q17, [x0]
21362136; CHECK-GI-BASE-NEXT: ushll v4.8h, v5.8b, #0
21372137; CHECK-GI-BASE-NEXT: ushll2 v5.8h, v5.16b, #0
21382138; CHECK-GI-BASE-NEXT: ushll v6.8h, v7.8b, #0
21392139; CHECK-GI-BASE-NEXT: ushll2 v7.8h, v7.16b, #0
2140- ; CHECK-GI-BASE-NEXT: ushll2 v16.8h, v16.16b, #0
2141- ; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
2142- ; CHECK-GI-BASE-NEXT: mov v2.s[2], wzr
21432140; CHECK-GI-BASE-NEXT: ushll v21.8h, v18.8b, #0
21442141; CHECK-GI-BASE-NEXT: ushll2 v18.8h, v18.16b, #0
21452142; CHECK-GI-BASE-NEXT: ushll v22.8h, v17.8b, #0
21462143; CHECK-GI-BASE-NEXT: ushll2 v17.8h, v17.16b, #0
2144+ ; CHECK-GI-BASE-NEXT: ushll2 v16.8h, v16.16b, #0
2145+ ; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
2146+ ; CHECK-GI-BASE-NEXT: mov v2.s[2], wzr
21472147; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
21482148; CHECK-GI-BASE-NEXT: mov v3.s[2], wzr
2149- ; CHECK-GI-BASE-NEXT: umull v28.4s, v20.4h, v23.4h
2150- ; CHECK-GI-BASE-NEXT: umull v29.4s, v19.4h, v16.4h
21512149; CHECK-GI-BASE-NEXT: umull v24.4s, v4.4h, v21.4h
21522150; CHECK-GI-BASE-NEXT: umull v25.4s, v5.4h, v18.4h
21532151; CHECK-GI-BASE-NEXT: umull v26.4s, v6.4h, v22.4h
21542152; CHECK-GI-BASE-NEXT: umull v27.4s, v7.4h, v17.4h
2153+ ; CHECK-GI-BASE-NEXT: umull v28.4s, v20.4h, v23.4h
2154+ ; CHECK-GI-BASE-NEXT: umull v29.4s, v19.4h, v16.4h
21552155; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
21562156; CHECK-GI-BASE-NEXT: mov v2.s[3], wzr
21572157; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
21582158; CHECK-GI-BASE-NEXT: mov v3.s[3], wzr
2159- ; CHECK-GI-BASE-NEXT: umlal2 v28.4s, v20.8h, v23.8h
2160- ; CHECK-GI-BASE-NEXT: umlal2 v29.4s, v19.8h, v16.8h
21612159; CHECK-GI-BASE-NEXT: umlal2 v24.4s, v4.8h, v21.8h
21622160; CHECK-GI-BASE-NEXT: umlal2 v25.4s, v5.8h, v18.8h
21632161; CHECK-GI-BASE-NEXT: umlal2 v26.4s, v6.8h, v22.8h
21642162; CHECK-GI-BASE-NEXT: umlal2 v27.4s, v7.8h, v17.8h
2163+ ; CHECK-GI-BASE-NEXT: umlal2 v28.4s, v20.8h, v23.8h
2164+ ; CHECK-GI-BASE-NEXT: umlal2 v29.4s, v19.8h, v16.8h
21652165; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v2.4s
21662166; CHECK-GI-BASE-NEXT: add v1.4s, v1.4s, v3.4s
2167- ; CHECK-GI-BASE-NEXT: add v4.4s, v28.4s, v29.4s
21682167; CHECK-GI-BASE-NEXT: add v2.4s, v24.4s, v25.4s
21692168; CHECK-GI-BASE-NEXT: add v3.4s, v26.4s, v27.4s
2169+ ; CHECK-GI-BASE-NEXT: add v4.4s, v28.4s, v29.4s
21702170; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
21712171; CHECK-GI-BASE-NEXT: add v1.4s, v2.4s, v3.4s
21722172; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v0.4s
@@ -2336,30 +2336,30 @@ define i32 @test_sdot_v24i8(ptr %p1, ptr %p2) {
23362336;
23372337; CHECK-GI-BASE-LABEL: test_sdot_v24i8:
23382338; CHECK-GI-BASE: // %bb.0: // %entry
2339- ; CHECK-GI-BASE-NEXT: fmov s0, wzr
2340- ; CHECK-GI-BASE-NEXT: fmov s1, wzr
2339+ ; CHECK-GI-BASE-NEXT: movi d0, #0000000000000000
2340+ ; CHECK-GI-BASE-NEXT: movi d1, #0000000000000000
23412341; CHECK-GI-BASE-NEXT: ldr q2, [x0]
23422342; CHECK-GI-BASE-NEXT: ldr d3, [x0, #16]
23432343; CHECK-GI-BASE-NEXT: ldr q4, [x1]
23442344; CHECK-GI-BASE-NEXT: ldr d5, [x1, #16]
23452345; CHECK-GI-BASE-NEXT: sshll v6.8h, v2.8b, #0
23462346; CHECK-GI-BASE-NEXT: sshll2 v2.8h, v2.16b, #0
2347- ; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
2348- ; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
23492347; CHECK-GI-BASE-NEXT: sshll v3.8h, v3.8b, #0
23502348; CHECK-GI-BASE-NEXT: sshll v7.8h, v4.8b, #0
23512349; CHECK-GI-BASE-NEXT: sshll2 v4.8h, v4.16b, #0
2350+ ; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
2351+ ; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
23522352; CHECK-GI-BASE-NEXT: sshll v5.8h, v5.8b, #0
2353- ; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
2354- ; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
23552353; CHECK-GI-BASE-NEXT: smull v16.4s, v7.4h, v6.4h
23562354; CHECK-GI-BASE-NEXT: smull v17.4s, v4.4h, v2.4h
23572355; CHECK-GI-BASE-NEXT: smull v18.4s, v5.4h, v3.4h
2358- ; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
2359- ; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
2356+ ; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
2357+ ; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
23602358; CHECK-GI-BASE-NEXT: smlal2 v16.4s, v7.8h, v6.8h
23612359; CHECK-GI-BASE-NEXT: smlal2 v17.4s, v4.8h, v2.8h
23622360; CHECK-GI-BASE-NEXT: smlal2 v18.4s, v5.8h, v3.8h
2361+ ; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
2362+ ; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
23632363; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
23642364; CHECK-GI-BASE-NEXT: add v1.4s, v16.4s, v17.4s
23652365; CHECK-GI-BASE-NEXT: add v0.4s, v18.4s, v0.4s
@@ -2436,55 +2436,55 @@ define i32 @test_sdot_v48i8(ptr %p1, ptr %p2) {
24362436;
24372437; CHECK-GI-BASE-LABEL: test_sdot_v48i8:
24382438; CHECK-GI-BASE: // %bb.0: // %entry
2439- ; CHECK-GI-BASE-NEXT: fmov s0, wzr
2440- ; CHECK-GI-BASE-NEXT: fmov s2, wzr
2439+ ; CHECK-GI-BASE-NEXT: movi d0, #0000000000000000
2440+ ; CHECK-GI-BASE-NEXT: movi d2, #0000000000000000
24412441; CHECK-GI-BASE-NEXT: ldr q16, [x0, #32]
2442- ; CHECK-GI-BASE-NEXT: fmov s1, wzr
2443- ; CHECK-GI-BASE-NEXT: fmov s3, wzr
2442+ ; CHECK-GI-BASE-NEXT: movi d1, #0000000000000000
2443+ ; CHECK-GI-BASE-NEXT: movi d3, #0000000000000000
24442444; CHECK-GI-BASE-NEXT: ldr q19, [x1, #32]
24452445; CHECK-GI-BASE-NEXT: ldp q5, q7, [x1]
24462446; CHECK-GI-BASE-NEXT: sshll v23.8h, v16.8b, #0
2447+ ; CHECK-GI-BASE-NEXT: ldp q18, q17, [x0]
2448+ ; CHECK-GI-BASE-NEXT: sshll v20.8h, v19.8b, #0
24472449; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
24482450; CHECK-GI-BASE-NEXT: mov v2.s[1], wzr
2449- ; CHECK-GI-BASE-NEXT: sshll v20.8h, v19.8b, #0
2451+ ; CHECK-GI-BASE-NEXT: sshll2 v19.8h, v19.16b, #0
24502452; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
24512453; CHECK-GI-BASE-NEXT: mov v3.s[1], wzr
2452- ; CHECK-GI-BASE-NEXT: sshll2 v19.8h, v19.16b, #0
2453- ; CHECK-GI-BASE-NEXT: ldp q18, q17, [x0]
24542454; CHECK-GI-BASE-NEXT: sshll v4.8h, v5.8b, #0
24552455; CHECK-GI-BASE-NEXT: sshll2 v5.8h, v5.16b, #0
24562456; CHECK-GI-BASE-NEXT: sshll v6.8h, v7.8b, #0
24572457; CHECK-GI-BASE-NEXT: sshll2 v7.8h, v7.16b, #0
2458- ; CHECK-GI-BASE-NEXT: sshll2 v16.8h, v16.16b, #0
2459- ; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
2460- ; CHECK-GI-BASE-NEXT: mov v2.s[2], wzr
24612458; CHECK-GI-BASE-NEXT: sshll v21.8h, v18.8b, #0
24622459; CHECK-GI-BASE-NEXT: sshll2 v18.8h, v18.16b, #0
24632460; CHECK-GI-BASE-NEXT: sshll v22.8h, v17.8b, #0
24642461; CHECK-GI-BASE-NEXT: sshll2 v17.8h, v17.16b, #0
2462+ ; CHECK-GI-BASE-NEXT: sshll2 v16.8h, v16.16b, #0
2463+ ; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
2464+ ; CHECK-GI-BASE-NEXT: mov v2.s[2], wzr
24652465; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
24662466; CHECK-GI-BASE-NEXT: mov v3.s[2], wzr
2467- ; CHECK-GI-BASE-NEXT: smull v28.4s, v20.4h, v23.4h
2468- ; CHECK-GI-BASE-NEXT: smull v29.4s, v19.4h, v16.4h
24692467; CHECK-GI-BASE-NEXT: smull v24.4s, v4.4h, v21.4h
24702468; CHECK-GI-BASE-NEXT: smull v25.4s, v5.4h, v18.4h
24712469; CHECK-GI-BASE-NEXT: smull v26.4s, v6.4h, v22.4h
24722470; CHECK-GI-BASE-NEXT: smull v27.4s, v7.4h, v17.4h
2471+ ; CHECK-GI-BASE-NEXT: smull v28.4s, v20.4h, v23.4h
2472+ ; CHECK-GI-BASE-NEXT: smull v29.4s, v19.4h, v16.4h
24732473; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
24742474; CHECK-GI-BASE-NEXT: mov v2.s[3], wzr
24752475; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
24762476; CHECK-GI-BASE-NEXT: mov v3.s[3], wzr
2477- ; CHECK-GI-BASE-NEXT: smlal2 v28.4s, v20.8h, v23.8h
2478- ; CHECK-GI-BASE-NEXT: smlal2 v29.4s, v19.8h, v16.8h
24792477; CHECK-GI-BASE-NEXT: smlal2 v24.4s, v4.8h, v21.8h
24802478; CHECK-GI-BASE-NEXT: smlal2 v25.4s, v5.8h, v18.8h
24812479; CHECK-GI-BASE-NEXT: smlal2 v26.4s, v6.8h, v22.8h
24822480; CHECK-GI-BASE-NEXT: smlal2 v27.4s, v7.8h, v17.8h
2481+ ; CHECK-GI-BASE-NEXT: smlal2 v28.4s, v20.8h, v23.8h
2482+ ; CHECK-GI-BASE-NEXT: smlal2 v29.4s, v19.8h, v16.8h
24832483; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v2.4s
24842484; CHECK-GI-BASE-NEXT: add v1.4s, v1.4s, v3.4s
2485- ; CHECK-GI-BASE-NEXT: add v4.4s, v28.4s, v29.4s
24862485; CHECK-GI-BASE-NEXT: add v2.4s, v24.4s, v25.4s
24872486; CHECK-GI-BASE-NEXT: add v3.4s, v26.4s, v27.4s
2487+ ; CHECK-GI-BASE-NEXT: add v4.4s, v28.4s, v29.4s
24882488; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
24892489; CHECK-GI-BASE-NEXT: add v1.4s, v2.4s, v3.4s
24902490; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v0.4s
0 commit comments