@@ -2022,14 +2022,9 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
20222022; RV32-NEXT: mul a1, a1, a2
20232023; RV32-NEXT: sub sp, sp, a1
20242024; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
2025- ; RV32-NEXT: vmv1r.v v24 , v0
2025+ ; RV32-NEXT: vmv1r.v v7 , v0
20262026; RV32-NEXT: csrr a1, vlenb
2027- ; RV32-NEXT: slli a1, a1, 5
2028- ; RV32-NEXT: add a1, sp, a1
2029- ; RV32-NEXT: addi a1, a1, 16
2030- ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
2031- ; RV32-NEXT: csrr a1, vlenb
2032- ; RV32-NEXT: li a2, 48
2027+ ; RV32-NEXT: li a2, 40
20332028; RV32-NEXT: mul a1, a1, a2
20342029; RV32-NEXT: add a1, sp, a1
20352030; RV32-NEXT: addi a1, a1, 16
@@ -2045,101 +2040,88 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
20452040; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
20462041; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
20472042; RV32-NEXT: csrr a3, vlenb
2048- ; RV32-NEXT: li a4, 40
2043+ ; RV32-NEXT: li a4, 48
20492044; RV32-NEXT: mul a3, a3, a4
20502045; RV32-NEXT: add a3, sp, a3
20512046; RV32-NEXT: addi a3, a3, 16
20522047; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
20532048; RV32-NEXT: lui a3, 349525
20542049; RV32-NEXT: addi a3, a3, 1365
20552050; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2056- ; RV32-NEXT: vmv.v.x v16, a3
2057- ; RV32-NEXT: csrr a3, vlenb
2058- ; RV32-NEXT: li a4, 24
2059- ; RV32-NEXT: mul a3, a3, a4
2060- ; RV32-NEXT: add a3, sp, a3
2061- ; RV32-NEXT: addi a3, a3, 16
2062- ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2051+ ; RV32-NEXT: vmv.v.x v8, a3
20632052; RV32-NEXT: csrr a3, vlenb
2064- ; RV32-NEXT: li a4, 40
2065- ; RV32-NEXT: mul a3, a3, a4
2053+ ; RV32-NEXT: slli a3, a3, 5
20662054; RV32-NEXT: add a3, sp, a3
20672055; RV32-NEXT: addi a3, a3, 16
2068- ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
2069- ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2070- ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2056+ ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
20712057; RV32-NEXT: csrr a3, vlenb
20722058; RV32-NEXT: slli a3, a3, 5
20732059; RV32-NEXT: add a3, sp, a3
20742060; RV32-NEXT: addi a3, a3, 16
2075- ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
2076- ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
2061+ ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
20772062; RV32-NEXT: csrr a3, vlenb
2078- ; RV32-NEXT: slli a3, a3, 5
2063+ ; RV32-NEXT: li a4, 48
2064+ ; RV32-NEXT: mul a3, a3, a4
20792065; RV32-NEXT: add a3, sp, a3
20802066; RV32-NEXT: addi a3, a3, 16
2081- ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
2067+ ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
2068+ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2069+ ; RV32-NEXT: vand.vv v8, v24, v8, v0.t
2070+ ; RV32-NEXT: vsub.vv v16, v16, v8, v0.t
20822071; RV32-NEXT: lui a3, 209715
20832072; RV32-NEXT: addi a3, a3, 819
20842073; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2085- ; RV32-NEXT: vmv.v.x v16, a3
2086- ; RV32-NEXT: csrr a3, vlenb
2087- ; RV32-NEXT: slli a3, a3, 5
2088- ; RV32-NEXT: add a3, sp, a3
2089- ; RV32-NEXT: addi a3, a3, 16
2090- ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
2091- ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2092- ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2074+ ; RV32-NEXT: vmv.v.x v8, a3
20932075; RV32-NEXT: csrr a3, vlenb
2094- ; RV32-NEXT: slli a3, a3, 4
2076+ ; RV32-NEXT: li a4, 48
2077+ ; RV32-NEXT: mul a3, a3, a4
20952078; RV32-NEXT: add a3, sp, a3
20962079; RV32-NEXT: addi a3, a3, 16
20972080; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
20982081; RV32-NEXT: csrr a3, vlenb
2099- ; RV32-NEXT: slli a3, a3, 5
2082+ ; RV32-NEXT: li a4, 48
2083+ ; RV32-NEXT: mul a3, a3, a4
21002084; RV32-NEXT: add a3, sp, a3
21012085; RV32-NEXT: addi a3, a3, 16
21022086; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
2103- ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
2087+ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2088+ ; RV32-NEXT: vand.vv v8, v16, v8, v0.t
2089+ ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
21042090; RV32-NEXT: csrr a3, vlenb
2105- ; RV32-NEXT: li a4, 40
2091+ ; RV32-NEXT: li a4, 48
21062092; RV32-NEXT: mul a3, a3, a4
21072093; RV32-NEXT: add a3, sp, a3
21082094; RV32-NEXT: addi a3, a3, 16
2109- ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2110- ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2111- ; RV32-NEXT: csrr a3, vlenb
2112- ; RV32-NEXT: slli a3, a3, 4
2113- ; RV32-NEXT: add a3, sp, a3
2114- ; RV32-NEXT: addi a3, a3, 16
2115- ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
2116- ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
2095+ ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
2096+ ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2097+ ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
21172098; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
21182099; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
21192100; RV32-NEXT: lui a3, 61681
21202101; RV32-NEXT: addi a3, a3, -241
21212102; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
21222103; RV32-NEXT: vmv.v.x v16, a3
21232104; RV32-NEXT: csrr a3, vlenb
2124- ; RV32-NEXT: slli a3, a3, 5
2105+ ; RV32-NEXT: li a4, 24
2106+ ; RV32-NEXT: mul a3, a3, a4
21252107; RV32-NEXT: add a3, sp, a3
21262108; RV32-NEXT: addi a3, a3, 16
21272109; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
21282110; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2129- ; RV32-NEXT: vand.vv v16 , v8, v16, v0.t
2111+ ; RV32-NEXT: vand.vv v8 , v8, v16, v0.t
21302112; RV32-NEXT: lui a3, 4112
21312113; RV32-NEXT: addi a3, a3, 257
21322114; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2133- ; RV32-NEXT: vmv.v.x v8 , a3
2115+ ; RV32-NEXT: vmv.v.x v16 , a3
21342116; RV32-NEXT: csrr a3, vlenb
21352117; RV32-NEXT: slli a3, a3, 4
21362118; RV32-NEXT: add a3, sp, a3
21372119; RV32-NEXT: addi a3, a3, 16
2138- ; RV32-NEXT: vs8r.v v8 , (a3) # Unknown-size Folded Spill
2120+ ; RV32-NEXT: vs8r.v v16 , (a3) # Unknown-size Folded Spill
21392121; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2140- ; RV32-NEXT: vmul.vv v16, v16, v8 , v0.t
2122+ ; RV32-NEXT: vmul.vv v8, v8, v16 , v0.t
21412123; RV32-NEXT: li a2, 56
2142- ; RV32-NEXT: vsrl.vx v8, v16 , a2, v0.t
2124+ ; RV32-NEXT: vsrl.vx v8, v8 , a2, v0.t
21432125; RV32-NEXT: csrr a3, vlenb
21442126; RV32-NEXT: slli a3, a3, 3
21452127; RV32-NEXT: add a3, sp, a3
@@ -2149,8 +2131,8 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
21492131; RV32-NEXT: # %bb.1:
21502132; RV32-NEXT: mv a0, a1
21512133; RV32-NEXT: .LBB46_2:
2152- ; RV32-NEXT: vmv1r.v v0, v24
2153- ; RV32-NEXT: li a3, 48
2134+ ; RV32-NEXT: vmv1r.v v0, v7
2135+ ; RV32-NEXT: li a3, 40
21542136; RV32-NEXT: mul a1, a1, a3
21552137; RV32-NEXT: add a1, sp, a1
21562138; RV32-NEXT: addi a1, a1, 16
@@ -2160,71 +2142,64 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
21602142; RV32-NEXT: addi a0, sp, 16
21612143; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
21622144; RV32-NEXT: csrr a0, vlenb
2163- ; RV32-NEXT: li a1, 24
2164- ; RV32-NEXT: mul a0, a0, a1
2165- ; RV32-NEXT: add a0, sp, a0
2166- ; RV32-NEXT: addi a0, a0, 16
2167- ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2168- ; RV32-NEXT: addi a0, sp, 16
2169- ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2170- ; RV32-NEXT: vand.vv v16, v8, v16, v0.t
2171- ; RV32-NEXT: csrr a0, vlenb
2172- ; RV32-NEXT: li a1, 48
2173- ; RV32-NEXT: mul a0, a0, a1
2145+ ; RV32-NEXT: slli a0, a0, 5
21742146; RV32-NEXT: add a0, sp, a0
21752147; RV32-NEXT: addi a0, a0, 16
21762148; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2177- ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
2149+ ; RV32-NEXT: addi a0, sp, 16
2150+ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2151+ ; RV32-NEXT: vand.vv v8, v16, v8, v0.t
21782152; RV32-NEXT: csrr a0, vlenb
2179- ; RV32-NEXT: li a1, 48
2153+ ; RV32-NEXT: li a1, 40
21802154; RV32-NEXT: mul a0, a0, a1
21812155; RV32-NEXT: add a0, sp, a0
21822156; RV32-NEXT: addi a0, a0, 16
2183- ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
2157+ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2158+ ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
21842159; RV32-NEXT: csrr a0, vlenb
21852160; RV32-NEXT: li a1, 40
21862161; RV32-NEXT: mul a0, a0, a1
21872162; RV32-NEXT: add a0, sp, a0
21882163; RV32-NEXT: addi a0, a0, 16
2189- ; RV32-NEXT: vl8r .v v16 , (a0) # Unknown-size Folded Reload
2164+ ; RV32-NEXT: vs8r .v v8 , (a0) # Unknown-size Folded Spill
21902165; RV32-NEXT: csrr a0, vlenb
21912166; RV32-NEXT: li a1, 48
21922167; RV32-NEXT: mul a0, a0, a1
21932168; RV32-NEXT: add a0, sp, a0
21942169; RV32-NEXT: addi a0, a0, 16
21952170; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2196- ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
21972171; RV32-NEXT: csrr a0, vlenb
2198- ; RV32-NEXT: li a1, 24
2172+ ; RV32-NEXT: li a1, 40
21992173; RV32-NEXT: mul a0, a0, a1
22002174; RV32-NEXT: add a0, sp, a0
22012175; RV32-NEXT: addi a0, a0, 16
2202- ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
2176+ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2177+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
22032178; RV32-NEXT: csrr a0, vlenb
2204- ; RV32-NEXT: li a1, 48
2205- ; RV32-NEXT: mul a0, a0, a1
2179+ ; RV32-NEXT: slli a0, a0, 5
22062180; RV32-NEXT: add a0, sp, a0
22072181; RV32-NEXT: addi a0, a0, 16
2208- ; RV32-NEXT: vl8r .v v8 , (a0) # Unknown-size Folded Reload
2209- ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
2182+ ; RV32-NEXT: vs8r .v v16 , (a0) # Unknown-size Folded Spill
2183+ ; RV32-NEXT: vmv8r.v v16, v8
22102184; RV32-NEXT: csrr a0, vlenb
22112185; RV32-NEXT: li a1, 40
22122186; RV32-NEXT: mul a0, a0, a1
22132187; RV32-NEXT: add a0, sp, a0
22142188; RV32-NEXT: addi a0, a0, 16
22152189; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2216- ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
2190+ ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
2191+ ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
22172192; RV32-NEXT: csrr a0, vlenb
2218- ; RV32-NEXT: li a1, 24
2219- ; RV32-NEXT: mul a0, a0, a1
2193+ ; RV32-NEXT: slli a0, a0, 5
22202194; RV32-NEXT: add a0, sp, a0
22212195; RV32-NEXT: addi a0, a0, 16
2222- ; RV32-NEXT: vl8r.v v8 , (a0) # Unknown-size Folded Reload
2223- ; RV32-NEXT: vadd.vv v8, v8, v16 , v0.t
2196+ ; RV32-NEXT: vl8r.v v16 , (a0) # Unknown-size Folded Reload
2197+ ; RV32-NEXT: vadd.vv v8, v16, v8 , v0.t
22242198; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
22252199; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
22262200; RV32-NEXT: csrr a0, vlenb
2227- ; RV32-NEXT: slli a0, a0, 5
2201+ ; RV32-NEXT: li a1, 24
2202+ ; RV32-NEXT: mul a0, a0, a1
22282203; RV32-NEXT: add a0, sp, a0
22292204; RV32-NEXT: addi a0, a0, 16
22302205; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -2386,23 +2361,23 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
23862361; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
23872362; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
23882363; RV32-NEXT: vand.vv v24, v24, v0
2389- ; RV32-NEXT: vsub.vv v24 , v16, v24
2364+ ; RV32-NEXT: vsub.vv v16 , v16, v24
23902365; RV32-NEXT: lui a3, 209715
23912366; RV32-NEXT: addi a3, a3, 819
23922367; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
23932368; RV32-NEXT: vmv.v.x v0, a3
23942369; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2395- ; RV32-NEXT: vand.vv v16, v24 , v0
2396- ; RV32-NEXT: vsrl.vi v24, v24 , 2
2370+ ; RV32-NEXT: vand.vv v24, v16 , v0
2371+ ; RV32-NEXT: vsrl.vi v16, v16 , 2
23972372; RV32-NEXT: csrr a3, vlenb
23982373; RV32-NEXT: slli a3, a3, 4
23992374; RV32-NEXT: add a3, sp, a3
24002375; RV32-NEXT: addi a3, a3, 16
24012376; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
2402- ; RV32-NEXT: vand.vv v24, v24, v0
2403- ; RV32-NEXT: vadd.vv v24, v16, v24
2404- ; RV32-NEXT: vsrl.vi v16, v24, 4
2377+ ; RV32-NEXT: vand.vv v16, v16, v0
24052378; RV32-NEXT: vadd.vv v16, v24, v16
2379+ ; RV32-NEXT: vsrl.vi v24, v16, 4
2380+ ; RV32-NEXT: vadd.vv v16, v16, v24
24062381; RV32-NEXT: lui a3, 61681
24072382; RV32-NEXT: addi a3, a3, -241
24082383; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
@@ -2437,16 +2412,16 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
24372412; RV32-NEXT: addi a0, a0, 16
24382413; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
24392414; RV32-NEXT: vand.vv v24, v24, v0
2440- ; RV32-NEXT: vsub.vv v24 , v8, v24
2415+ ; RV32-NEXT: vsub.vv v8 , v8, v24
24412416; RV32-NEXT: csrr a0, vlenb
24422417; RV32-NEXT: slli a0, a0, 4
24432418; RV32-NEXT: add a0, sp, a0
24442419; RV32-NEXT: addi a0, a0, 16
24452420; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
2446- ; RV32-NEXT: vand.vv v8, v24 , v0
2447- ; RV32-NEXT: vsrl.vi v24, v24 , 2
2448- ; RV32-NEXT: vand.vv v24, v24 , v0
2449- ; RV32-NEXT: vadd.vv v8, v8, v24
2421+ ; RV32-NEXT: vand.vv v24, v8 , v0
2422+ ; RV32-NEXT: vsrl.vi v8, v8 , 2
2423+ ; RV32-NEXT: vand.vv v8, v8 , v0
2424+ ; RV32-NEXT: vadd.vv v8, v24, v8
24502425; RV32-NEXT: vsrl.vi v24, v8, 4
24512426; RV32-NEXT: vadd.vv v8, v8, v24
24522427; RV32-NEXT: csrr a0, vlenb
0 commit comments