@@ -1797,35 +1797,61 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
17971797; RV32-NEXT: addi a3, a3, 48
17981798; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
17991799; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
1800- ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
1800+ ; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t
1801+ ; RV32-NEXT: csrr a3, vlenb
1802+ ; RV32-NEXT: slli a3, a3, 3
1803+ ; RV32-NEXT: add a3, sp, a3
1804+ ; RV32-NEXT: addi a3, a3, 48
1805+ ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
18011806; RV32-NEXT: csrr a3, vlenb
18021807; RV32-NEXT: li a4, 40
18031808; RV32-NEXT: mul a3, a3, a4
18041809; RV32-NEXT: add a3, sp, a3
18051810; RV32-NEXT: addi a3, a3, 48
18061811; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1812+ ; RV32-NEXT: csrr a3, vlenb
1813+ ; RV32-NEXT: slli a3, a3, 3
1814+ ; RV32-NEXT: add a3, sp, a3
1815+ ; RV32-NEXT: addi a3, a3, 48
1816+ ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
18071817; RV32-NEXT: vand.vv v24, v24, v16, v0.t
18081818; RV32-NEXT: csrr a3, vlenb
18091819; RV32-NEXT: li a4, 24
18101820; RV32-NEXT: mul a3, a3, a4
18111821; RV32-NEXT: add a3, sp, a3
18121822; RV32-NEXT: addi a3, a3, 48
18131823; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1814- ; RV32-NEXT: vsub.vv v24, v16, v24, v0.t
1815- ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
1824+ ; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
18161825; RV32-NEXT: csrr a3, vlenb
18171826; RV32-NEXT: li a4, 24
18181827; RV32-NEXT: mul a3, a3, a4
18191828; RV32-NEXT: add a3, sp, a3
18201829; RV32-NEXT: addi a3, a3, 48
18211830; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1822- ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
1831+ ; RV32-NEXT: csrr a3, vlenb
1832+ ; RV32-NEXT: li a4, 24
1833+ ; RV32-NEXT: mul a3, a3, a4
1834+ ; RV32-NEXT: add a3, sp, a3
1835+ ; RV32-NEXT: addi a3, a3, 48
1836+ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
18231837; RV32-NEXT: vand.vv v16, v16, v8, v0.t
18241838; RV32-NEXT: csrr a3, vlenb
1839+ ; RV32-NEXT: slli a3, a3, 3
1840+ ; RV32-NEXT: add a3, sp, a3
1841+ ; RV32-NEXT: addi a3, a3, 48
1842+ ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1843+ ; RV32-NEXT: csrr a3, vlenb
18251844; RV32-NEXT: li a4, 24
18261845; RV32-NEXT: mul a3, a3, a4
18271846; RV32-NEXT: add a3, sp, a3
18281847; RV32-NEXT: addi a3, a3, 48
1848+ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1849+ ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
1850+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1851+ ; RV32-NEXT: csrr a3, vlenb
1852+ ; RV32-NEXT: slli a3, a3, 3
1853+ ; RV32-NEXT: add a3, sp, a3
1854+ ; RV32-NEXT: addi a3, a3, 48
18291855; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
18301856; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
18311857; RV32-NEXT: csrr a3, vlenb
@@ -1891,29 +1917,45 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
18911917; RV32-NEXT: addi a0, a0, 48
18921918; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
18931919; RV32-NEXT: vand.vv v16, v24, v16, v0.t
1894- ; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
1920+ ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
1921+ ; RV32-NEXT: csrr a0, vlenb
1922+ ; RV32-NEXT: li a1, 40
1923+ ; RV32-NEXT: mul a0, a0, a1
1924+ ; RV32-NEXT: add a0, sp, a0
1925+ ; RV32-NEXT: addi a0, a0, 48
1926+ ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
18951927; RV32-NEXT: csrr a0, vlenb
18961928; RV32-NEXT: slli a0, a0, 5
18971929; RV32-NEXT: add a0, sp, a0
18981930; RV32-NEXT: addi a0, a0, 48
18991931; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1900- ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
19011932; RV32-NEXT: csrr a0, vlenb
19021933; RV32-NEXT: li a1, 40
19031934; RV32-NEXT: mul a0, a0, a1
19041935; RV32-NEXT: add a0, sp, a0
19051936; RV32-NEXT: addi a0, a0, 48
1937+ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1938+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1939+ ; RV32-NEXT: csrr a0, vlenb
1940+ ; RV32-NEXT: slli a0, a0, 4
1941+ ; RV32-NEXT: add a0, sp, a0
1942+ ; RV32-NEXT: addi a0, a0, 48
19061943; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
1907- ; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
1944+ ; RV32-NEXT: csrr a0, vlenb
1945+ ; RV32-NEXT: li a1, 40
1946+ ; RV32-NEXT: mul a0, a0, a1
1947+ ; RV32-NEXT: add a0, sp, a0
1948+ ; RV32-NEXT: addi a0, a0, 48
1949+ ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1950+ ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
19081951; RV32-NEXT: csrr a0, vlenb
19091952; RV32-NEXT: slli a0, a0, 5
19101953; RV32-NEXT: add a0, sp, a0
19111954; RV32-NEXT: addi a0, a0, 48
19121955; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
19131956; RV32-NEXT: vand.vv v8, v8, v16, v0.t
19141957; RV32-NEXT: csrr a0, vlenb
1915- ; RV32-NEXT: li a1, 40
1916- ; RV32-NEXT: mul a0, a0, a1
1958+ ; RV32-NEXT: slli a0, a0, 4
19171959; RV32-NEXT: add a0, sp, a0
19181960; RV32-NEXT: addi a0, a0, 48
19191961; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -3983,35 +4025,61 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
39834025; RV32-NEXT: addi a3, a3, 48
39844026; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
39854027; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
3986- ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
4028+ ; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t
4029+ ; RV32-NEXT: csrr a3, vlenb
4030+ ; RV32-NEXT: slli a3, a3, 3
4031+ ; RV32-NEXT: add a3, sp, a3
4032+ ; RV32-NEXT: addi a3, a3, 48
4033+ ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
39874034; RV32-NEXT: csrr a3, vlenb
39884035; RV32-NEXT: li a4, 40
39894036; RV32-NEXT: mul a3, a3, a4
39904037; RV32-NEXT: add a3, sp, a3
39914038; RV32-NEXT: addi a3, a3, 48
39924039; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4040+ ; RV32-NEXT: csrr a3, vlenb
4041+ ; RV32-NEXT: slli a3, a3, 3
4042+ ; RV32-NEXT: add a3, sp, a3
4043+ ; RV32-NEXT: addi a3, a3, 48
4044+ ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
39934045; RV32-NEXT: vand.vv v24, v24, v16, v0.t
39944046; RV32-NEXT: csrr a3, vlenb
39954047; RV32-NEXT: li a4, 24
39964048; RV32-NEXT: mul a3, a3, a4
39974049; RV32-NEXT: add a3, sp, a3
39984050; RV32-NEXT: addi a3, a3, 48
39994051; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4000- ; RV32-NEXT: vsub.vv v24, v16, v24, v0.t
4001- ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
4052+ ; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
40024053; RV32-NEXT: csrr a3, vlenb
40034054; RV32-NEXT: li a4, 24
40044055; RV32-NEXT: mul a3, a3, a4
40054056; RV32-NEXT: add a3, sp, a3
40064057; RV32-NEXT: addi a3, a3, 48
40074058; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
4008- ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
4059+ ; RV32-NEXT: csrr a3, vlenb
4060+ ; RV32-NEXT: li a4, 24
4061+ ; RV32-NEXT: mul a3, a3, a4
4062+ ; RV32-NEXT: add a3, sp, a3
4063+ ; RV32-NEXT: addi a3, a3, 48
4064+ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
40094065; RV32-NEXT: vand.vv v16, v16, v8, v0.t
40104066; RV32-NEXT: csrr a3, vlenb
4067+ ; RV32-NEXT: slli a3, a3, 3
4068+ ; RV32-NEXT: add a3, sp, a3
4069+ ; RV32-NEXT: addi a3, a3, 48
4070+ ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
4071+ ; RV32-NEXT: csrr a3, vlenb
40114072; RV32-NEXT: li a4, 24
40124073; RV32-NEXT: mul a3, a3, a4
40134074; RV32-NEXT: add a3, sp, a3
40144075; RV32-NEXT: addi a3, a3, 48
4076+ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4077+ ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
4078+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
4079+ ; RV32-NEXT: csrr a3, vlenb
4080+ ; RV32-NEXT: slli a3, a3, 3
4081+ ; RV32-NEXT: add a3, sp, a3
4082+ ; RV32-NEXT: addi a3, a3, 48
40154083; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
40164084; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
40174085; RV32-NEXT: csrr a3, vlenb
@@ -4077,29 +4145,45 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
40774145; RV32-NEXT: addi a0, a0, 48
40784146; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
40794147; RV32-NEXT: vand.vv v16, v24, v16, v0.t
4080- ; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
4148+ ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
4149+ ; RV32-NEXT: csrr a0, vlenb
4150+ ; RV32-NEXT: li a1, 40
4151+ ; RV32-NEXT: mul a0, a0, a1
4152+ ; RV32-NEXT: add a0, sp, a0
4153+ ; RV32-NEXT: addi a0, a0, 48
4154+ ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
40814155; RV32-NEXT: csrr a0, vlenb
40824156; RV32-NEXT: slli a0, a0, 5
40834157; RV32-NEXT: add a0, sp, a0
40844158; RV32-NEXT: addi a0, a0, 48
40854159; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
4086- ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
40874160; RV32-NEXT: csrr a0, vlenb
40884161; RV32-NEXT: li a1, 40
40894162; RV32-NEXT: mul a0, a0, a1
40904163; RV32-NEXT: add a0, sp, a0
40914164; RV32-NEXT: addi a0, a0, 48
4165+ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
4166+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
4167+ ; RV32-NEXT: csrr a0, vlenb
4168+ ; RV32-NEXT: slli a0, a0, 4
4169+ ; RV32-NEXT: add a0, sp, a0
4170+ ; RV32-NEXT: addi a0, a0, 48
40924171; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
4093- ; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
4172+ ; RV32-NEXT: csrr a0, vlenb
4173+ ; RV32-NEXT: li a1, 40
4174+ ; RV32-NEXT: mul a0, a0, a1
4175+ ; RV32-NEXT: add a0, sp, a0
4176+ ; RV32-NEXT: addi a0, a0, 48
4177+ ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
4178+ ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
40944179; RV32-NEXT: csrr a0, vlenb
40954180; RV32-NEXT: slli a0, a0, 5
40964181; RV32-NEXT: add a0, sp, a0
40974182; RV32-NEXT: addi a0, a0, 48
40984183; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
40994184; RV32-NEXT: vand.vv v8, v8, v16, v0.t
41004185; RV32-NEXT: csrr a0, vlenb
4101- ; RV32-NEXT: li a1, 40
4102- ; RV32-NEXT: mul a0, a0, a1
4186+ ; RV32-NEXT: slli a0, a0, 4
41034187; RV32-NEXT: add a0, sp, a0
41044188; RV32-NEXT: addi a0, a0, 48
41054189; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
0 commit comments