@@ -324,23 +324,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
324324; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
325325; RV32-NEXT: addi s0, sp, 384
326326; RV32-NEXT: andi sp, sp, -128
327- ; RV32-NEXT: li a2, 128
328- ; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
327+ ; RV32-NEXT: zext.b a1, a1
328+ ; RV32-NEXT: mv a2, sp
329+ ; RV32-NEXT: li a3, 128
330+ ; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
329331; RV32-NEXT: vle8.v v8, (a0)
330332; RV32-NEXT: addi a0, a0, 128
331333; RV32-NEXT: vle8.v v16, (a0)
334+ ; RV32-NEXT: add a1, a2, a1
332335; RV32-NEXT: vmseq.vi v0, v8, 0
333- ; RV32-NEXT: vmv.v.i v8, 0
334- ; RV32-NEXT: vmerge.vim v24, v8, 1, v0
335- ; RV32-NEXT: vmseq.vi v0, v16, 0
336- ; RV32-NEXT: zext.b a0, a1
337- ; RV32-NEXT: mv a1, sp
338- ; RV32-NEXT: add a0, a1, a0
339- ; RV32-NEXT: vse8.v v24, (a1)
340- ; RV32-NEXT: vmerge.vim v8, v8, 1, v0
341- ; RV32-NEXT: addi a1, sp, 128
342- ; RV32-NEXT: vse8.v v8, (a1)
343- ; RV32-NEXT: lbu a0, 0(a0)
336+ ; RV32-NEXT: vmv.v.i v24, 0
337+ ; RV32-NEXT: vmseq.vi v8, v16, 0
338+ ; RV32-NEXT: vmerge.vim v16, v24, 1, v0
339+ ; RV32-NEXT: vse8.v v16, (a2)
340+ ; RV32-NEXT: vmv1r.v v0, v8
341+ ; RV32-NEXT: vmerge.vim v8, v24, 1, v0
342+ ; RV32-NEXT: addi a0, sp, 128
343+ ; RV32-NEXT: vse8.v v8, (a0)
344+ ; RV32-NEXT: lbu a0, 0(a1)
344345; RV32-NEXT: addi sp, s0, -384
345346; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
346347; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -354,23 +355,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
354355; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
355356; RV64-NEXT: addi s0, sp, 384
356357; RV64-NEXT: andi sp, sp, -128
357- ; RV64-NEXT: li a2, 128
358- ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
358+ ; RV64-NEXT: zext.b a1, a1
359+ ; RV64-NEXT: mv a2, sp
360+ ; RV64-NEXT: li a3, 128
361+ ; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
359362; RV64-NEXT: vle8.v v8, (a0)
360363; RV64-NEXT: addi a0, a0, 128
361364; RV64-NEXT: vle8.v v16, (a0)
365+ ; RV64-NEXT: add a1, a2, a1
362366; RV64-NEXT: vmseq.vi v0, v8, 0
363- ; RV64-NEXT: vmv.v.i v8, 0
364- ; RV64-NEXT: vmerge.vim v24, v8, 1, v0
365- ; RV64-NEXT: vmseq.vi v0, v16, 0
366- ; RV64-NEXT: zext.b a0, a1
367- ; RV64-NEXT: mv a1, sp
368- ; RV64-NEXT: add a0, a1, a0
369- ; RV64-NEXT: vse8.v v24, (a1)
370- ; RV64-NEXT: vmerge.vim v8, v8, 1, v0
371- ; RV64-NEXT: addi a1, sp, 128
372- ; RV64-NEXT: vse8.v v8, (a1)
373- ; RV64-NEXT: lbu a0, 0(a0)
367+ ; RV64-NEXT: vmv.v.i v24, 0
368+ ; RV64-NEXT: vmseq.vi v8, v16, 0
369+ ; RV64-NEXT: vmerge.vim v16, v24, 1, v0
370+ ; RV64-NEXT: vse8.v v16, (a2)
371+ ; RV64-NEXT: vmv1r.v v0, v8
372+ ; RV64-NEXT: vmerge.vim v8, v24, 1, v0
373+ ; RV64-NEXT: addi a0, sp, 128
374+ ; RV64-NEXT: vse8.v v8, (a0)
375+ ; RV64-NEXT: lbu a0, 0(a1)
374376; RV64-NEXT: addi sp, s0, -384
375377; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
376378; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
@@ -384,23 +386,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
384386; RV32ZBS-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
385387; RV32ZBS-NEXT: addi s0, sp, 384
386388; RV32ZBS-NEXT: andi sp, sp, -128
387- ; RV32ZBS-NEXT: li a2, 128
388- ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
389+ ; RV32ZBS-NEXT: zext.b a1, a1
390+ ; RV32ZBS-NEXT: mv a2, sp
391+ ; RV32ZBS-NEXT: li a3, 128
392+ ; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
389393; RV32ZBS-NEXT: vle8.v v8, (a0)
390394; RV32ZBS-NEXT: addi a0, a0, 128
391395; RV32ZBS-NEXT: vle8.v v16, (a0)
396+ ; RV32ZBS-NEXT: add a1, a2, a1
392397; RV32ZBS-NEXT: vmseq.vi v0, v8, 0
393- ; RV32ZBS-NEXT: vmv.v.i v8, 0
394- ; RV32ZBS-NEXT: vmerge.vim v24, v8, 1, v0
395- ; RV32ZBS-NEXT: vmseq.vi v0, v16, 0
396- ; RV32ZBS-NEXT: zext.b a0, a1
397- ; RV32ZBS-NEXT: mv a1, sp
398- ; RV32ZBS-NEXT: add a0, a1, a0
399- ; RV32ZBS-NEXT: vse8.v v24, (a1)
400- ; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0
401- ; RV32ZBS-NEXT: addi a1, sp, 128
402- ; RV32ZBS-NEXT: vse8.v v8, (a1)
403- ; RV32ZBS-NEXT: lbu a0, 0(a0)
398+ ; RV32ZBS-NEXT: vmv.v.i v24, 0
399+ ; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
400+ ; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0
401+ ; RV32ZBS-NEXT: vse8.v v16, (a2)
402+ ; RV32ZBS-NEXT: vmv1r.v v0, v8
403+ ; RV32ZBS-NEXT: vmerge.vim v8, v24, 1, v0
404+ ; RV32ZBS-NEXT: addi a0, sp, 128
405+ ; RV32ZBS-NEXT: vse8.v v8, (a0)
406+ ; RV32ZBS-NEXT: lbu a0, 0(a1)
404407; RV32ZBS-NEXT: addi sp, s0, -384
405408; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
406409; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -414,23 +417,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
414417; RV64ZBS-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
415418; RV64ZBS-NEXT: addi s0, sp, 384
416419; RV64ZBS-NEXT: andi sp, sp, -128
417- ; RV64ZBS-NEXT: li a2, 128
418- ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
420+ ; RV64ZBS-NEXT: zext.b a1, a1
421+ ; RV64ZBS-NEXT: mv a2, sp
422+ ; RV64ZBS-NEXT: li a3, 128
423+ ; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
419424; RV64ZBS-NEXT: vle8.v v8, (a0)
420425; RV64ZBS-NEXT: addi a0, a0, 128
421426; RV64ZBS-NEXT: vle8.v v16, (a0)
427+ ; RV64ZBS-NEXT: add a1, a2, a1
422428; RV64ZBS-NEXT: vmseq.vi v0, v8, 0
423- ; RV64ZBS-NEXT: vmv.v.i v8, 0
424- ; RV64ZBS-NEXT: vmerge.vim v24, v8, 1, v0
425- ; RV64ZBS-NEXT: vmseq.vi v0, v16, 0
426- ; RV64ZBS-NEXT: zext.b a0, a1
427- ; RV64ZBS-NEXT: mv a1, sp
428- ; RV64ZBS-NEXT: add a0, a1, a0
429- ; RV64ZBS-NEXT: vse8.v v24, (a1)
430- ; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0
431- ; RV64ZBS-NEXT: addi a1, sp, 128
432- ; RV64ZBS-NEXT: vse8.v v8, (a1)
433- ; RV64ZBS-NEXT: lbu a0, 0(a0)
429+ ; RV64ZBS-NEXT: vmv.v.i v24, 0
430+ ; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
431+ ; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0
432+ ; RV64ZBS-NEXT: vse8.v v16, (a2)
433+ ; RV64ZBS-NEXT: vmv1r.v v0, v8
434+ ; RV64ZBS-NEXT: vmerge.vim v8, v24, 1, v0
435+ ; RV64ZBS-NEXT: addi a0, sp, 128
436+ ; RV64ZBS-NEXT: vse8.v v8, (a0)
437+ ; RV64ZBS-NEXT: lbu a0, 0(a1)
434438; RV64ZBS-NEXT: addi sp, s0, -384
435439; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
436440; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
0 commit comments