@@ -1980,30 +1980,6 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
19801980 SlideCost = 1 ; // With a constant index, we do not need to use addi.
19811981 }
19821982
1983- // Inserting/extracting an i64 on a target with XLEN=32 needs more instructions.
1984- if (Val->getScalarType ()->isIntegerTy () &&
1985- ST->getXLen () < Val->getScalarSizeInBits ()) {
1986- // For extractelement, we need the following instructions:
1987- // vsetivli zero, 1, e64, m1, ta, mu (not counted)
1988- // vslidedown.vx v8, v8, a0
1989- // vmv.x.s a0, v8
1990- // li a1, 32
1991- // vsrl.vx v8, v8, a1
1992- // vmv.x.s a1, v8
1993-
1994- // For insertelement, we need the following instructions:
1995- // vsetivli zero, 2, e32, m4, ta, mu (not counted)
1996- // vmv.v.i v12, 0
1997- // vslide1up.vx v16, v12, a1
1998- // vslide1up.vx v12, v16, a0
1999- // addi a0, a2, 1
2000- // vsetvli zero, a0, e64, m4, tu, mu (not counted)
2001- // vslideup.vx v8, v12, a2
2002-
2003- // TODO: should we count these special vsetvlis?
2004- BaseCost = Opcode == Instruction::InsertElement ? 3 : 4 ;
2005- }
2006-
20071983 // When the vector needs to be split into multiple register groups and the index
20081984 // exceeds a single vector register group, we need to insert/extract the element
20091985 // via the stack.
@@ -2031,6 +2007,30 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
20312007 CostKind);
20322008 }
20332009
2010+ // Inserting/extracting an i64 on a target with XLEN=32 needs more instructions.
2011+ if (Val->getScalarType ()->isIntegerTy () &&
2012+ ST->getXLen () < Val->getScalarSizeInBits ()) {
2013+ // For extractelement, we need the following instructions:
2014+ // vsetivli zero, 1, e64, m1, ta, mu (not counted)
2015+ // vslidedown.vx v8, v8, a0
2016+ // vmv.x.s a0, v8
2017+ // li a1, 32
2018+ // vsrl.vx v8, v8, a1
2019+ // vmv.x.s a1, v8
2020+
2021+ // For insertelement, we need the following instructions:
2022+ // vsetivli zero, 2, e32, m4, ta, mu (not counted)
2023+ // vmv.v.i v12, 0
2024+ // vslide1up.vx v16, v12, a1
2025+ // vslide1up.vx v12, v16, a0
2026+ // addi a0, a2, 1
2027+ // vsetvli zero, a0, e64, m4, tu, mu (not counted)
2028+ // vslideup.vx v8, v12, a2
2029+
2030+ // TODO: should we count these special vsetvlis?
2031+ BaseCost = Opcode == Instruction::InsertElement ? 3 : 4 ;
2032+ }
2033+
20342034 return BaseCost + SlideCost;
20352035}
20362036
0 commit comments