@@ -2005,18 +2005,29 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
20052005 }
20062006
20072007 // When the vector needs to split into multiple register groups and the index
2008- // exceeds single vector register group, we need to extract the element via
2009- // stack.
2010- if (Opcode == Instruction::ExtractElement && LT.first > 1 &&
2011- ((Index == -1U ) || (Index > LT.second .getVectorMinNumElements () &&
2008+ // exceeds single vector register group, we need to insert/ extract the element
2009+ // via stack.
2010+ if (LT.first > 1 &&
2011+ ((Index == -1U ) || (Index >= LT.second .getVectorMinNumElements () &&
20122012 LT.second .isScalableVector ()))) {
20132013 Type *ScalarType = Val->getScalarType ();
20142014 Align VecAlign = DL.getPrefTypeAlign (Val);
20152015 Align SclAlign = DL.getPrefTypeAlign (ScalarType);
2016+
20162017 // Store all split vectors into stack and load the target element.
2017- return LT.first *
2018- getMemoryOpCost (Instruction::Store, Val, VecAlign, 0 , CostKind) +
2019- getMemoryOpCost (Instruction::Load, ScalarType, SclAlign, 0 ,
2018+ if (Opcode == Instruction::ExtractElement)
2019+ return LT.first * getMemoryOpCost (Instruction::Store, Val, VecAlign, 0 ,
2020+ CostKind) +
2021+ getMemoryOpCost (Instruction::Load, ScalarType, SclAlign, 0 ,
2022+ CostKind);
2023+
2024+ // Store all split vectors into stack and store the target element and load
2025+ // vectors back.
2026+ return LT.first * (getMemoryOpCost (Instruction::Store, Val, VecAlign, 0 ,
2027+ CostKind) +
2028+ getMemoryOpCost (Instruction::Load, Val, VecAlign, 0 ,
2029+ CostKind)) +
2030+ getMemoryOpCost (Instruction::Store, ScalarType, SclAlign, 0 ,
20202031 CostKind);
20212032 }
20222033
0 commit comments