@@ -5318,14 +5318,9 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
5318
5318
};
5319
5319
5320
5320
static const CostTblEntry AVX2InterleavedStoreTbl[] = {
5321
- {2 , MVT::v2i8, 1 }, // interleave 2 x 2i8 into 4i8 (and store)
5322
- {2 , MVT::v4i8, 1 }, // interleave 2 x 4i8 into 8i8 (and store)
5323
- {2 , MVT::v8i8, 1 }, // interleave 2 x 8i8 into 16i8 (and store)
5324
5321
{2 , MVT::v16i8, 3 }, // interleave 2 x 16i8 into 32i8 (and store)
5325
5322
{2 , MVT::v32i8, 4 }, // interleave 2 x 32i8 into 64i8 (and store)
5326
5323
5327
- {2 , MVT::v2i16, 1 }, // interleave 2 x 2i16 into 4i16 (and store)
5328
- {2 , MVT::v4i16, 1 }, // interleave 2 x 4i16 into 8i16 (and store)
5329
5324
{2 , MVT::v8i16, 3 }, // interleave 2 x 8i16 into 16i16 (and store)
5330
5325
{2 , MVT::v16i16, 4 }, // interleave 2 x 16i16 into 32i16 (and store)
5331
5326
{2 , MVT::v32i16, 8 }, // interleave 2 x 32i16 into 64i16 (and store)
@@ -5410,6 +5405,15 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
5410
5405
{6 , MVT::v8i64, 30 }, // interleave 6 x 8i64 into 48i64 (and store)
5411
5406
};
5412
5407
5408
+ static const CostTblEntry SSE2InterleavedStoreTbl[] = { // Rows are {interleave factor, per-member vector type, cost}; looked up below by (Factor, ETy.getSimpleVT()) when ST->hasSSE2(), and the matching Cost is added to MemOpCosts.
5409
+ {2 , MVT::v2i8, 1 }, // interleave 2 x 2i8 into 4i8 (and store)
5410
+ {2 , MVT::v4i8, 1 }, // interleave 2 x 4i8 into 8i8 (and store)
5411
+ {2 , MVT::v8i8, 1 }, // interleave 2 x 8i8 into 16i8 (and store)
5412
+
5413
+ {2 , MVT::v2i16, 1 }, // interleave 2 x 2i16 into 4i16 (and store)
5414
+ {2 , MVT::v4i16, 1 }, // interleave 2 x 4i16 into 8i16 (and store)
5415
+ };
5416
+
5413
5417
if (Opcode == Instruction::Load) {
5414
5418
// FIXME: if we have partially-interleaved groups, with gaps,
5415
5419
// should we discount the not-demanded indices?
@@ -5436,6 +5440,11 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
5436
5440
if (const auto *Entry = CostTableLookup (AVX2InterleavedStoreTbl, Factor,
5437
5441
ETy.getSimpleVT ()))
5438
5442
return MemOpCosts + Entry->Cost ;
5443
+
5444
+ if (ST->hasSSE2 ())
5445
+ if (const auto *Entry = CostTableLookup (SSE2InterleavedStoreTbl, Factor,
5446
+ ETy.getSimpleVT ()))
5447
+ return MemOpCosts + Entry->Cost ;
5439
5448
}
5440
5449
5441
5450
return BaseT::getInterleavedMemoryOpCost (Opcode, VecTy, Factor, Indices,
0 commit comments