@@ -8195,35 +8195,142 @@ CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue)
81958195 return emitComp->eeFindJitDataOffs (cnum);
81968196}
81978197
8198- #if defined(TARGET_XARCH)
8199- CORINFO_FIELD_HANDLE emitter::emitSimd32Const (simd32_t constValue)
8198+ #ifdef TARGET_XARCH
8199+ // ------------------------------------------------------------------------
8200+ // emitSimdConst: Create a SIMD constant in the data section.
8201+ //
8202+ // Arguments:
8203+ // constValue - pointer to the constant value to place in the data section
8204+ // attr - emitAttr whose EA_SIZE is the size of the constant; sizes below 8 are emitted as TYP_FLOAT
8205+ //
8206+ // Return Value:
8207+ // A field handle representing the data offset to access the constant.
8208+ //
8209+ // Note:
8210+ // Access to inline data is 'abstracted' by a special type of static member
8211+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
8212+ // to constant data, not a real static field.
8213+ //
8214+ CORINFO_FIELD_HANDLE emitter::emitSimdConst (simd_t * constValue, emitAttr attr)
82008215{
8201- unsigned cnsSize = 32 ;
8202- unsigned cnsAlign = cnsSize;
8216+ unsigned cnsSize = EA_SIZE (attr);
8217+ unsigned cnsAlign = cnsSize;
8218+ var_types dataType = (cnsSize >= 8 ) ? emitComp->getSIMDTypeForSize (cnsSize) : TYP_FLOAT;
82038219
8220+ #ifdef TARGET_XARCH
82048221 if (emitComp->compCodeOpt () == Compiler::SMALL_CODE)
82058222 {
82068223 cnsAlign = dataSection::MIN_DATA_ALIGN;
82078224 }
8225+ #endif // TARGET_XARCH
82088226
8209- UNATIVE_OFFSET cnum = emitDataConst (& constValue, cnsSize, cnsAlign, TYP_SIMD32 );
8227+ UNATIVE_OFFSET cnum = emitDataConst (constValue, cnsSize, cnsAlign, dataType );
82108228 return emitComp->eeFindJitDataOffs (cnum);
82118229}
82128230
8213- CORINFO_FIELD_HANDLE emitter::emitSimd64Const (simd64_t constValue)
8231+ // ------------------------------------------------------------------------
8232+ // emitSimdConstCompressedLoad: Create a SIMD data section constant,
8233+ // compressing it if possible, and emit an appropriate instruction
8234+ // to load or broadcast the constant to a register.
8235+ //
8236+ // Arguments:
8237+ // constValue - pointer to the constant value
8238+ // attr - emitAttr whose EA_SIZE is the size of the constant; asserted to be between 8 and 64
8239+ // targetReg - The target register
8240+ //
8241+ void emitter::emitSimdConstCompressedLoad (simd_t * constValue, emitAttr attr, regNumber targetReg)
82148242{
8215- unsigned cnsSize = 64 ;
8216- unsigned cnsAlign = cnsSize;
8243+ assert (EA_SIZE (attr) >= 8 && EA_SIZE (attr) <= 64 );
82178244
8218- if (emitComp->compCodeOpt () == Compiler::SMALL_CODE)
8245+ unsigned cnsSize = EA_SIZE (attr);
8246+ unsigned dataSize = cnsSize;
8247+ instruction ins = (cnsSize == 8 ) ? INS_movsd_simd : INS_movups;
8248+
8249+ // Most constant vectors tend to have repeated values, so we will first check to see if
8250+ // we can replace a full vector load with a smaller broadcast.
8251+
8252+ if ((dataSize == 64 ) && (constValue->v256 [1 ] == constValue->v256 [0 ]))
82198253 {
8220- cnsAlign = dataSection::MIN_DATA_ALIGN;
8254+ assert (emitComp->IsBaselineVector512IsaSupportedDebugOnly ());
8255+ dataSize = 32 ;
8256+ ins = INS_vbroadcastf32x8;
82218257 }
82228258
8223- UNATIVE_OFFSET cnum = emitDataConst (&constValue, cnsSize, cnsAlign, TYP_SIMD64);
8224- return emitComp->eeFindJitDataOffs (cnum);
8225- }
8259+ if ((dataSize == 32 ) && (constValue->v128 [1 ] == constValue->v128 [0 ]))
8260+ {
8261+ assert (emitComp->IsBaselineVector256IsaSupportedDebugOnly ());
8262+ dataSize = 16 ;
8263+ ins = INS_vbroadcastf128;
8264+ }
82268265
8266+ if ((dataSize == 16 ) && (constValue->u64 [1 ] == constValue->u64 [0 ]))
8267+ {
8268+ if (((cnsSize == 16 ) && emitComp->compOpportunisticallyDependsOn (InstructionSet_SSE3)) ||
8269+ emitComp->compOpportunisticallyDependsOn (InstructionSet_AVX))
8270+ {
8271+ dataSize = 8 ;
8272+ ins = (cnsSize == 16 ) ? INS_movddup : INS_vbroadcastsd;
8273+ }
8274+ }
8275+
8276+ // `vbroadcastss` fills the full SIMD register, so we can't do this last step if the
8277+ // original constant was smaller than a full reg (e.g. TYP_SIMD8)
8278+
8279+ if ((dataSize == 8 ) && (cnsSize >= 16 ) && (constValue->u32 [1 ] == constValue->u32 [0 ]))
8280+ {
8281+ if (emitComp->compOpportunisticallyDependsOn (InstructionSet_AVX))
8282+ {
8283+ dataSize = 4 ;
8284+ ins = INS_vbroadcastss;
8285+ }
8286+ }
8287+
8288+ if (dataSize < cnsSize)
8289+ {
8290+ // We found a broadcast match, so emit the broadcast instruction and return.
8291+ // Here we use the original emitAttr for the instruction, because we need to
8292+ // produce a register of the original constant's size, filled with the pattern.
8293+
8294+ CORINFO_FIELD_HANDLE hnd = emitSimdConst (constValue, EA_ATTR (dataSize));
8295+ emitIns_R_C (ins, attr, targetReg, hnd, 0 );
8296+ return ;
8297+ }
8298+
8299+ // Otherwise, if the upper lanes and/or elements of the constant are zero, we can use a
8300+ // smaller load, because all scalar and vector memory load instructions zero the upper elements.
8301+
8302+ simd32_t zeroValue = {};
8303+
8304+ if ((dataSize == 64 ) && (constValue->v256 [1 ] == zeroValue))
8305+ {
8306+ dataSize = 32 ;
8307+ }
8308+
8309+ if ((dataSize == 32 ) && (constValue->v128 [1 ] == zeroValue.v128 [0 ]))
8310+ {
8311+ dataSize = 16 ;
8312+ }
8313+
8314+ if ((dataSize == 16 ) && (constValue->u64 [1 ] == 0 ))
8315+ {
8316+ dataSize = 8 ;
8317+ ins = INS_movsd_simd;
8318+ }
8319+
8320+ if ((dataSize == 8 ) && (constValue->u32 [1 ] == 0 ))
8321+ {
8322+ dataSize = 4 ;
8323+ ins = INS_movss;
8324+ }
8325+
8326+ // Here we set the emitAttr to the size of the actual load. It will zero extend
8327+ // up to the native SIMD register size.
8328+
8329+ attr = EA_ATTR (dataSize);
8330+
8331+ CORINFO_FIELD_HANDLE hnd = emitSimdConst (constValue, attr);
8332+ emitIns_R_C (ins, attr, targetReg, hnd, 0 );
8333+ }
82278334#endif // TARGET_XARCH
82288335
82298336#if defined(FEATURE_MASKED_HW_INTRINSICS)
0 commit comments