Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 97efaab

Browse files
Merge pull request #15804 from tannergooding/hwintrin-containment
Adding basic containment support to the x86 HWIntrinsics
2 parents d1793d3 + 592aa82 commit 97efaab

File tree

6 files changed

+365
-78
lines changed

6 files changed

+365
-78
lines changed

src/jit/codegenlinear.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ void genPutArgStkSIMD12(GenTree* treeNode);
116116

117117
#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
118118
void genHWIntrinsic(GenTreeHWIntrinsic* node);
119+
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
119120
void genSSEIntrinsic(GenTreeHWIntrinsic* node);
120121
void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
121122
void genSSE3Intrinsic(GenTreeHWIntrinsic* node);

src/jit/emitfmtsxarch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ IF_DEF(RRD_MRD, IS_GM_RD|IS_R1_RD, DSP) // read reg , read [
123123
IF_DEF(RWR_MRD, IS_GM_RD|IS_R1_WR, DSP) // write reg , read [mem]
124124
IF_DEF(RRW_MRD, IS_GM_RD|IS_R1_RW, DSP) // r/w reg , read [mem]
125125

126+
IF_DEF(RWR_RRD_MRD, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP) // write reg , read reg2 , read [mem]
126127
IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR, DSP) // write reg , offset mem
127128

128129
IF_DEF(MRD_RRD, IS_GM_RD|IS_R1_RD, DSP) // read [mem], read reg
@@ -147,6 +148,8 @@ IF_DEF(RRD_SRD, IS_SF_RD|IS_R1_RD, NONE) // read reg , read [
147148
IF_DEF(RWR_SRD, IS_SF_RD|IS_R1_WR, NONE) // write reg , read [stk]
148149
IF_DEF(RRW_SRD, IS_SF_RD|IS_R1_RW, NONE) // r/w reg , read [stk]
149150

151+
IF_DEF(RWR_RRD_SRD, IS_SF_RD|IS_R1_WR|IS_R2_RD, NONE) // write reg , read reg2, read [stk]
152+
150153
IF_DEF(SRD_RRD, IS_SF_RD|IS_R1_RD, NONE) // read [stk], read reg
151154
IF_DEF(SWR_RRD, IS_SF_WR|IS_R1_RD, NONE) // write [stk], read reg
152155
IF_DEF(SRW_RRD, IS_SF_RW|IS_R1_RD, NONE) // r/w [stk], read reg
@@ -170,6 +173,8 @@ IF_DEF(RRD_ARD, IS_AM_RD|IS_R1_RD, AMD ) // read reg , read [
170173
IF_DEF(RWR_ARD, IS_AM_RD|IS_R1_WR, AMD ) // write reg , read [adr]
171174
IF_DEF(RRW_ARD, IS_AM_RD|IS_R1_RW, AMD ) // r/w reg , read [adr]
172175

176+
IF_DEF(RWR_RRD_ARD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD ) // write reg , read reg2, read [adr]
177+
173178
IF_DEF(ARD_RRD, IS_AM_RD|IS_R1_RD, AMD ) // read [adr], read reg
174179
IF_DEF(AWR_RRD, IS_AM_WR|IS_R1_RD, AMD ) // write [adr], read reg
175180
IF_DEF(ARW_RRD, IS_AM_RW|IS_R1_RD, AMD ) // r/w [adr], read reg

src/jit/emitxarch.cpp

Lines changed: 189 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1998,8 +1998,8 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
19981998
// BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
19991999
assert(ins != INS_bt);
20002000

2001-
assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
2002-
|| (attrSize == EA_16BYTE) // only for x64
2001+
assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
2002+
|| (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64
20032003
|| (ins == INS_movzx) || (ins == INS_movsx));
20042004
size = 3;
20052005
}
@@ -2588,6 +2588,8 @@ emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
25882588
return IF_RWR_MRD;
25892589
case IF_RRW_ARD:
25902590
return IF_RRW_MRD;
2591+
case IF_RWR_RRD_ARD:
2592+
return IF_RWR_RRD_MRD;
25912593

25922594
case IF_ARD_RRD:
25932595
return IF_MRD_RRD;
@@ -3889,6 +3891,71 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN
38893891
emitCurIGsize += sz;
38903892
}
38913893

3894+
// emitIns_R_A: Add an instruction with one register operand and one memory operand
// described by an indirection node.
//
// Arguments:
//    ins   - the instruction to emit
//    attr  - the emit attribute (operand size/flags) used to allocate the descriptor
//    reg1  - the register operand
//    indir - the indirection supplying the memory operand; its Offset() is used as the displacement
//    fmt   - the instruction format handed to emitHandleMemOp
//
// Notes:
//    Unlike emitIns_R_R_A, no VEX prefix size adjustment is added to the computed size here.
//    The only call visible in this change is on the non-VEX path of emitIns_SIMD_R_R_A.
//
void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt)
3895+
{
3896+
ssize_t offs = indir->Offset();
3897+
instrDesc* id = emitNewInstrAmd(attr, offs);
3898+
3899+
id->idIns(ins);
3900+
id->idReg1(reg1);
3901+
3902+
// presumably fills in the address mode and the instruction format on 'id' from 'indir' -
// emitHandleMemOp is defined elsewhere; confirm
emitHandleMemOp(indir, id, fmt, ins);
3903+
3904+
UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
3905+
id->idCodeSize(sz);
3906+
3907+
dispIns(id);
3908+
// account for the new instruction in the current instruction group's size
emitCurIGsize += sz;
3909+
}
3910+
3911+
// emitIns_R_R_A: Add an instruction with two register operands and one memory operand
// described by an indirection node.
//
// Arguments:
//    ins   - the instruction to emit; asserted to be an SSE/AVX instruction that is a
//            three-operand AVX instruction
//    attr  - the emit attribute (operand size/flags) used to allocate the descriptor
//    reg1  - stored as the descriptor's first register
//    reg2  - stored as the descriptor's second register
//    indir - the indirection supplying the memory operand; its Offset() is used as the displacement
//    fmt   - the instruction format handed to emitHandleMemOp
//
void emitter::emitIns_R_R_A(
3912+
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt)
3913+
{
3914+
assert(IsSSEOrAVXInstruction(ins));
3915+
assert(IsThreeOperandAVXInstruction(ins));
3916+
3917+
ssize_t offs = indir->Offset();
3918+
instrDesc* id = emitNewInstrAmd(attr, offs);
3919+
3920+
id->idIns(ins);
3921+
id->idReg1(reg1);
3922+
id->idReg2(reg2);
3923+
3924+
// presumably fills in the address mode and the instruction format on 'id' from 'indir' -
// emitHandleMemOp is defined elsewhere; confirm
emitHandleMemOp(indir, id, fmt, ins);
3925+
3926+
// address-mode size plus the VEX prefix size adjustment for this instruction
UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
3927+
id->idCodeSize(sz);
3928+
3929+
dispIns(id);
3930+
// account for the new instruction in the current instruction group's size
emitCurIGsize += sz;
3931+
}
3932+
3933+
// emitIns_R_R_C: Add an instruction with two register operands and one static field
// (class variable) memory operand. The descriptor uses the IF_RWR_RRD_MRD format.
//
// Arguments:
//    ins    - the instruction to emit; asserted to be an SSE/AVX instruction that is a
//             three-operand AVX instruction
//    attr   - the emit attribute (operand size/flags); may have EA_DSP_RELOC_FLG added below
//    reg1   - stored as the descriptor's first register
//    reg2   - stored as the descriptor's second register
//    fldHnd - handle of the static field being read
//    offs   - offset passed to emitNewInstrDsp
//
void emitter::emitIns_R_R_C(
3934+
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
3935+
{
3936+
assert(IsSSEOrAVXInstruction(ins));
3937+
assert(IsThreeOperandAVXInstruction(ins));
3938+
3939+
// Statics always need relocs (the flag is set unless jitStaticFldIsGlobAddr reports true for the handle)
3940+
if (!jitStaticFldIsGlobAddr(fldHnd))
3941+
{
3942+
attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
3943+
}
3944+
3945+
instrDesc* id = emitNewInstrDsp(attr, offs);
3946+
// NOTE(review): the size is computed before id->idIns(ins) is set below. If emitInsSizeCV
// reads the instruction from 'id', it would not see 'ins' yet - confirm against its definition.
UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
3947+
3948+
id->idIns(ins);
3949+
id->idInsFmt(IF_RWR_RRD_MRD);
3950+
id->idReg1(reg1);
3951+
id->idReg2(reg2);
3952+
id->idAddr()->iiaFieldHnd = fldHnd;
3953+
3954+
id->idCodeSize(sz);
3955+
dispIns(id);
3956+
// account for the new instruction in the current instruction group's size
emitCurIGsize += sz;
3957+
}
3958+
38923959
/*****************************************************************************
38933960
*
38943961
* Add an instruction with three register operands.
@@ -3915,6 +3982,30 @@ void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg,
39153982
emitCurIGsize += sz;
39163983
}
39173984

3985+
// emitIns_R_R_S: Add an instruction with two register operands and one stack-based
// (local variable) memory operand. The descriptor uses the IF_RWR_RRD_SRD format.
//
// Arguments:
//    ins  - the instruction to emit; asserted to be an SSE/AVX instruction that is a
//           three-operand AVX instruction
//    attr - the emit attribute (operand size/flags) used to allocate the descriptor
//    reg1 - stored as the descriptor's first register
//    reg2 - stored as the descriptor's second register
//    varx - the local variable number of the memory operand
//    offs - the offset within that local variable
//
void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs)
3986+
{
3987+
assert(IsSSEOrAVXInstruction(ins));
3988+
assert(IsThreeOperandAVXInstruction(ins));
3989+
3990+
instrDesc* id = emitNewInstr(attr);
3991+
// stack-variable operand size plus the VEX prefix size adjustment for this instruction
UNATIVE_OFFSET sz =
3992+
emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
3993+
3994+
id->idIns(ins);
3995+
id->idInsFmt(IF_RWR_RRD_SRD);
3996+
id->idReg1(reg1);
3997+
id->idReg2(reg2);
3998+
id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
3999+
4000+
// debug builds also record emitVarRefOffs on the descriptor; the IF_RWR_RRD_SRD display
// code passes idVarRefOffs to emitDispFrameRef
#ifdef DEBUG
4001+
id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4002+
#endif
4003+
4004+
id->idCodeSize(sz);
4005+
dispIns(id);
4006+
// account for the new instruction in the current instruction group's size
emitCurIGsize += sz;
4007+
}
4008+
39184009
/**********************************************************************************
39194010
* emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
39204011
*
@@ -4888,9 +4979,43 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
48884979
}
48894980

48904981
#if FEATURE_HW_INTRINSICS
4982+
// emitIns_SIMD_R_R_A: Emit a SIMD instruction whose operands are a target register, a
// source register, and a memory operand described by an indirection node.
//
// Arguments:
//    ins      - the instruction to emit
//    reg      - the target register
//    reg1     - the register source operand
//    indir    - the indirection supplying the memory source operand
//    simdtype - the SIMD type; emitTypeSize(simdtype) is used as the emit attribute
//
void emitter::emitIns_SIMD_R_R_A(
4983+
instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype)
4984+
{
4985+
if (UseVEXEncoding())
4986+
{
4987+
// VEX path: a single instruction with separate write and read registers (IF_RWR_RRD_ARD)
emitIns_R_R_A(ins, emitTypeSize(simdtype), reg, reg1, indir, IF_RWR_RRD_ARD);
4988+
}
4989+
else
4990+
{
4991+
// non-VEX path: copy reg1 into reg first (when they differ), then apply 'ins' with
// reg as a read/write operand (IF_RRW_ARD)
if (reg1 != reg)
4992+
{
4993+
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
4994+
}
4995+
emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RRW_ARD);
4996+
}
4997+
}
4998+
4999+
// emitIns_SIMD_R_R_C: Emit a SIMD instruction whose operands are a target register, a
// source register, and a static field (class variable) memory operand.
//
// Arguments:
//    ins      - the instruction to emit
//    reg      - the target register
//    reg1     - the register source operand
//    fldHnd   - handle of the static field supplying the memory source operand
//    offs     - offset forwarded to the underlying emit routine
//    simdtype - the SIMD type; emitTypeSize(simdtype) is used as the emit attribute
//
void emitter::emitIns_SIMD_R_R_C(
5000+
instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype)
5001+
{
5002+
if (UseVEXEncoding())
5003+
{
5004+
// VEX path: a single instruction taking both registers and the field operand
emitIns_R_R_C(ins, emitTypeSize(simdtype), reg, reg1, fldHnd, offs);
5005+
}
5006+
else
5007+
{
5008+
// non-VEX path: copy reg1 into reg first (when they differ), then apply 'ins' to
// reg and the field operand
if (reg1 != reg)
5009+
{
5010+
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
5011+
}
5012+
emitIns_R_C(ins, emitTypeSize(simdtype), reg, fldHnd, offs);
5013+
}
5014+
}
5015+
48915016
void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype)
48925017
{
4893-
if (UseVEXEncoding() && reg1 != reg)
5018+
if (UseVEXEncoding())
48945019
{
48955020
emitIns_R_R_R(ins, emitTypeSize(simdtype), reg, reg1, reg2);
48965021
}
@@ -4903,6 +5028,22 @@ void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1,
49035028
emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg2);
49045029
}
49055030
}
5031+
5032+
// emitIns_SIMD_R_R_S: Emit a SIMD instruction whose operands are a target register, a
// source register, and a stack-based (local variable) memory operand.
//
// Arguments:
//    ins      - the instruction to emit
//    reg      - the target register
//    reg1     - the register source operand
//    varx     - the local variable number of the memory source operand
//    offs     - the offset within that local variable
//    simdtype - the SIMD type; emitTypeSize(simdtype) is used as the emit attribute
//
void emitter::emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype)
5033+
{
5034+
if (UseVEXEncoding())
5035+
{
5036+
// VEX path: a single instruction taking both registers and the stack operand
emitIns_R_R_S(ins, emitTypeSize(simdtype), reg, reg1, varx, offs);
5037+
}
5038+
else
5039+
{
5040+
// non-VEX path: copy reg1 into reg first (when they differ), then apply 'ins' to
// reg and the stack operand
if (reg1 != reg)
5041+
{
5042+
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
5043+
}
5044+
emitIns_R_S(ins, emitTypeSize(simdtype), reg, varx, offs);
5045+
}
5046+
}
49065047
#endif
49075048

49085049
/*****************************************************************************
@@ -6918,6 +7059,11 @@ void emitter::emitDispIns(
69187059
emitDispAddrMode(id);
69197060
break;
69207061

7062+
case IF_RWR_RRD_ARD:
7063+
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7064+
emitDispAddrMode(id);
7065+
break;
7066+
69217067
case IF_ARD_RRD:
69227068
case IF_AWR_RRD:
69237069
case IF_ARW_RRD:
@@ -7061,6 +7207,12 @@ void emitter::emitDispIns(
70617207

70627208
break;
70637209

7210+
case IF_RWR_RRD_SRD:
7211+
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7212+
emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
7213+
id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
7214+
break;
7215+
70647216
case IF_RRD_RRD:
70657217
case IF_RWR_RRD:
70667218
case IF_RRW_RRD:
@@ -7189,6 +7341,12 @@ void emitter::emitDispIns(
71897341
emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
71907342
break;
71917343

7344+
case IF_RWR_RRD_MRD:
7345+
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7346+
offs = emitGetInsDsp(id);
7347+
emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
7348+
break;
7349+
71927350
case IF_RWR_MRD_OFF:
71937351

71947352
printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
@@ -7635,12 +7793,17 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
76357793
// Therefore, add VEX prefix if one is not already present.
76367794
code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
76377795

7638-
// For this format, moves do not support a third operand, so we only need to handle the binary ops.
76397796
if (IsDstDstSrcAVXInstruction(ins))
76407797
{
7641-
// Encode source operand reg in 'vvvv' bits in 1's complement form
7642-
// The order of operands are reversed, therefore use reg2 as the source.
7643-
code = insEncodeReg3456(ins, id->idReg1(), size, code);
7798+
regNumber src1 = id->idReg2();
7799+
7800+
if (id->idInsFmt() != IF_RWR_RRD_ARD)
7801+
{
7802+
src1 = id->idReg1();
7803+
}
7804+
7805+
// encode source operand reg in 'vvvv' bits in 1's complement form
7806+
code = insEncodeReg3456(ins, src1, size, code);
76447807
}
76457808

76467809
// Emit the REX prefix if required
@@ -10988,6 +11151,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1098811151
case IF_RRD_ARD:
1098911152
case IF_RWR_ARD:
1099011153
case IF_RRW_ARD:
11154+
case IF_RWR_RRD_ARD:
1099111155
code = insCodeRM(ins);
1099211156
code = AddVexPrefixIfNeeded(ins, code, size);
1099311157
regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
@@ -11082,6 +11246,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1108211246
case IF_RRD_SRD:
1108311247
case IF_RWR_SRD:
1108411248
case IF_RRW_SRD:
11249+
case IF_RWR_RRD_SRD:
1108511250
code = insCodeRM(ins);
1108611251

1108711252
// 4-byte AVX instructions are special cased inside emitOutputSV
@@ -11094,16 +11259,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1109411259
{
1109511260
code = AddVexPrefixIfNeeded(ins, code, size);
1109611261

11097-
// In case of AVX instructions that take 3 operands, encode reg1 as first source.
11098-
// Note that reg1 is both a source and a destination.
11099-
//
11100-
// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
11101-
// now we use the single source as source1 and source2.
11102-
// For this format, moves do not support a third operand, so we only need to handle the binary ops.
1110311262
if (IsDstDstSrcAVXInstruction(ins))
1110411263
{
11264+
regNumber src1 = id->idReg2();
11265+
11266+
if (id->idInsFmt() != IF_RWR_RRD_SRD)
11267+
{
11268+
src1 = id->idReg1();
11269+
}
11270+
1110511271
// encode source operand reg in 'vvvv' bits in 1's complement form
11106-
code = insEncodeReg3456(ins, id->idReg1(), size, code);
11272+
code = insEncodeReg3456(ins, src1, size, code);
1110711273
}
1110811274

1110911275
regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
@@ -11165,6 +11331,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1116511331
case IF_RRD_MRD:
1116611332
case IF_RWR_MRD:
1116711333
case IF_RRW_MRD:
11334+
case IF_RWR_RRD_MRD:
1116811335
code = insCodeRM(ins);
1116911336
// Special case 4-byte AVX instructions
1117011337
if (Is4ByteAVXInstruction(ins))
@@ -11175,16 +11342,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1117511342
{
1117611343
code = AddVexPrefixIfNeeded(ins, code, size);
1117711344

11178-
// In case of AVX instructions that take 3 operands, encode reg1 as first source.
11179-
// Note that reg1 is both a source and a destination.
11180-
//
11181-
// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
11182-
// now we use the single source as source1 and source2.
11183-
// For this format, moves do not support a third operand, so we only need to handle the binary ops.
1118411345
if (IsDstDstSrcAVXInstruction(ins))
1118511346
{
11347+
regNumber src1 = id->idReg2();
11348+
11349+
if (id->idInsFmt() != IF_RWR_RRD_MRD)
11350+
{
11351+
src1 = id->idReg1();
11352+
}
11353+
1118611354
// encode source operand reg in 'vvvv' bits in 1's complement form
11187-
code = insEncodeReg3456(ins, id->idReg1(), size, code);
11355+
code = insEncodeReg3456(ins, src1, size, code);
1118811356
}
1118911357

1119011358
regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);

src/jit/emitxarch.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,15 @@ void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
365365

366366
void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival);
367367

368+
void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt);
369+
370+
void emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt);
371+
372+
void emitIns_R_R_C(
373+
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs);
374+
375+
void emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs);
376+
368377
void emitIns_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3);
369378

370379
void emitIns_R_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, int ival);
@@ -424,7 +433,11 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg,
424433
void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
425434

426435
#if FEATURE_HW_INTRINSICS
436+
void emitIns_SIMD_R_R_A(instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype);
437+
void emitIns_SIMD_R_R_C(
438+
instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype);
427439
void emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype);
440+
void emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype);
428441
#endif
429442

430443
#if FEATURE_STACK_FP_X87

0 commit comments

Comments
 (0)