Skip to content

Commit 9cf511a

Browse files
committed
[RISCV] Add intrinsics for vector AMO operations
Add vamoswap, vamoadd, vamoxor, vamoand, vamoor, vamomin, vamomax, vamominu, vamomaxu intrinsics. Reviewed By: craig.topper, khchen Differential Revision: https://reviews.llvm.org/D94589
1 parent 9a0900d commit 9cf511a

20 files changed

+25297
-14
lines changed

llvm/include/llvm/IR/IntrinsicsRISCV.td

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,20 @@ let TargetPrefix = "riscv" in {
469469
[LLVMMatchType<0>, llvm_anyvector_ty,
470470
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
471471
[IntrNoMem]>, RISCVVIntrinsic;
472+
// For atomic operations without mask
473+
// Input: (base, index, value, vl)
// The result and `value` share one overloaded vector type and `base` is a
// pointer to that type; `index` and `vl` are independently overloaded vector
// and integer types. The only attribute listed is NoCapture on argument 0.
474+
class RISCVAMONoMask
475+
: Intrinsic<[llvm_anyvector_ty],
476+
[LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, LLVMMatchType<0>,
477+
llvm_anyint_ty],
478+
[NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic;
479+
// For atomic operations with mask
480+
// Input: (base, index, value, mask, vl)
// Same operand layout as the unmasked form, with an extra `mask` operand
// before `vl`. The mask is typed as i1 via
// LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, i.e. relative to overloaded
// type 0 (the result/value vector type). The only attribute listed is
// NoCapture on argument 0.
481+
class RISCVAMOMask
482+
: Intrinsic<[llvm_anyvector_ty],
483+
[LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, LLVMMatchType<0>,
484+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
485+
[NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic;
472486

473487
multiclass RISCVUSLoad {
474488
def "int_riscv_" # NAME : RISCVUSLoad;
@@ -568,6 +582,10 @@ let TargetPrefix = "riscv" in {
568582
def "int_riscv_" #NAME :RISCVConversionNoMask;
569583
def "int_riscv_" # NAME # "_mask" : RISCVConversionMask;
570584
}
585+
// Defines a pair of AMO intrinsics for one operation name: the unmasked
// int_riscv_<NAME> (RISCVAMONoMask) and the masked int_riscv_<NAME>_mask
// (RISCVAMOMask).
multiclass RISCVAMO {
586+
def "int_riscv_" # NAME : RISCVAMONoMask;
587+
def "int_riscv_" # NAME # "_mask" : RISCVAMOMask;
588+
}
571589

572590
defm vle : RISCVUSLoad;
573591
defm vleff : RISCVUSLoad;
@@ -578,6 +596,16 @@ let TargetPrefix = "riscv" in {
578596
defm vsxe: RISCVIStore;
579597
defm vsuxe: RISCVIStore;
580598

599+
// Vector AMO intrinsics. Each defm expands RISCVAMO, yielding an unmasked
// int_riscv_<name> intrinsic and a masked int_riscv_<name>_mask intrinsic.
defm vamoswap : RISCVAMO;
600+
defm vamoadd : RISCVAMO;
601+
defm vamoxor : RISCVAMO;
602+
defm vamoand : RISCVAMO;
603+
defm vamoor : RISCVAMO;
604+
defm vamomin : RISCVAMO;
605+
defm vamomax : RISCVAMO;
606+
defm vamominu : RISCVAMO;
607+
defm vamomaxu : RISCVAMO;
608+
581609
defm vadd : RISCVBinaryAAX;
582610
defm vsub : RISCVBinaryAAX;
583611
defm vrsub : RISCVBinaryAAX;

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 205 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,38 @@ def MxList {
7373
list<LMULInfo> m = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
7474
}
7575

76+
// Selects the list of LMULInfo records associated with element width `eew`.
// Starting from the full MF8..M8 list at eew = 8, each doubling of eew drops
// the smallest remaining fractional LMUL. Only eew values 8, 16, 32 and 64
// have a !cond case; there is no default case for other values.
class MxSet<int eew> {
77+
list<LMULInfo> m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
78+
!eq(eew, 16) : [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
79+
!eq(eew, 32) : [V_MF2, V_M1, V_M2, V_M4, V_M8],
80+
!eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]);
81+
}
82+
7683
// Recursively counts how many right shifts by one reduce `num` to 1:
// val is 0 when num is 1, otherwise 1 + shift_amount<num >> 1>.val.
// For a power of two this is log2(num).
class shift_amount<int num> {
7784
int val = !if(!eq(num, 1), 0, !add(1, shift_amount<!srl(num, 1)>.val));
7885
}
7986

87+
// Maps an LMUL name string to its integer "octuple" value:
// "MF8" -> 1, "MF4" -> 2, "MF2" -> 4, "M1" -> 8, "M2" -> 16, "M4" -> 32,
// "M8" -> 64 (presumably 8 x LMUL, so fractional LMULs stay integral).
// There is no default case for other strings.
class octuple_from_str<string MX> {
88+
int ret = !cond(!eq(MX, "MF8") : 1,
89+
!eq(MX, "MF4") : 2,
90+
!eq(MX, "MF2") : 4,
91+
!eq(MX, "M1") : 8,
92+
!eq(MX, "M2") : 16,
93+
!eq(MX, "M4") : 32,
94+
!eq(MX, "M8") : 64);
95+
}
96+
97+
// Inverse of octuple_from_str: maps an octuple value (1, 2, 4, 8, 16, 32, 64)
// back to its LMUL name string ("MF8" .. "M8"). Any other value yields the
// placeholder string "NoDef".
class octuple_to_str<int octuple> {
98+
string ret = !if(!eq(octuple, 1), "MF8",
99+
!if(!eq(octuple, 2), "MF4",
100+
!if(!eq(octuple, 4), "MF2",
101+
!if(!eq(octuple, 8), "M1",
102+
!if(!eq(octuple, 16), "M2",
103+
!if(!eq(octuple, 32), "M4",
104+
!if(!eq(octuple, 64), "M8",
105+
"NoDef")))))));
106+
}
107+
80108
// Output pattern for X0 used to represent VLMAX in the pseudo instructions.
81109
def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
82110

@@ -826,6 +854,74 @@ class VPseudoTernaryNoMask<VReg RetClass,
826854
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
827855
}
828856

857+
// Unmasked pseudo instruction for a vector AMO that writes its result ("WD").
//   RetClass  - register class of the data operand/result; it is passed
//               through GetVRegNoV0<...>.R (presumably to exclude V0 -
//               confirm against GetVRegNoV0's definition).
//   Op1Class  - register class of the $vs2 operand.
// Operands: $rs1 (GPR), $vs2, $vd, then $vl and $sew.
class VPseudoAMOWDNoMask<VReg RetClass,
858+
VReg Op1Class> :
859+
Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
860+
(ins GPR:$rs1,
861+
Op1Class:$vs2,
862+
GetVRegNoV0<RetClass>.R:$vd,
863+
GPR:$vl, ixlenimm:$sew), []>,
864+
RISCVVPseudo {
865+
// Flagged as both reading and writing memory, with side effects.
let mayLoad = 1;
866+
let mayStore = 1;
867+
let hasSideEffects = 1;
868+
let usesCustomInserter = 1;
869+
// The result register is tied to the $vd input operand.
let Constraints = "$vd_wd = $vd";
870+
let Uses = [VL, VTYPE];
871+
let HasVLOp = 1;
872+
let HasSEWOp = 1;
873+
// Unmasked form: no $vm operand is present, so a dummy mask is flagged.
let HasDummyMask = 1;
874+
// The underlying real instruction is derived from this pseudo's NAME.
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
875+
}
876+
877+
// Masked pseudo instruction for a vector AMO that writes its result ("WD").
// Identical to VPseudoAMOWDNoMask except that it takes an explicit
// VMaskOp:$vm operand before $vl and does not set HasDummyMask.
//   RetClass  - register class of the data operand/result, passed through
//               GetVRegNoV0<...>.R.
//   Op1Class  - register class of the $vs2 operand.
class VPseudoAMOWDMask<VReg RetClass,
878+
VReg Op1Class> :
879+
Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
880+
(ins GPR:$rs1,
881+
Op1Class:$vs2,
882+
GetVRegNoV0<RetClass>.R:$vd,
883+
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
884+
RISCVVPseudo {
885+
// Flagged as both reading and writing memory, with side effects.
let mayLoad = 1;
886+
let mayStore = 1;
887+
let hasSideEffects = 1;
888+
let usesCustomInserter = 1;
889+
// The result register is tied to the $vd input operand.
let Constraints = "$vd_wd = $vd";
890+
let Uses = [VL, VTYPE];
891+
let HasVLOp = 1;
892+
let HasSEWOp = 1;
893+
// The underlying real instruction is derived from this pseudo's NAME.
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
894+
}
895+
896+
// Defines the masked and unmasked AMO pseudos for one index element width
// `eew`, for every (sew, lmul) pair with sew in [32, 64] and lmul in
// MxSet<sew>.m whose derived index EMUL is representable (MF8..M8).
// Records are suffixed "_WD_<lmul>_<emul>" and "_WD_<lmul>_<emul>_MASK".
multiclass VPseudoAMOEI<int eew> {
897+
// Standard scalar AMOs support 32, 64, and 128 memory data bits,
898+
// and in the base vector "V" extension, only SEW up to ELEN = max(XLEN, FLEN)
899+
// is required to be supported.
900+
// Therefore only SEW in [32, 64] is allowed here.
901+
foreach sew = [32, 64] in {
902+
foreach lmul = MxSet<sew>.m in {
903+
defvar octuple_lmul = octuple_from_str<lmul.MX>.ret;
904+
// Calculate emul = eew * lmul / sew
// (in octuple units; the division is a right shift by shift_amount<sew>).
905+
defvar octuple_emul = !srl(!mul(eew, octuple_lmul), shift_amount<sew>.val);
// Skip combinations whose octuple EMUL falls outside 1..64.
906+
if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
907+
defvar emulMX = octuple_to_str<octuple_emul>.ret;
908+
defvar lmulMX = octuple_to_str<octuple_lmul>.ret;
909+
defvar emul= !cast<LMULInfo>("V_" # emulMX);
// Looks the LMULInfo record up by name; this defvar shadows the foreach
// variable `lmul` for the rest of this scope.
910+
defvar lmul = !cast<LMULInfo>("V_" # lmulMX);
911+
let VLMul = lmul.value in {
912+
def "_WD_" # lmulMX # "_" # emulMX : VPseudoAMOWDNoMask<lmul.vrclass, emul.vrclass>;
913+
def "_WD_" # lmulMX # "_" # emulMX # "_MASK" : VPseudoAMOWDMask<lmul.vrclass, emul.vrclass>;
914+
}
915+
}
916+
}
917+
}
918+
}
919+
920+
// Instantiates VPseudoAMOEI once per element width in EEWList, inserting
// "EI<eew>" into the generated record names.
multiclass VPseudoAMO {
921+
foreach eew = EEWList in
922+
defm "EI" # eew : VPseudoAMOEI<eew>;
923+
}
924+
829925
multiclass VPseudoUSLoad {
830926
foreach lmul = MxList.m in {
831927
defvar LInfo = lmul.MX;
@@ -1508,6 +1604,42 @@ class VPatTernaryMask<string intrinsic,
15081604
(mask_type V0),
15091605
(NoX0 GPR:$vl), sew)>;
15101606

1607+
// Selection pattern for an unmasked vector AMO intrinsic.
// Matches (intrinsic_name rs1, vs2, vd, vl) and emits the pseudo named
// inst # "_WD_" # vlmul.MX # "_" # emul.MX with operands
// ($rs1, $vs2, $vd, vl, sew).
//   result_type    - type of the result and of the $vd operand.
//   op1_type       - type of the $vs2 operand, held in op1_reg_class.
//   vlmul / emul   - LMULInfo records whose MX strings form the pseudo name.
class VPatAMOWDNoMask<string intrinsic_name,
1608+
string inst,
1609+
ValueType result_type,
1610+
ValueType op1_type,
1611+
int sew,
1612+
LMULInfo vlmul,
1613+
LMULInfo emul,
1614+
VReg op1_reg_class> :
1615+
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
1616+
GPR:$rs1,
1617+
(op1_type op1_reg_class:$vs2),
1618+
(result_type vlmul.vrclass:$vd),
1619+
(XLenVT GPR:$vl))),
1620+
(!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX)
1621+
$rs1, $vs2, $vd,
1622+
(NoX0 GPR:$vl), sew)>;
1623+
1624+
// Selection pattern for a masked vector AMO intrinsic.
// Matches (intrinsic_name # "_mask" rs1, vs2, vd, mask, vl), with the mask
// in V0, and emits the pseudo named
// inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK" with operands
// ($rs1, $vs2, $vd, V0 mask, vl, sew).
//   result_type    - type of the result and of the $vd operand.
//   op1_type       - type of the $vs2 operand, held in op1_reg_class.
//   mask_type      - type of the mask operand.
//   vlmul / emul   - LMULInfo records whose MX strings form the pseudo name.
class VPatAMOWDMask<string intrinsic_name,
1625+
string inst,
1626+
ValueType result_type,
1627+
ValueType op1_type,
1628+
ValueType mask_type,
1629+
int sew,
1630+
LMULInfo vlmul,
1631+
LMULInfo emul,
1632+
VReg op1_reg_class> :
1633+
Pat<(result_type (!cast<Intrinsic>(intrinsic_name # "_mask")
1634+
GPR:$rs1,
1635+
(op1_type op1_reg_class:$vs2),
1636+
(result_type vlmul.vrclass:$vd),
1637+
(mask_type V0),
1638+
(XLenVT GPR:$vl))),
1639+
(!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK")
1640+
$rs1, $vs2, $vd,
1641+
(mask_type V0), (NoX0 GPR:$vl), sew)>;
1642+
15111643
multiclass VPatUSLoad<string intrinsic,
15121644
string inst,
15131645
LLVMType type,
@@ -2383,6 +2515,44 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
23832515
}
23842516
}
23852517

2518+
// Emits both selection patterns for one AMO intrinsic/pseudo combination:
// the unmasked pattern (VPatAMOWDNoMask) and the masked pattern
// (VPatAMOWDMask). offset_type is forwarded as the type of the $vs2 operand.
multiclass VPatAMOWD<string intrinsic,
2519+
string inst,
2520+
ValueType result_type,
2521+
ValueType offset_type,
2522+
ValueType mask_type,
2523+
int sew,
2524+
LMULInfo vlmul,
2525+
LMULInfo emul,
2526+
VReg op1_reg_class>
2527+
{
2528+
def : VPatAMOWDNoMask<intrinsic, inst, result_type, offset_type,
2529+
sew, vlmul, emul, op1_reg_class>;
2530+
def : VPatAMOWDMask<intrinsic, inst, result_type, offset_type,
2531+
mask_type, sew, vlmul, emul, op1_reg_class>;
2532+
}
2533+
2534+
// Generates AMO selection patterns for every index element width in EEWList
// and every type in `vtilist` whose SEW is 32 or 64. For each pair the index
// EMUL is derived from eew, LMUL and SEW; pairs whose EMUL is not
// representable (octuple value outside 1..64) are skipped. The pseudo name
// prefix is inst # "EI" # eew.
multiclass VPatAMOV_WD<string intrinsic,
2535+
string inst,
2536+
list<VTypeInfo> vtilist> {
2537+
foreach eew = EEWList in {
2538+
foreach vti = vtilist in {
2539+
if !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)) then {
2540+
defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret;
2541+
// Calculate emul = eew * lmul / sew
// (in octuple units; the division is a right shift by shift_amount<SEW>).
2542+
defvar octuple_emul = !srl(!mul(eew, octuple_lmul), shift_amount<vti.SEW>.val);
2543+
if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
2544+
defvar emulMX = octuple_to_str<octuple_emul>.ret;
// The index operand's type info is looked up by name: "VI" # eew # emulMX.
2545+
defvar offsetVti = !cast<VTypeInfo>("VI" # eew # emulMX);
2546+
defvar inst_ei = inst # "EI" # eew;
2547+
defm : VPatAMOWD<intrinsic, inst_ei,
2548+
vti.Vector, offsetVti.Vector,
2549+
vti.Mask, vti.SEW, vti.LMul, offsetVti.LMul, offsetVti.RegClass>;
2550+
}
2551+
}
2552+
}
2553+
}
2554+
}
2555+
23862556
//===----------------------------------------------------------------------===//
23872557
// Pseudo instructions
23882558
//===----------------------------------------------------------------------===//
@@ -2459,6 +2629,19 @@ foreach eew = EEWList in {
24592629
defm PseudoVLE # eew # FF : VPseudoUSLoad;
24602630
}
24612631

2632+
//===----------------------------------------------------------------------===//
2633+
// 8. Vector AMO Operations
2634+
//===----------------------------------------------------------------------===//
2635+
// One VPseudoAMO expansion per AMO operation. Each expansion defines the
// masked and unmasked "_WD_" pseudos for every index EEW in EEWList.
defm PseudoVAMOSWAP : VPseudoAMO;
2636+
defm PseudoVAMOADD : VPseudoAMO;
2637+
defm PseudoVAMOXOR : VPseudoAMO;
2638+
defm PseudoVAMOAND : VPseudoAMO;
2639+
defm PseudoVAMOOR : VPseudoAMO;
2640+
defm PseudoVAMOMIN : VPseudoAMO;
2641+
defm PseudoVAMOMAX : VPseudoAMO;
2642+
defm PseudoVAMOMINU : VPseudoAMO;
2643+
defm PseudoVAMOMAXU : VPseudoAMO;
2644+
24622645
//===----------------------------------------------------------------------===//
24632646
// 12. Vector Integer Arithmetic Instructions
24642647
//===----------------------------------------------------------------------===//
@@ -2983,13 +3166,7 @@ foreach vti = AllVectors in
29833166
foreach vti = AllVectors in
29843167
foreach eew = EEWList in {
29853168
defvar vlmul = vti.LMul;
2986-
defvar octuple_lmul = !cond(!eq(vti.LMul.MX, "MF8") : 1,
2987-
!eq(vti.LMul.MX, "MF4") : 2,
2988-
!eq(vti.LMul.MX, "MF2") : 4,
2989-
!eq(vti.LMul.MX, "M1") : 8,
2990-
!eq(vti.LMul.MX, "M2") : 16,
2991-
!eq(vti.LMul.MX, "M4") : 32,
2992-
!eq(vti.LMul.MX, "M8") : 64);
3169+
defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret;
29933170
defvar log_sew = shift_amount<vti.SEW>.val;
29943171
// The data vector register group has EEW=SEW, EMUL=LMUL, while the offset
29953172
// vector register group has EEW encoding in the instruction and EMUL=(EEW/SEW)*LMUL.
@@ -2998,13 +3175,7 @@ foreach eew = EEWList in {
29983175
// A legal octuple elmul must be greater than 0 and less than or equal to 64.
29993176
if !gt(octuple_elmul, 0) then {
30003177
if !le(octuple_elmul, 64) then {
3001-
defvar log_elmul = shift_amount<octuple_elmul>.val;
3002-
// 0, 1, 2 -> V_MF8 ~ V_MF2
3003-
// 3, 4, 5, 6 -> V_M1 ~ V_M8
3004-
defvar elmul_str = !if(!eq(log_elmul, 0), "MF8",
3005-
!if(!eq(log_elmul, 1), "MF4",
3006-
!if(!eq(log_elmul, 2), "MF2",
3007-
"M" # !cast<string>(!shl(1, !add(log_elmul, -3))))));
3178+
defvar elmul_str = octuple_to_str<octuple_elmul>.ret;
30083179
defvar elmul =!cast<LMULInfo>("V_" # elmul_str);
30093180
defvar idx_vti = !cast<VTypeInfo>("VI" # eew # elmul_str);
30103181

@@ -3023,12 +3194,32 @@ foreach eew = EEWList in {
30233194
}
30243195
}
30253196
}
3197+
} // Predicates = [HasStdExtV]
30263198

3199+
//===----------------------------------------------------------------------===//
3200+
// 8. Vector AMO Operations
3201+
//===----------------------------------------------------------------------===//
3202+
// Selection patterns for all nine vector AMO intrinsics over the integer
// vector types (AllIntegerVectors), guarded by the HasStdExtZvamo predicate.
let Predicates = [HasStdExtZvamo] in {
3203+
defm "" : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllIntegerVectors>;
3204+
defm "" : VPatAMOV_WD<"int_riscv_vamoadd", "PseudoVAMOADD", AllIntegerVectors>;
3205+
defm "" : VPatAMOV_WD<"int_riscv_vamoxor", "PseudoVAMOXOR", AllIntegerVectors>;
3206+
defm "" : VPatAMOV_WD<"int_riscv_vamoand", "PseudoVAMOAND", AllIntegerVectors>;
3207+
defm "" : VPatAMOV_WD<"int_riscv_vamoor", "PseudoVAMOOR", AllIntegerVectors>;
3208+
defm "" : VPatAMOV_WD<"int_riscv_vamomin", "PseudoVAMOMIN", AllIntegerVectors>;
3209+
defm "" : VPatAMOV_WD<"int_riscv_vamomax", "PseudoVAMOMAX", AllIntegerVectors>;
3210+
defm "" : VPatAMOV_WD<"int_riscv_vamominu", "PseudoVAMOMINU", AllIntegerVectors>;
3211+
defm "" : VPatAMOV_WD<"int_riscv_vamomaxu", "PseudoVAMOMAXU", AllIntegerVectors>;
3212+
} // Predicates = [HasStdExtZvamo]
3213+
3214+
// Only vamoswap gets patterns over the floating-point vector types
// (AllFloatVectors); these additionally require the HasStdExtF predicate.
let Predicates = [HasStdExtZvamo, HasStdExtF] in {
3215+
defm "" : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllFloatVectors>;
3216+
} // Predicates = [HasStdExtZvamo, HasStdExtF]
30273217

30283218
//===----------------------------------------------------------------------===//
30293219
// 12. Vector Integer Arithmetic Instructions
30303220
//===----------------------------------------------------------------------===//
30313221

3222+
let Predicates = [HasStdExtV] in {
30323223
//===----------------------------------------------------------------------===//
30333224
// 12.1. Vector Single-Width Integer Add and Subtract
30343225
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)