@@ -73,10 +73,38 @@ def MxList {
73
73
list<LMULInfo> m = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
74
74
}
75
75
76
+ // MxSet<eew>: the list of LMULInfo values legal for element width eew.
+ // Wider element widths omit the smallest fractional LMULs (e.g. eew=64
+ // allows only M1..M8).
+ class MxSet<int eew> {
77
+ list<LMULInfo> m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
78
+ !eq(eew, 16) : [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
79
+ !eq(eew, 32) : [V_MF2, V_M1, V_M2, V_M4, V_M8],
80
+ !eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]);
81
+ }
82
+
76
83
// shift_amount<num>: recursively computes log2(num), i.e. the val such that
// (1 << val) == num for a power-of-two num (num=1 -> 0, else 1 + log2(num>>1)).
class shift_amount<int num> {
77
84
int val = !if(!eq(num, 1), 0, !add(1, shift_amount<!srl(num, 1)>.val));
78
85
}
79
86
87
+ // octuple_from_str<MX>: map an LMUL mnemonic ("MF8".."M8") to 8 * LMUL as an
+ // integer ("octuple" units), so fractional LMULs can be handled with integer
+ // arithmetic (MF8 -> 1, ..., M1 -> 8, ..., M8 -> 64).
+ class octuple_from_str<string MX> {
88
+ int ret = !cond(!eq(MX, "MF8") : 1,
89
+ !eq(MX, "MF4") : 2,
90
+ !eq(MX, "MF2") : 4,
91
+ !eq(MX, "M1") : 8,
92
+ !eq(MX, "M2") : 16,
93
+ !eq(MX, "M4") : 32,
94
+ !eq(MX, "M8") : 64);
95
+ }
96
+
97
+ // octuple_to_str<octuple>: inverse of octuple_from_str — map 8 * LMUL back to
+ // its mnemonic string. Any value outside {1,2,4,8,16,32,64} yields "NoDef".
+ class octuple_to_str<int octuple> {
98
+ string ret = !if(!eq(octuple, 1), "MF8",
99
+ !if(!eq(octuple, 2), "MF4",
100
+ !if(!eq(octuple, 4), "MF2",
101
+ !if(!eq(octuple, 8), "M1",
102
+ !if(!eq(octuple, 16), "M2",
103
+ !if(!eq(octuple, 32), "M4",
104
+ !if(!eq(octuple, 64), "M8",
105
+ "NoDef")))))));
106
+ }
107
+
80
108
// Output pattern for X0 used to represent VLMAX in the pseudo instructions.
81
109
def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
82
110
@@ -826,6 +854,74 @@ class VPseudoTernaryNoMask<VReg RetClass,
826
854
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
827
855
}
828
856
857
+ // Unmasked vector AMO pseudo in "WD" (write-back destination) form: the old
+ // memory value is written to $vd_wd, which is tied to the $vd source operand.
+ // Marked mayLoad/mayStore/hasSideEffects since it both reads and writes
+ // memory; carries explicit VL and SEW operands like the other V pseudos.
+ class VPseudoAMOWDNoMask<VReg RetClass,
858
+ VReg Op1Class> :
859
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
860
+ (ins GPR:$rs1,
861
+ Op1Class:$vs2,
862
+ GetVRegNoV0<RetClass>.R:$vd,
863
+ GPR:$vl, ixlenimm:$sew), []>,
864
+ RISCVVPseudo {
865
+ let mayLoad = 1;
866
+ let mayStore = 1;
867
+ let hasSideEffects = 1;
868
+ let usesCustomInserter = 1;
869
+ let Constraints = "$vd_wd = $vd";
870
+ let Uses = [VL, VTYPE];
871
+ let HasVLOp = 1;
872
+ let HasSEWOp = 1;
873
+ let HasDummyMask = 1;
874
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
875
+ }
876
+
877
+ // Masked variant of VPseudoAMOWDNoMask: takes an explicit $vm mask operand
+ // (and therefore does not set HasDummyMask); otherwise identical operand
+ // layout, tied-destination constraint, and flags.
+ class VPseudoAMOWDMask<VReg RetClass,
878
+ VReg Op1Class> :
879
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
880
+ (ins GPR:$rs1,
881
+ Op1Class:$vs2,
882
+ GetVRegNoV0<RetClass>.R:$vd,
883
+ VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
884
+ RISCVVPseudo {
885
+ let mayLoad = 1;
886
+ let mayStore = 1;
887
+ let hasSideEffects = 1;
888
+ let usesCustomInserter = 1;
889
+ let Constraints = "$vd_wd = $vd";
890
+ let Uses = [VL, VTYPE];
891
+ let HasVLOp = 1;
892
+ let HasSEWOp = 1;
893
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
894
+ }
895
+
896
+ // For a given index element width eew, define the "_WD_<LMUL>_<EMUL>"
+ // pseudos (unmasked and _MASK) over every data SEW and legal LMUL,
+ // skipping combinations whose computed EMUL is out of range.
+ multiclass VPseudoAMOEI<int eew> {
897
+ // Standard scalar AMO supports 32, 64, and 128 Mem data bits,
898
+ // and in the base vector "V" extension, only SEW up to ELEN = max(XLEN, FLEN)
899
+ // are required to be supported.
900
+ // therefore only [32, 64] is allowed here.
901
+ foreach sew = [32, 64] in {
902
+ foreach lmul = MxSet<sew>.m in {
903
+ defvar octuple_lmul = octuple_from_str<lmul.MX>.ret;
904
+ // Calculate emul = eew * lmul / sew
905
+ // (done in octuple/8x units so fractional LMULs stay integral; only
906
+ // EMUL in [1/8, 8] — octuple 1..64 — is legal, guarded below).
+ defvar octuple_emul = !srl(!mul(eew, octuple_lmul), shift_amount<sew>.val);
906
+ if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
907
+ defvar emulMX = octuple_to_str<octuple_emul>.ret;
908
+ defvar lmulMX = octuple_to_str<octuple_lmul>.ret;
909
+ defvar emul= !cast<LMULInfo>("V_" # emulMX);
910
+ // NOTE(review): this defvar reuses the name of the foreach iterator
+ // 'lmul' above — confirm TableGen resolves the intended binding below.
+ defvar lmul = !cast<LMULInfo>("V_" # lmulMX);
911
+ let VLMul = lmul.value in {
912
+ def "_WD_" # lmulMX # "_" # emulMX : VPseudoAMOWDNoMask<lmul.vrclass, emul.vrclass>;
913
+ def "_WD_" # lmulMX # "_" # emulMX # "_MASK" : VPseudoAMOWDMask<lmul.vrclass, emul.vrclass>;
914
+ }
915
+ }
916
+ }
917
+ }
918
+ }
919
+
920
+ // Instantiate the EI<eew>-suffixed AMO pseudo family for every index EEW
+ // in EEWList.
+ multiclass VPseudoAMO {
921
+ foreach eew = EEWList in
922
+ defm "EI" # eew : VPseudoAMOEI<eew>;
923
+ }
924
+
829
925
multiclass VPseudoUSLoad {
830
926
foreach lmul = MxList.m in {
831
927
defvar LInfo = lmul.MX;
@@ -1508,6 +1604,42 @@ class VPatTernaryMask<string intrinsic,
1508
1604
(mask_type V0),
1509
1605
(NoX0 GPR:$vl), sew)>;
1510
1606
1607
+ // Selection pattern: match the unmasked vamo intrinsic onto the
+ // corresponding "_WD_<LMUL>_<EMUL>" pseudo, forwarding the address ($rs1),
+ // offset vector ($vs2), source/destination $vd, VL and SEW.
+ class VPatAMOWDNoMask<string intrinsic_name,
1608
+ string inst,
1609
+ ValueType result_type,
1610
+ ValueType op1_type,
1611
+ int sew,
1612
+ LMULInfo vlmul,
1613
+ LMULInfo emul,
1614
+ VReg op1_reg_class> :
1615
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
1616
+ GPR:$rs1,
1617
+ (op1_type op1_reg_class:$vs2),
1618
+ (result_type vlmul.vrclass:$vd),
1619
+ (XLenVT GPR:$vl))),
1620
+ (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX)
1621
+ $rs1, $vs2, $vd,
1622
+ (NoX0 GPR:$vl), sew)>;
1623
+
1624
+ // Masked counterpart of VPatAMOWDNoMask: matches the "_mask" intrinsic
+ // variant onto the "_MASK" pseudo, with the mask value pinned to V0.
+ class VPatAMOWDMask<string intrinsic_name,
1625
+ string inst,
1626
+ ValueType result_type,
1627
+ ValueType op1_type,
1628
+ ValueType mask_type,
1629
+ int sew,
1630
+ LMULInfo vlmul,
1631
+ LMULInfo emul,
1632
+ VReg op1_reg_class> :
1633
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name # "_mask")
1634
+ GPR:$rs1,
1635
+ (op1_type op1_reg_class:$vs2),
1636
+ (result_type vlmul.vrclass:$vd),
1637
+ (mask_type V0),
1638
+ (XLenVT GPR:$vl))),
1639
+ (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK")
1640
+ $rs1, $vs2, $vd,
1641
+ (mask_type V0), (NoX0 GPR:$vl), sew)>;
1642
+
1511
1643
multiclass VPatUSLoad<string intrinsic,
1512
1644
string inst,
1513
1645
LLVMType type,
@@ -2383,6 +2515,44 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
2383
2515
}
2384
2516
}
2385
2517
2518
+ // Emit both the unmasked and masked selection patterns for one
+ // (intrinsic, pseudo, type) combination.
+ multiclass VPatAMOWD<string intrinsic,
2519
+ string inst,
2520
+ ValueType result_type,
2521
+ ValueType offset_type,
2522
+ ValueType mask_type,
2523
+ int sew,
2524
+ LMULInfo vlmul,
2525
+ LMULInfo emul,
2526
+ VReg op1_reg_class>
2527
+ {
2528
+ def : VPatAMOWDNoMask<intrinsic, inst, result_type, offset_type,
2529
+ sew, vlmul, emul, op1_reg_class>;
2530
+ def : VPatAMOWDMask<intrinsic, inst, result_type, offset_type,
2531
+ mask_type, sew, vlmul, emul, op1_reg_class>;
2532
+ }
2533
+
2534
+ // Instantiate AMO patterns for every (index EEW, data vector type) pair in
+ // vtilist. Data SEW is restricted to 32/64 to match the pseudos, and pairs
+ // whose computed EMUL falls outside [1/8, 8] are skipped.
+ multiclass VPatAMOV_WD<string intrinsic,
2535
+ string inst,
2536
+ list<VTypeInfo> vtilist> {
2537
+ foreach eew = EEWList in {
2538
+ foreach vti = vtilist in {
2539
+ if !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)) then {
2540
+ defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret;
2541
+ // Calculate emul = eew * lmul / sew
2542
+ defvar octuple_emul = !srl(!mul(eew, octuple_lmul), shift_amount<vti.SEW>.val);
2543
+ if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
2544
+ defvar emulMX = octuple_to_str<octuple_emul>.ret;
2545
+ // Offset vector type: integer elements of width eew at the computed EMUL.
+ defvar offsetVti = !cast<VTypeInfo>("VI" # eew # emulMX);
2546
+ defvar inst_ei = inst # "EI" # eew;
2547
+ defm : VPatAMOWD<intrinsic, inst_ei,
2548
+ vti.Vector, offsetVti.Vector,
2549
+ vti.Mask, vti.SEW, vti.LMul, offsetVti.LMul, offsetVti.RegClass>;
2550
+ }
2551
+ }
2552
+ }
2553
+ }
2554
+ }
2555
+
2386
2556
//===----------------------------------------------------------------------===//
2387
2557
// Pseudo instructions
2388
2558
//===----------------------------------------------------------------------===//
@@ -2459,6 +2629,19 @@ foreach eew = EEWList in {
2459
2629
defm PseudoVLE # eew # FF : VPseudoUSLoad;
2460
2630
}
2461
2631
2632
+ //===----------------------------------------------------------------------===//
2633
+ // 8. Vector AMO Operations
2634
+ //===----------------------------------------------------------------------===//
2635
+ // Each defm below expands through VPseudoAMO -> VPseudoAMOEI into the full
+ // set of EI<eew> "_WD_<LMUL>_<EMUL>" pseudos, masked and unmasked.
+ defm PseudoVAMOSWAP : VPseudoAMO;
2636
+ defm PseudoVAMOADD : VPseudoAMO;
2637
+ defm PseudoVAMOXOR : VPseudoAMO;
2638
+ defm PseudoVAMOAND : VPseudoAMO;
2639
+ defm PseudoVAMOOR : VPseudoAMO;
2640
+ defm PseudoVAMOMIN : VPseudoAMO;
2641
+ defm PseudoVAMOMAX : VPseudoAMO;
2642
+ defm PseudoVAMOMINU : VPseudoAMO;
2643
+ defm PseudoVAMOMAXU : VPseudoAMO;
2644
+
2462
2645
//===----------------------------------------------------------------------===//
2463
2646
// 12. Vector Integer Arithmetic Instructions
2464
2647
//===----------------------------------------------------------------------===//
@@ -2983,13 +3166,7 @@ foreach vti = AllVectors in
2983
3166
foreach vti = AllVectors in
2984
3167
foreach eew = EEWList in {
2985
3168
defvar vlmul = vti.LMul;
2986
- defvar octuple_lmul = !cond(!eq(vti.LMul.MX, "MF8") : 1,
2987
- !eq(vti.LMul.MX, "MF4") : 2,
2988
- !eq(vti.LMul.MX, "MF2") : 4,
2989
- !eq(vti.LMul.MX, "M1") : 8,
2990
- !eq(vti.LMul.MX, "M2") : 16,
2991
- !eq(vti.LMul.MX, "M4") : 32,
2992
- !eq(vti.LMul.MX, "M8") : 64);
3169
+ defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret;
2993
3170
defvar log_sew = shift_amount<vti.SEW>.val;
2994
3171
// The data vector register group has EEW=SEW, EMUL=LMUL, while the offset
2995
3172
// vector register group has EEW encoding in the instruction and EMUL=(EEW/SEW)*LMUL.
@@ -2998,13 +3175,7 @@ foreach eew = EEWList in {
2998
3175
// legal octuple elmul should be more than 0 and less than equal 64
2999
3176
if !gt(octuple_elmul, 0) then {
3000
3177
if !le(octuple_elmul, 64) then {
3001
- defvar log_elmul = shift_amount<octuple_elmul>.val;
3002
- // 0, 1, 2 -> V_MF8 ~ V_MF2
3003
- // 3, 4, 5, 6 -> V_M1 ~ V_M8
3004
- defvar elmul_str = !if(!eq(log_elmul, 0), "MF8",
3005
- !if(!eq(log_elmul, 1), "MF4",
3006
- !if(!eq(log_elmul, 2), "MF2",
3007
- "M" # !cast<string>(!shl(1, !add(log_elmul, -3))))));
3178
+ defvar elmul_str = octuple_to_str<octuple_elmul>.ret;
3008
3179
defvar elmul =!cast<LMULInfo>("V_" # elmul_str);
3009
3180
defvar idx_vti = !cast<VTypeInfo>("VI" # eew # elmul_str);
3010
3181
@@ -3023,12 +3194,32 @@ foreach eew = EEWList in {
3023
3194
}
3024
3195
}
3025
3196
}
3197
+ } // Predicates = [HasStdExtV]
3026
3198
3199
+ //===----------------------------------------------------------------------===//
3200
+ // 8. Vector AMO Operations
3201
+ //===----------------------------------------------------------------------===//
3202
+ // The integer AMO patterns are gated on Zvamo; of these operations only
+ // vamoswap is also given floating-point patterns, additionally gated on F.
+ let Predicates = [HasStdExtZvamo] in {
3203
+ defm "" : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllIntegerVectors>;
3204
+ defm "" : VPatAMOV_WD<"int_riscv_vamoadd", "PseudoVAMOADD", AllIntegerVectors>;
3205
+ defm "" : VPatAMOV_WD<"int_riscv_vamoxor", "PseudoVAMOXOR", AllIntegerVectors>;
3206
+ defm "" : VPatAMOV_WD<"int_riscv_vamoand", "PseudoVAMOAND", AllIntegerVectors>;
3207
+ defm "" : VPatAMOV_WD<"int_riscv_vamoor", "PseudoVAMOOR", AllIntegerVectors>;
3208
+ defm "" : VPatAMOV_WD<"int_riscv_vamomin", "PseudoVAMOMIN", AllIntegerVectors>;
3209
+ defm "" : VPatAMOV_WD<"int_riscv_vamomax", "PseudoVAMOMAX", AllIntegerVectors>;
3210
+ defm "" : VPatAMOV_WD<"int_riscv_vamominu", "PseudoVAMOMINU", AllIntegerVectors>;
3211
+ defm "" : VPatAMOV_WD<"int_riscv_vamomaxu", "PseudoVAMOMAXU", AllIntegerVectors>;
3212
+ } // Predicates = [HasStdExtZvamo]
3213
+
3214
+ let Predicates = [HasStdExtZvamo, HasStdExtF] in {
3215
+ defm "" : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllFloatVectors>;
3216
+ } // Predicates = [HasStdExtZvamo, HasStdExtF]
3027
3217
3028
3218
//===----------------------------------------------------------------------===//
3029
3219
// 12. Vector Integer Arithmetic Instructions
3030
3220
//===----------------------------------------------------------------------===//
3031
3221
3222
+ let Predicates = [HasStdExtV] in {
3032
3223
//===----------------------------------------------------------------------===//
3033
3224
// 12.1. Vector Single-Width Integer Add and Subtract
3034
3225
//===----------------------------------------------------------------------===//
0 commit comments