@@ -24,6 +24,7 @@ def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> {
24
24
}
25
25
def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
26
26
let Outs64 = (outs DstRC.RegClass:$vdst);
27
+ let HasExt64BitDPP = 1;
27
28
let IsSingle = 1;
28
29
}
29
30
}
@@ -51,7 +52,24 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
51
52
52
53
let HasExt64BitDPP = 1 in {
53
54
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
54
- def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;
55
+ def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> {
56
+ let OutsVOP3DPP = Outs64;
57
+ let AsmVOP3DPP = getAsmVOP3DPP<Asm64>.ret;
58
+ let AsmVOP3DPP16 = getAsmVOP3DPP16<Asm64>.ret;
59
+ let AsmVOP3DPP8 = getAsmVOP3DPP8<Asm64>.ret;
60
+ }
61
+
62
+ def VOP3b_I64_I1_I32_I32_I64_DPP : VOPProfile<[i64, i32, i32, i64]> {
63
+ let HasClamp = 1;
64
+
65
+ let IsSingle = 1;
66
+ let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
67
+ let OutsVOP3DPP = Outs64;
68
+ let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
69
+ let AsmVOP3DPP = getAsmVOP3DPP<Asm64>.ret;
70
+ let AsmVOP3DPP16 = getAsmVOP3DPP16<Asm64>.ret;
71
+ let AsmVOP3DPP8 = getAsmVOP3DPP8<Asm64>.ret;
72
+ }
55
73
56
74
class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
57
75
let HasExtVOP3DPP = 0;
@@ -229,7 +247,7 @@ defm V_DIV_FMAS_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f32", VOP_F32_F32_F32
229
247
// result *= 2^64
230
248
//
231
249
let SchedRW = [WriteDouble], FPDPRounding = 1 in
232
- defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, [] >;
250
+ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
233
251
} // End Uses = [MODE, VCC, EXEC]
234
252
235
253
} // End isCommutable = 1
@@ -294,7 +312,7 @@ defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_
294
312
defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
295
313
296
314
let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
297
- defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64> , AMDGPUdiv_fixup>;
315
+ defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP_F64_F64_F64_F64_DPP_PROF , AMDGPUdiv_fixup>;
298
316
defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, any_fldexp>;
299
317
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
300
318
} // End isReMaterializable = 1
@@ -335,7 +353,7 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d
335
353
336
354
// Double precision division pre-scale.
337
355
let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in
338
- defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>;
356
+ defm V_DIV_SCALE_F64 : VOP3Inst <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>;
339
357
} // End mayRaiseFPException = 0
340
358
341
359
let isReMaterializable = 1 in
@@ -408,9 +426,9 @@ defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
408
426
} // End SubtargetPredicate = isGFX7Plus
409
427
410
428
let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
411
- let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in {
412
- defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64 >;
413
- defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64 >;
429
+ let SubtargetPredicate = isGFX7Plus in {
430
+ defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64_DPP, null_frag, [HasNotMADIntraFwdBug] >;
431
+ defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64_DPP, null_frag, [HasNotMADIntraFwdBug] >;
414
432
}
415
433
let SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug],
416
434
Constraints = "@earlyclobber $vdst" in {
@@ -2054,8 +2072,8 @@ defm V_S_SQRT_F32 : VOP3Only_Real_Base_gfx12<0x288>;
2054
2072
defm V_S_SQRT_F16 : VOP3Only_Real_Base_gfx12<0x289>;
2055
2073
defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">;
2056
2074
defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">;
2057
- defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12 <0x341>;
2058
- defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12 <0x342>;
2075
+ defm V_MINIMUM_F64 : VOP3Only_Realtriple_gfx11_gfx12 <0x341>;
2076
+ defm V_MAXIMUM_F64 : VOP3Only_Realtriple_gfx11_gfx12 <0x342>;
2059
2077
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
2060
2078
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
2061
2079
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">;
@@ -2127,6 +2145,13 @@ multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
2127
2145
VOP3be_Real<GFX11Gen, op, opName, asmName>,
2128
2146
VOP3be_Real<GFX12Gen, op, opName, asmName>;
2129
2147
2148
+ multiclass VOP3be_Real_gfx11_gfx12_not_gfx1250<bits<10> op, string opName, string asmName> :
2149
+ VOP3be_Real<GFX11Gen, op, opName, asmName>,
2150
+ VOP3be_Real<GFX12Not12_50Gen, op, opName, asmName>;
2151
+
2152
+ multiclass VOP3be_Realtriple_gfx1250<bits<10> op> :
2153
+ VOP3be_Realtriple<GFX1250Gen, op>;
2154
+
2130
2155
multiclass VOP3_Real_No_Suffix_gfx11_gfx12<bits<10> op> :
2131
2156
VOP3_Real_No_Suffix<GFX11Gen, op>, VOP3_Real_No_Suffix<GFX12Gen, op>;
2132
2157
@@ -2141,7 +2166,7 @@ defm V_BFE_U32 : VOP3_Realtriple_gfx11_gfx12<0x210>;
2141
2166
defm V_BFE_I32 : VOP3_Realtriple_gfx11_gfx12<0x211>;
2142
2167
defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>;
2143
2168
defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
2144
- defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12 <0x214>;
2169
+ defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250 <0x214>;
2145
2170
defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
2146
2171
defm V_ALIGNBIT_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x216, "v_alignbit_b32">;
2147
2172
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">;
@@ -2161,9 +2186,9 @@ defm V_SAD_U16 : VOP3_Realtriple_gfx11_gfx12<0x224>;
2161
2186
defm V_SAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x225>;
2162
2187
defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11_gfx12<0x226>;
2163
2188
defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11_gfx12<0x227>;
2164
- defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12 <0x228>;
2189
+ defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250 <0x228>;
2165
2190
defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11_gfx12<0x237>;
2166
- defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12 <0x238>;
2191
+ defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250 <0x238>;
2167
2192
defm V_MSAD_U8 : VOP3_Realtriple_gfx11_gfx12<0x239>;
2168
2193
defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>;
2169
2194
defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>;
@@ -2205,7 +2230,7 @@ defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>;
2205
2230
defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">;
2206
2231
defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">;
2207
2232
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
2208
- defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12 <0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
2233
+ defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12_not_gfx1250 <0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
2209
2234
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
2210
2235
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
2211
2236
defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">;
@@ -2228,7 +2253,7 @@ defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>;
2228
2253
defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>;
2229
2254
defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>;
2230
2255
defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>;
2231
- defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12 <0x32b>;
2256
+ defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250 <0x32b>;
2232
2257
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32c>;
2233
2258
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32d>;
2234
2259
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32e>;
@@ -2237,8 +2262,8 @@ defm V_LSHLREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33
2237
2262
defm V_LSHRREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
2238
2263
defm V_ASHRREV_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33a, "v_ashrrev_i16">;
2239
2264
defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>;
2240
- defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12 <0x33d>;
2241
- defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12 <0x33e>;
2265
+ defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250 <0x33d>;
2266
+ defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250 <0x33e>;
2242
2267
defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x360>; // Pseudo in VOP2
2243
2268
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
2244
2269
defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2
@@ -2260,9 +2285,16 @@ let AssemblerPredicate = isGFX11Plus in {
2260
2285
}
2261
2286
2262
2287
// These instructions differ from GFX12 variant by supporting DPP:
2288
+ defm V_FMA_F64 : VOP3Only_Realtriple_gfx1250<0x214>;
2289
+ defm V_DIV_FIXUP_F64 : VOP3Only_Realtriple_gfx1250<0x228>;
2290
+ defm V_DIV_FMAS_F64 : VOP3Only_Realtriple_gfx1250<0x238>;
2291
+ defm V_DIV_SCALE_F64 : VOP3be_Realtriple_gfx1250<0x2fd>;
2292
+ defm V_LDEXP_F64 : VOP3Only_Realtriple_gfx1250<0x32b>;
2263
2293
defm V_MUL_LO_U32 : VOP3Only_Realtriple_gfx1250<0x32c>;
2264
2294
defm V_MUL_HI_U32 : VOP3Only_Realtriple_gfx1250<0x32d>;
2265
2295
defm V_MUL_HI_I32 : VOP3Only_Realtriple_gfx1250<0x32e>;
2296
+ defm V_LSHRREV_B64 : VOP3Only_Realtriple_gfx1250<0x33d>;
2297
+ defm V_ASHRREV_I64 : VOP3Only_Realtriple_gfx1250<0x33e>;
2266
2298
2267
2299
defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>;
2268
2300
defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>;
0 commit comments