Skip to content

Commit e3c7b7f

Browse files
authored
[AMDGPU] gfx1251 VOP1 dpp support (#159637)
1 parent 1c95d80 commit e3c7b7f

File tree

8 files changed

+379
-23
lines changed

8 files changed

+379
-23
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,11 @@ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
212212
}
213213

214214

215+
// VOPProfile over the type list [f64, f64, untyped, untyped] with both
// HasExtVOP3DPP and HasExt64BitDPP forced to 0. Selected for V_RCP_F64,
// V_RSQ_F64 and V_SQRT_F64 in place of VOP_F64_F64.
def VOP_F64_F64_NO_DPP : VOPProfile <[f64, f64, untyped, untyped]> {
216+
let HasExtVOP3DPP = 0;
217+
let HasExt64BitDPP = 0;
218+
}
219+
215220
//===----------------------------------------------------------------------===//
216221
// VOP1 Instructions
217222
//===----------------------------------------------------------------------===//
@@ -344,9 +349,9 @@ defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>;
344349
} // End TRANS = 1, SchedRW = [WriteTrans32]
345350

346351
let TRANS = 1, SchedRW = [WriteTrans64] in {
347-
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
348-
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
349-
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, int_amdgcn_sqrt>;
352+
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64_NO_DPP, AMDGPUrcp>;
353+
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64_NO_DPP, AMDGPUrsq>;
354+
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64_NO_DPP, int_amdgcn_sqrt>;
350355
} // End TRANS = 1, SchedRW = [WriteTrans64]
351356

352357
let TRANS = 1, SchedRW = [WriteTrans32] in {
@@ -1025,6 +1030,11 @@ multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
10251030
multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> :
10261031
VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;
10271032

1033+
// Real-instruction multiclass for generation Gen and 9-bit opcode op.
// Combines VOP1_Real_NO_DPP (the e32 and e64 reals) with VOP1_Real_dpp and a
// VOP3_Real_dpp_Base instantiated with the opcode {0, 1, 1, op{6-0}}.
multiclass VOP1_Real_with_DPP16<GFXGen Gen, bits<9> op> :
1034+
VOP1_Real_NO_DPP<Gen, op>,
1035+
VOP1_Real_dpp<Gen, op>,
1036+
VOP3_Real_dpp_Base<Gen, {0, 1, 1, op{6-0}}>;
1037+
10281038
multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
10291039
string opName = NAME> :
10301040
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
@@ -1057,17 +1067,22 @@ multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250<
10571067
VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>;
10581068
}
10591069

1070+
// Instantiates VOP1_Real_FULL_with_name twice with the same op, opName and
// asmName: once for GFX11Gen and once for GFX12Not12_50Gen.
multiclass VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250<bits<9> op, string opName,
1071+
string asmName> :
1072+
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
1073+
VOP1_Real_FULL_with_name<GFX12Not12_50Gen, op, opName, asmName>;
1074+
10601075
// Inherits the e32 real for GFX1250Gen and adds an _e64_gfx1250 real.
// The e64 real is built from the NAME#"_e64" pseudo and encoded through
// VOP3OpSelIsDPP_gfx12 with opcode {0, 1, 1, op{6-0}} and the pseudo's
// profile (ps.Pfl).
multiclass VOP1_Real_OpSelIsDPP_gfx1250<bits<9> op> : VOP1_Real_e32<GFX1250Gen, op> {
10611076
defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
10621077
def _e64_gfx1250 :
10631078
VOP3_Real_Gen<ps, GFX1250Gen>,
10641079
VOP3OpSelIsDPP_gfx12<{0, 1, 1, op{6-0}}, ps.Pfl>;
10651080
}
10661081

1067-
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Not12_50Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
1068-
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
1082+
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250<0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
1083+
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
10691084

1070-
defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
1085+
defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
10711086

10721087
defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
10731088
defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
@@ -1252,17 +1267,17 @@ let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
12521267
multiclass VOP1_Real_gfx7<bits<9> op> :
12531268
VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;
12541269

1255-
multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
1270+
multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<bits<9> op> :
12561271
VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
1257-
VOP1_Real_NO_DPP<GFX12Gen, op>;
1272+
VOP1_Real_with_DPP16<GFX12Gen, op>;
12581273

12591274
defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
12601275
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;
12611276

1262-
defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x017>;
1263-
defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x018>;
1264-
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x019>;
1265-
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x01a>;
1277+
defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x017>;
1278+
defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x018>;
1279+
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x019>;
1280+
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x01a>;
12661281

12671282
//===----------------------------------------------------------------------===//
12681283
// GFX6, GFX7, GFX10, GFX11, GFX12
@@ -1300,6 +1315,10 @@ multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
13001315
VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
13011316
VOP1_Real_NO_DPP<GFX12Gen, op>;
13021317

1318+
// Same composition as VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12, except
// that the GFX12Gen part uses VOP1_Real_with_DPP16 instead of
// VOP1_Real_NO_DPP; GFX11Gen still uses VOP1_Real_NO_DPP.
multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<bits<9> op> :
1319+
VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
1320+
VOP1_Real_with_DPP16<GFX12Gen, op>;
1321+
13031322
multiclass VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<9> op> :
13041323
VOP1Only_Real_gfx6_gfx7<op>, VOP1Only_Real_gfx10_gfx11_gfx12<op>;
13051324

@@ -1314,8 +1333,8 @@ defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;
13141333
defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>;
13151334
defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>;
13161335
defm V_READFIRSTLANE_B32 : VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
1317-
defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>;
1318-
defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>;
1336+
defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x003>;
1337+
defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x004>;
13191338
defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>;
13201339
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>;
13211340
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>;
@@ -1325,14 +1344,14 @@ defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
13251344
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
13261345
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
13271346
defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>;
1328-
defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x00f>;
1329-
defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x010>;
1347+
defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x00f>;
1348+
defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x010>;
13301349
defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>;
13311350
defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>;
13321351
defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>;
13331352
defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>;
1334-
defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x015>;
1335-
defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x016>;
1353+
defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x015>;
1354+
defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x016>;
13361355
defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>;
13371356
defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>;
13381357
defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>;
@@ -1354,9 +1373,9 @@ defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>;
13541373
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
13551374
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
13561375
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
1357-
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03c>;
1358-
defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03d>;
1359-
defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03e>;
1376+
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03c>;
1377+
defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03d>;
1378+
defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03e>;
13601379
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>;
13611380
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>;
13621381
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
@@ -1410,7 +1429,9 @@ multiclass VOP1_Real_vi <bits<10> op> {
14101429
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
14111430
def _dpp_vi :
14121431
VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
1413-
VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
1432+
VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
1433+
let AssemblerPredicate = isGFX8GFX9;
1434+
}
14141435
}
14151436

14161437
defm V_NOP : VOP1_Real_vi <0x0>;

llvm/test/CodeGen/AMDGPU/dpp64_combine.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -run-pass=gcn-dpp-combine -o - %s | FileCheck %s --check-prefix=GCN
34

45
---
56
# GCN-LABEL: name: dpp64_old_impdef
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck -check-prefixes=GCN,GFX1251 %s
4+
5+
; GCN-LABEL: {{^}}mov_dpp64_test:
6+
; GCN: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
7+
; GCN: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
8+
define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) {
9+
%tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 0) #0
10+
store i64 %tmp0, ptr addrspace(1) %out
11+
ret void
12+
}
13+
14+
; GCN-LABEL: {{^}}mov_dpp64_row_share_test:
15+
; GFX12-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0x1 bank_mask:0x1
16+
; GFX1251: v_mov_b64_dpp v[{{[0-9:]+}}], v[{{[0-9:]+}}] row_share:1 row_mask:0x1 bank_mask:0x1
17+
define amdgpu_kernel void @mov_dpp64_row_share_test(ptr addrspace(1) %out, i64 %in1) {
18+
%tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 337, i32 1, i32 1, i1 0) #0
19+
store i64 %tmp0, ptr addrspace(1) %out
20+
ret void
21+
}
22+
23+
declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32, i32, i32, i1) #0
24+
25+
attributes #0 = { nounwind readnone convergent }
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s | FileCheck --check-prefixes=GFX1251 %s
2+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
3+
4+
v_mov_b64 v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf
5+
// GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x50,0x01,0xff]
6+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
7+
// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf
8+
// GFX1250-ERR-NEXT:{{^}} ^
9+
10+
v_mov_b64 v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1
11+
// GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x5f,0x01,0x01]
12+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
13+
// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1
14+
// GFX1250-ERR-NEXT:{{^}} ^
15+
16+
v_mov_b64 v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
17+
// GFX1251: v_mov_b64_dpp v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3a,0xfc,0x7f,0xfe,0x53,0x05,0x30]
18+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
19+
// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
20+
// GFX1250-ERR-NEXT:{{^}} ^
21+
22+
v_cvt_i32_f64 v2, v[4:5] row_share:1
23+
// GFX1251: v_cvt_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x06,0x04,0x7e,0x04,0x51,0x01,0xff]
24+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
25+
// GFX1250-ERR-NEXT:{{^}}v_cvt_i32_f64 v2, v[4:5] row_share:1
26+
// GFX1250-ERR-NEXT:{{^}} ^
27+
28+
v_cvt_f64_i32 v[4:5], v2 row_share:1
29+
// GFX1251: v_cvt_f64_i32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x7e,0x02,0x51,0x01,0xff]
30+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
31+
// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_i32 v[4:5], v2 row_share:1
32+
// GFX1250-ERR-NEXT:{{^}} ^
33+
34+
v_cvt_f32_f64 v2, v[4:5] row_share:1
35+
// GFX1251: v_cvt_f32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x1e,0x04,0x7e,0x04,0x51,0x01,0xff]
36+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
37+
// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_f64 v2, v[4:5] row_share:1
38+
// GFX1250-ERR-NEXT:{{^}} ^
39+
40+
v_cvt_f64_f32 v[4:5], v2 row_share:1
41+
// GFX1251: v_cvt_f64_f32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x20,0x08,0x7e,0x02,0x51,0x01,0xff]
42+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
43+
// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_f32 v[4:5], v2 row_share:1
44+
// GFX1250-ERR-NEXT:{{^}} ^
45+
46+
v_cvt_u32_f64 v2, v[4:5] row_share:1
47+
// GFX1251: v_cvt_u32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2a,0x04,0x7e,0x04,0x51,0x01,0xff]
48+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
49+
// GFX1250-ERR-NEXT:{{^}}v_cvt_u32_f64 v2, v[4:5] row_share:1
50+
// GFX1250-ERR-NEXT:{{^}} ^
51+
52+
v_cvt_f64_u32 v[4:5], v2 row_share:1
53+
// GFX1251: v_cvt_f64_u32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2c,0x08,0x7e,0x02,0x51,0x01,0xff]
54+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
55+
// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_u32 v[4:5], v2 row_share:1
56+
// GFX1250-ERR-NEXT:{{^}} ^
57+
58+
v_trunc_f64 v[2:3], v[4:5] row_share:1
59+
// GFX1251: v_trunc_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2e,0x04,0x7e,0x04,0x51,0x01,0xff]
60+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
61+
// GFX1250-ERR-NEXT:{{^}}v_trunc_f64 v[2:3], v[4:5] row_share:1
62+
// GFX1250-ERR-NEXT:{{^}} ^
63+
64+
v_ceil_f64 v[2:3], v[4:5] row_share:1
65+
// GFX1251: v_ceil_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x30,0x04,0x7e,0x04,0x51,0x01,0xff]
66+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
67+
// GFX1250-ERR-NEXT:{{^}}v_ceil_f64 v[2:3], v[4:5] row_share:1
68+
// GFX1250-ERR-NEXT:{{^}} ^
69+
70+
v_rndne_f64 v[2:3], v[4:5] row_share:1
71+
// GFX1251: v_rndne_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x32,0x04,0x7e,0x04,0x51,0x01,0xff]
72+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
73+
// GFX1250-ERR-NEXT:{{^}}v_rndne_f64 v[2:3], v[4:5] row_share:1
74+
// GFX1250-ERR-NEXT:{{^}} ^
75+
76+
v_floor_f64 v[2:3], v[4:5] row_share:1
77+
// GFX1251: v_floor_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x34,0x04,0x7e,0x04,0x51,0x01,0xff]
78+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
79+
// GFX1250-ERR-NEXT:{{^}}v_floor_f64 v[2:3], v[4:5] row_share:1
80+
// GFX1250-ERR-NEXT:{{^}} ^
81+
82+
v_frexp_exp_i32_f64 v2, v[4:5] row_share:1
83+
// GFX1251: v_frexp_exp_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x78,0x04,0x7e,0x04,0x51,0x01,0xff]
84+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
85+
// GFX1250-ERR-NEXT:{{^}}v_frexp_exp_i32_f64 v2, v[4:5] row_share:1
86+
// GFX1250-ERR-NEXT:{{^}} ^
87+
88+
v_frexp_mant_f64 v[2:3], v[4:5] row_share:1
89+
// GFX1251: v_frexp_mant_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7a,0x04,0x7e,0x04,0x51,0x01,0xff]
90+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
91+
// GFX1250-ERR-NEXT:{{^}}v_frexp_mant_f64 v[2:3], v[4:5] row_share:1
92+
// GFX1250-ERR-NEXT:{{^}} ^
93+
94+
v_fract_f64 v[2:3], v[4:5] row_share:1
95+
// GFX1251: v_fract_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7c,0x04,0x7e,0x04,0x51,0x01,0xff]
96+
// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
97+
// GFX1250-ERR-NEXT:{{^}}v_fract_f64 v[2:3], v[4:5] row_share:1
98+
// GFX1250-ERR-NEXT:{{^}} ^

0 commit comments

Comments
 (0)