Skip to content

Commit bc15782

Browse files
ronlieb and Shoreshen authored
Reland add features: cube-insts, lerp-inst, qsad-insts, sad-insts (#651)
rebase/squish of #634

Co-authored-by: Shoreshen <[email protected]>
1 parent dc2c396 commit bc15782

22 files changed

+401 -218 lines changed

amd/device-libs/ockl/src/image.cl

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define EII() __oclc_ISA_version != 9010
1414

1515
#define RATTR __attribute__((pure))
16+
#define CRATTR __attribute__((pure, target("cube-insts")))
1617
#define ERATTR __attribute__((pure, target("extended-image-insts")))
1718
#define WATTR
1819
#define GATTR __attribute__((const))
@@ -510,14 +511,14 @@ OCKL_MANGLE_T(image_load,3D)(TSHARP i, int4 c)
510511
return my_image_load_3d_v4f32_i32(c.x, c.y, c.z, LOAD_TSHARP(i));
511512
}
512513

513-
RATTR float4
514-
OCKL_MANGLE_T(image_load,CM)(TSHARP i, int2 c, int f)
514+
CRATTR float4
515+
OCKL_MANGLE_T(image_load, CM)(TSHARP i, int2 c, int f)
515516
{
516517
return my_image_load_cube_v4f32_i32(c.x, c.y, f, LOAD_TSHARP(i));
517518
}
518519

519-
RATTR float4
520-
OCKL_MANGLE_T(image_load,CMa)(TSHARP i, int4 c, int f)
520+
CRATTR float4
521+
OCKL_MANGLE_T(image_load, CMa)(TSHARP i, int4 c, int f)
521522
{
522523
f = LS_ARRAY_FACE(c.z, f);
523524
return my_image_load_cube_v4f32_i32(c.x, c.y, f, LOAD_TSHARP(i));
@@ -565,14 +566,14 @@ OCKL_MANGLE_T(image_load_lod,3D)(TSHARP i, int4 c, int l)
565566
return my_image_load_mip_3d_v4f32_i32(c.x, c.y, c.z, l, LOAD_TSHARP(i));
566567
}
567568

568-
RATTR float4
569-
OCKL_MANGLE_T(image_load_lod,CM)(TSHARP i, int2 c, int f, int l)
569+
CRATTR float4
570+
OCKL_MANGLE_T(image_load_lod, CM)(TSHARP i, int2 c, int f, int l)
570571
{
571572
return my_image_load_mip_cube_v4f32_i32(c.x, c.y, f, l, LOAD_TSHARP(i));
572573
}
573574

574-
RATTR float4
575-
OCKL_MANGLE_T(image_load_lod,CMa)(TSHARP i, int4 c, int f, int l)
575+
CRATTR float4
576+
OCKL_MANGLE_T(image_load_lod, CMa)(TSHARP i, int4 c, int f, int l)
576577
{
577578
f = LS_ARRAY_FACE(c.z, f);
578579
return my_image_load_mip_cube_v4f32_i32(c.x, c.y, f, l, LOAD_TSHARP(i));
@@ -614,14 +615,14 @@ OCKL_MANGLE_T(image_loadh,3D)(TSHARP i, int4 c)
614615
return my_image_load_3d_v4f16_i32(c.x, c.y, c.z, LOAD_TSHARP(i));
615616
}
616617

617-
RATTR half4
618-
OCKL_MANGLE_T(image_loadh,CM)(TSHARP i, int2 c, int f)
618+
CRATTR half4
619+
OCKL_MANGLE_T(image_loadh, CM)(TSHARP i, int2 c, int f)
619620
{
620621
return my_image_load_cube_v4f16_i32(c.x, c.y, f, LOAD_TSHARP(i));
621622
}
622623

623-
RATTR half4
624-
OCKL_MANGLE_T(image_loadh,CMa)(TSHARP i, int4 c, int f)
624+
CRATTR half4
625+
OCKL_MANGLE_T(image_loadh, CMa)(TSHARP i, int4 c, int f)
625626
{
626627
f = LS_ARRAY_FACE(c.z, f);
627628
return my_image_load_cube_v4f16_i32(c.x, c.y, f, LOAD_TSHARP(i));
@@ -657,14 +658,14 @@ OCKL_MANGLE_T(image_loadh_lod,3D)(TSHARP i, int4 c, int l)
657658
return my_image_load_mip_3d_v4f16_i32(c.x, c.y, c.z, l, LOAD_TSHARP(i));
658659
}
659660

660-
RATTR half4
661-
OCKL_MANGLE_T(image_loadh_lod,CM)(TSHARP i, int2 c, int f, int l)
661+
CRATTR half4
662+
OCKL_MANGLE_T(image_loadh_lod, CM)(TSHARP i, int2 c, int f, int l)
662663
{
663664
return my_image_load_mip_cube_v4f16_i32(c.x, c.y, f, l, LOAD_TSHARP(i));
664665
}
665666

666-
RATTR half4
667-
OCKL_MANGLE_T(image_loadh_lod,CMa)(TSHARP i, int4 c, int f, int l)
667+
CRATTR half4
668+
OCKL_MANGLE_T(image_loadh_lod, CMa)(TSHARP i, int4 c, int f, int l)
668669
{
669670
f = LS_ARRAY_FACE(c.z, f);
670671
return my_image_load_mip_cube_v4f16_i32(c.x, c.y, f, l, LOAD_TSHARP(i));
@@ -950,8 +951,8 @@ OCKL_MANGLE_T(image_sample,3D)(TSHARP i, SSHARP s, float4 c)
950951
return my_image_sample_3d_v4f32_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s));
951952
}
952953

953-
RATTR float4
954-
OCKL_MANGLE_T(image_sample,CM)(TSHARP i, SSHARP s, float4 c)
954+
CRATTR float4
955+
OCKL_MANGLE_T(image_sample, CM)(TSHARP i, SSHARP s, float4 c)
955956
{
956957
CUBE_PREP(c);
957958
if (EII())
@@ -960,8 +961,8 @@ OCKL_MANGLE_T(image_sample,CM)(TSHARP i, SSHARP s, float4 c)
960961
return my_image_sample_cube_v4f32_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s));
961962
}
962963

963-
RATTR float4
964-
OCKL_MANGLE_T(image_sample,CMa)(TSHARP i, SSHARP s, float4 c)
964+
CRATTR float4
965+
OCKL_MANGLE_T(image_sample, CMa)(TSHARP i, SSHARP s, float4 c)
965966
{
966967
CUBE_PREP(c);
967968
c.z = SAMPLE_ARRAY_FACE(c.w, c.z);
@@ -1068,15 +1069,15 @@ OCKL_MANGLE_T(image_sample_lod,3D)(TSHARP i, SSHARP s, float4 c, float l)
10681069
return my_image_sample_l_3d_v4f32_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s));
10691070
}
10701071

1071-
RATTR float4
1072-
OCKL_MANGLE_T(image_sample_lod,CM)(TSHARP i, SSHARP s, float4 c, float l)
1072+
CRATTR float4
1073+
OCKL_MANGLE_T(image_sample_lod, CM)(TSHARP i, SSHARP s, float4 c, float l)
10731074
{
10741075
CUBE_PREP(c);
10751076
return my_image_sample_l_cube_v4f32_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s));
10761077
}
10771078

1078-
RATTR float4
1079-
OCKL_MANGLE_T(image_sample_lod,CMa)(TSHARP i, SSHARP s, float4 c, float l)
1079+
CRATTR float4
1080+
OCKL_MANGLE_T(image_sample_lod, CMa)(TSHARP i, SSHARP s, float4 c, float l)
10801081
{
10811082
CUBE_PREP(c);
10821083
c.z = SAMPLE_ARRAY_FACE(c.w, c.z);
@@ -1135,8 +1136,8 @@ OCKL_MANGLE_T(image_sampleh,3D)(TSHARP i, SSHARP s, float4 c)
11351136
return my_image_sample_3d_v4f16_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s));
11361137
}
11371138

1138-
RATTR half4
1139-
OCKL_MANGLE_T(image_sampleh,CM)(TSHARP i, SSHARP s, float4 c)
1139+
CRATTR half4
1140+
OCKL_MANGLE_T(image_sampleh, CM)(TSHARP i, SSHARP s, float4 c)
11401141
{
11411142
CUBE_PREP(c);
11421143
if (EII())
@@ -1145,8 +1146,8 @@ OCKL_MANGLE_T(image_sampleh,CM)(TSHARP i, SSHARP s, float4 c)
11451146
return my_image_sample_cube_v4f16_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s));
11461147
}
11471148

1148-
RATTR half4
1149-
OCKL_MANGLE_T(image_sampleh,CMa)(TSHARP i, SSHARP s, float4 c)
1149+
CRATTR half4
1150+
OCKL_MANGLE_T(image_sampleh, CMa)(TSHARP i, SSHARP s, float4 c)
11501151
{
11511152
CUBE_PREP(c);
11521153
c.z = SAMPLE_ARRAY_FACE(c.w, c.z);
@@ -1225,15 +1226,15 @@ OCKL_MANGLE_T(image_sampleh_lod,3D)(TSHARP i, SSHARP s, float4 c, float l)
12251226
return my_image_sample_l_3d_v4f16_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s));
12261227
}
12271228

1228-
RATTR half4
1229-
OCKL_MANGLE_T(image_sampleh_lod,CM)(TSHARP i, SSHARP s, float4 c, float l)
1229+
CRATTR half4
1230+
OCKL_MANGLE_T(image_sampleh_lod, CM)(TSHARP i, SSHARP s, float4 c, float l)
12301231
{
12311232
CUBE_PREP(c);
12321233
return my_image_sample_l_cube_v4f16_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s));
12331234
}
12341235

1235-
RATTR half4
1236-
OCKL_MANGLE_T(image_sampleh_lod,CMa)(TSHARP i, SSHARP s, float4 c, float l)
1236+
CRATTR half4
1237+
OCKL_MANGLE_T(image_sampleh_lod, CMa)(TSHARP i, SSHARP s, float4 c, float l)
12371238
{
12381239
CUBE_PREP(c);
12391240
c.z = SAMPLE_ARRAY_FACE(c.w, c.z);

amd/device-libs/ockl/src/media.cl

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
1212

1313
#define CATTR __attribute__((const))
14+
#define LCATTR __attribute__((const, target("lerp-inst")))
15+
#define QCATTR __attribute__((const, target("qsad-insts")))
16+
#define SCATTR __attribute__((const, target("sad-insts")))
1417
#define AS_UCHAR4(X) __builtin_astype(X, uchar4)
1518

1619
CATTR uint
@@ -44,7 +47,7 @@ OCKL_MANGLE_U32(bytealign)(uint a, uint b, uint c)
4447
return __builtin_amdgcn_alignbyte(a, b, c);
4548
}
4649

47-
CATTR uint
50+
LCATTR uint
4851
OCKL_MANGLE_U32(lerp)(uint a, uint b, uint c)
4952
{
5053
return __builtin_amdgcn_lerp(a, b, c);
@@ -155,13 +158,13 @@ OCKL_MANGLE_U32(pack)(float4 a)
155158
__builtin_amdgcn_cvt_pk_u8_f32(a.s0, 0, 0))));
156159
}
157160

158-
CATTR ulong
161+
QCATTR ulong
159162
OCKL_MANGLE_U64(qsad)(ulong a, uint b, ulong c)
160163
{
161164
return __builtin_amdgcn_qsad_pk_u16_u8(a, b, c);
162165
}
163166

164-
CATTR uint
167+
SCATTR uint
165168
OCKL_MANGLE_U32(sad)(uint a, uint b, uint c)
166169
{
167170
return __builtin_amdgcn_sad_u8(a, b, c);
@@ -174,13 +177,13 @@ OCKL_MANGLE_U32(sadd)(uint a, uint b, uint c)
174177
return (a > b ? a : b) - (a < b ? a : b) + c;
175178
}
176179

177-
CATTR uint
180+
SCATTR uint
178181
OCKL_MANGLE_U32(sadhi)(uint a, uint b, uint c)
179182
{
180183
return __builtin_amdgcn_sad_hi_u8(a, b, c);
181184
}
182185

183-
CATTR uint
186+
SCATTR uint
184187
OCKL_MANGLE_U32(sadw)(uint a, uint b, uint c)
185188
{
186189
return __builtin_amdgcn_sad_u16(a, b, c);

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc")
122122
BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc")
123123
BUILTIN(__builtin_amdgcn_fract, "dd", "nc")
124124
BUILTIN(__builtin_amdgcn_fractf, "ff", "nc")
125-
BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc")
125+
TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "lerp-inst")
126126
BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
127127
BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
128-
BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc")
129-
BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc")
130-
BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc")
131-
BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc")
128+
TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "cube-insts")
129+
TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "cube-insts")
130+
TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "cube-insts")
131+
TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "cube-insts")
132132
BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
133133
BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
134134
BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
@@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
149149
BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
150150
BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
151151
BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
152-
BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc")
153-
BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
152+
TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "cvt-pknorm-vop2-insts")
153+
TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "cvt-pknorm-vop2-insts")
154154
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
155155
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
156156
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
157157
BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
158-
BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
159158
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
160-
BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
161-
BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc")
162-
BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
159+
TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts")
160+
TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "sad-insts")
161+
TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "sad-insts")
162+
TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "qsad-insts")
163163
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
164164
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
165165

clang/test/CodeGen/amdgpu-builtin-is-invocable.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ void foo() {
4242
return __builtin_trap();
4343
}
4444
//.
45-
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
45+
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
4646
//.
47-
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" }
47+
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" }
4848
// AMDGCN-GFX1010: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
4949
//.
5050
// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }

clang/test/CodeGen/amdgpu-builtin-processor-is.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ void foo() {
4040
return __builtin_trap();
4141
}
4242
//.
43-
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
43+
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
4444
// AMDGCN-GFX900: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
4545
//.
46-
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" }
46+
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" }
4747
//.
4848
// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
4949
// AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }

clang/test/CodeGen/link-builtin-bitcode.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
4343
// CHECK-LABEL: @attr_incompatible
4444
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
4545

46-
// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
47-
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
48-
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
49-
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
46+
// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
47+
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
48+
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
49+
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }

0 commit comments

Comments
 (0)