|
4 | 4 |
|
5 | 5 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable |
6 | 6 |
|
| 7 | +typedef unsigned int uint; /* scalar alias used by the test signatures in this file */ |
| 8 | +typedef unsigned short int ushort; /* scalar alias used as the pointee type of test_sat_pk4_i4_i8's out */ |
| 9 | +typedef unsigned int __attribute__((ext_vector_type(2))) uint2; /* 2-element vector of unsigned int; pointee type of test_permlane16_swap's out */ |
7 | 10 | typedef half __attribute__((ext_vector_type(2))) half2; /* 2-element vector of half */ |
8 | 11 |
|
9 | 12 | // CHECK-LABEL: @test_setprio_inc_wg( |
@@ -42,6 +45,24 @@ void test_s_wait_tensorcnt() { |
42 | 45 | __builtin_amdgcn_s_wait_tensorcnt(0); |
43 | 46 | } |
44 | 47 |
|
| 48 | +// CHECK-LABEL: @test_prng_b32( |
| 49 | +// CHECK-NEXT: entry: |
| 50 | +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |
| 51 | +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| 52 | +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr |
| 53 | +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| 54 | +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 |
| 55 | +// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 |
| 56 | +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 |
| 57 | +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]]) |
| 58 | +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 |
| 59 | +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4 |
| 60 | +// CHECK-NEXT: ret void |
| 61 | +// |
| 62 | +void test_prng_b32(global uint* out, uint a) { /* Codegen test: stores the result of __builtin_amdgcn_prng_b32(a) through out. */ |
| 63 | + *out = __builtin_amdgcn_prng_b32(a); /* the CHECK lines above expect this to become a call to i32 @llvm.amdgcn.prng.b32(i32) */ |
| 64 | +} |
| 65 | + |
45 | 66 | // CHECK-LABEL: @test_tanh_f32( |
46 | 67 | // CHECK-NEXT: entry: |
47 | 68 | // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |
@@ -349,6 +370,76 @@ void test_cvt_pk_f16_bf8(global half2* out, short a) |
349 | 370 | out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a); |
350 | 371 | } |
351 | 372 |
|
| 373 | +// CHECK-LABEL: @test_sat_pk4_i4_i8( |
| 374 | +// CHECK-NEXT: entry: |
| 375 | +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| 376 | +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| 377 | +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr |
| 378 | +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr |
| 379 | +// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 |
| 380 | +// CHECK-NEXT: store i32 [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4 |
| 381 | +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4 |
| 382 | +// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 [[TMP0]]) |
| 383 | +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 |
| 384 | +// CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP2]], align 2 |
| 385 | +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4 |
| 386 | +// CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 [[TMP3]]) |
| 387 | +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 |
| 388 | +// CHECK-NEXT: store i16 [[TMP4]], ptr [[TMP5]], align 2 |
| 389 | +// CHECK-NEXT: ret void |
| 390 | +// |
| 391 | +void test_sat_pk4_i4_i8(ushort *out, uint src) /* Codegen test: exercises both the sat_pk4_i4_i8 and sat_pk4_u4_u8 builtins on the same src. */ |
| 392 | +{ /* out carries no address-space qualifier, matching the plain `ptr` in the CHECK lines above */ |
| 393 | + *out = __builtin_amdgcn_sat_pk4_i4_i8(src); /* expected IR: call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32) */ |
| 394 | + *out = __builtin_amdgcn_sat_pk4_u4_u8(src); /* expected IR: call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32); both stores target *out and the CHECK lines expect each one to be emitted */ |
| 395 | +} |
| 396 | + |
| 397 | +// CHECK-LABEL: @test_permlane16_swap( |
| 398 | +// CHECK-NEXT: entry: |
| 399 | +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |
| 400 | +// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| 401 | +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| 402 | +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr |
| 403 | +// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr |
| 404 | +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr |
| 405 | +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 |
| 406 | +// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4 |
| 407 | +// CHECK-NEXT: store i32 [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4 |
| 408 | +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 |
| 409 | +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4 |
| 410 | +// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP0]], i32 [[TMP1]], i1 false, i1 false) |
| 411 | +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0 |
| 412 | +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1 |
| 413 | +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0 |
| 414 | +// CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1 |
| 415 | +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 |
| 416 | +// CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 8 |
| 417 | +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 |
| 418 | +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4 |
| 419 | +// CHECK-NEXT: [[TMP10:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP8]], i32 [[TMP9]], i1 true, i1 false) |
| 420 | +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { i32, i32 } [[TMP10]], 0 |
| 421 | +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { i32, i32 } [[TMP10]], 1 |
| 422 | +// CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i64 0 |
| 423 | +// CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1 |
| 424 | +// CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 |
| 425 | +// CHECK-NEXT: store <2 x i32> [[TMP14]], ptr addrspace(1) [[TMP15]], align 8 |
| 426 | +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 |
| 427 | +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4 |
| 428 | +// CHECK-NEXT: [[TMP18:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP16]], i32 [[TMP17]], i1 false, i1 true) |
| 429 | +// CHECK-NEXT: [[TMP19:%.*]] = extractvalue { i32, i32 } [[TMP18]], 0 |
| 430 | +// CHECK-NEXT: [[TMP20:%.*]] = extractvalue { i32, i32 } [[TMP18]], 1 |
| 431 | +// CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i64 0 |
| 432 | +// CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP20]], i64 1 |
| 433 | +// CHECK-NEXT: [[TMP23:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 |
| 434 | +// CHECK-NEXT: store <2 x i32> [[TMP22]], ptr addrspace(1) [[TMP23]], align 8 |
| 435 | +// CHECK-NEXT: ret void |
| 436 | +// |
| 437 | +void test_permlane16_swap(global uint2* out, uint old, uint src) { /* Codegen test: calls __builtin_amdgcn_permlane16_swap with three combinations of its two boolean arguments. */ |
| 438 | + *out = __builtin_amdgcn_permlane16_swap(old, src, false, false); /* expected IR: @llvm.amdgcn.permlane16.swap(..., i1 false, i1 false); the { i32, i32 } result is repacked into <2 x i32> before the store */ |
| 439 | + *out = __builtin_amdgcn_permlane16_swap(old, src, true, false); /* expected IR: i1 true, i1 false */ |
| 440 | + *out = __builtin_amdgcn_permlane16_swap(old, src, false, true); /* expected IR: i1 false, i1 true */ |
| 441 | +} |
| 442 | + |
352 | 443 | // CHECK-LABEL: @test_cvt_f32_fp8_e5m3( |
353 | 444 | // CHECK-NEXT: entry: |
354 | 445 | // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |
|
0 commit comments