Skip to content

Commit ef93b18

Browse files
committed
Addressing PR comments
1 parent f7f87a4 commit ef93b18

File tree

1 file changed

+16
-16
lines changed

1 file changed

+16
-16
lines changed

mlir/test/Dialect/AMDGPU/amdgpu-emulate-atomics.mlir

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx90a %s | FileCheck %s --check-prefixes=CHECK,GFX9
1+
// RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx90a %s | FileCheck %s --check-prefixes=CHECK,GFX90A
22
// RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx1030 %s | FileCheck %s --check-prefixes=CHECK,GFX10
33
// RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx1100 %s | FileCheck %s --check-prefixes=CHECK,GFX11
44
// RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx1200 %s | FileCheck %s --check-prefixes=CHECK,GFX12
@@ -14,16 +14,16 @@ func.func @atomic_fmax(%val: f32, %buffer: memref<?xf32>, %idx: i32) {
1414
// GFX10: amdgpu.raw_buffer_atomic_fmax {foo, indexOffset = 4 : i32} [[val]] -> [[buffer]][[[idx]]]
1515
// GFX11: amdgpu.raw_buffer_atomic_fmax {foo, indexOffset = 4 : i32} [[val]] -> [[buffer]][[[idx]]]
1616
// GFX12: amdgpu.raw_buffer_atomic_fmax {foo, indexOffset = 4 : i32} [[val]] -> [[buffer]][[[idx]]]
17-
// GFX9: [[ld:%.+]] = amdgpu.raw_buffer_load {foo, indexOffset = 4 : i32} [[buffer]][[[idx]]]
18-
// GFX9: cf.br [[loop:\^.+]]([[ld]] : f32)
19-
// GFX9: [[loop]]([[arg:%.+]]: f32):
20-
// GFX9: [[operated:%.+]] = arith.maximumf [[val]], [[arg]]
21-
// GFX9: [[atomicRes:%.+]] = amdgpu.raw_buffer_atomic_cmpswap {foo, indexOffset = 4 : i32} [[operated]], [[arg]] -> [[buffer]][[[idx]]]
22-
// GFX9: [[argCast:%.+]] = arith.bitcast [[arg]] : f32 to i32
23-
// GFX9: [[resCast:%.+]] = arith.bitcast [[atomicRes]] : f32 to i32
24-
// GFX9: [[test:%.+]] = arith.cmpi eq, [[resCast]], [[argCast]]
25-
// GFX9: cf.cond_br [[test]], [[post:\^.+]], [[loop]]([[atomicRes]] : f32)
26-
// GFX9: [[post]]:
17+
// GFX90A: [[ld:%.+]] = amdgpu.raw_buffer_load {foo, indexOffset = 4 : i32} [[buffer]][[[idx]]]
18+
// GFX90A: cf.br [[loop:\^.+]]([[ld]] : f32)
19+
// GFX90A: [[loop]]([[arg:%.+]]: f32):
20+
// GFX90A: [[operated:%.+]] = arith.maximumf [[val]], [[arg]]
21+
// GFX90A: [[atomicRes:%.+]] = amdgpu.raw_buffer_atomic_cmpswap {foo, indexOffset = 4 : i32} [[operated]], [[arg]] -> [[buffer]][[[idx]]]
22+
// GFX90A: [[argCast:%.+]] = arith.bitcast [[arg]] : f32 to i32
23+
// GFX90A: [[resCast:%.+]] = arith.bitcast [[atomicRes]] : f32 to i32
24+
// GFX90A: [[test:%.+]] = arith.cmpi eq, [[resCast]], [[argCast]]
25+
// GFX90A: cf.cond_br [[test]], [[post:\^.+]], [[loop]]([[atomicRes]] : f32)
26+
// GFX90A: [[post]]:
2727
// GFX942: [[ld:%.+]] = amdgpu.raw_buffer_load {foo, indexOffset = 4 : i32} [[buffer]][[[idx]]]
2828
// GFX942: cf.br [[loop:\^.+]]([[ld]] : f32)
2929
// GFX942: [[loop]]([[arg:%.+]]: f32):
@@ -57,7 +57,7 @@ func.func @atomic_fmax_f64(%val: f64, %buffer: memref<?xf64>, %idx: i32) {
5757
// CHECK: func @atomic_fmax_f64
5858
// CHECK-SAME: ([[val:%.+]]: f64, [[buffer:%.+]]: memref<?xf64>, [[idx:%.+]]: i32)
5959
// CHECK: gpu.printf "Begin\0A"
60-
// GFX9: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
60+
// GFX90A: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
6161
// GFX10: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
6262
// GFX11: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
6363
// GFX12: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
@@ -74,7 +74,7 @@ func.func @atomic_fmax_f64(%val: f64, %buffer: memref<?xf64>, %idx: i32) {
7474

7575
func.func @atomic_fadd(%val: f32, %buffer: memref<?xf32>, %idx: i32) {
7676
// CHECK: func @atomic_fadd
77-
// GFX9: amdgpu.raw_buffer_atomic_fadd
77+
// GFX90A: amdgpu.raw_buffer_atomic_fadd
7878
// GFX10: amdgpu.raw_buffer_load
7979
// GFX10: amdgpu.raw_buffer_atomic_cmpswap
8080
// GFX11: amdgpu.raw_buffer_atomic_fadd
@@ -87,7 +87,7 @@ func.func @atomic_fadd(%val: f32, %buffer: memref<?xf32>, %idx: i32) {
8787

8888
// CHECK: func @atomic_fadd_v2f16
8989
func.func @atomic_fadd_v2f16(%val: vector<2xf16>, %buffer: memref<?xf16>, %idx: i32) {
90-
// GFX9: amdgpu.raw_buffer_atomic_fadd
90+
// GFX90A: amdgpu.raw_buffer_atomic_fadd
9191
// GFX10: amdgpu.raw_buffer_load
9292
// GFX10: amdgpu.raw_buffer_atomic_cmpswap
9393
// Note: the atomic operation itself will be done over i32, and then we use bitcasts
@@ -108,8 +108,8 @@ func.func @atomic_fadd_v2f16(%val: vector<2xf16>, %buffer: memref<?xf16>, %idx:
108108

109109
// CHECK: func @atomic_fadd_v2bf16
110110
func.func @atomic_fadd_v2bf16(%val: vector<2xbf16>, %buffer: memref<?xbf16>, %idx: i32) {
111-
// GFX9: amdgpu.raw_buffer_load
112-
// GFX9: amdgpu.raw_buffer_atomic_cmpswap
111+
// GFX90A: amdgpu.raw_buffer_load
112+
// GFX90A: amdgpu.raw_buffer_atomic_cmpswap
113113
// GFX10: amdgpu.raw_buffer_load
114114
// GFX10: amdgpu.raw_buffer_atomic_cmpswap
115115
// GFX11: amdgpu.raw_buffer_load

0 commit comments

Comments (0)