1- // RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx90a %s | FileCheck %s --check-prefixes=CHECK,GFX9
1+ // RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx90a %s | FileCheck %s --check-prefixes=CHECK,GFX90A
22// RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx1030 %s | FileCheck %s --check-prefixes=CHECK,GFX10
33// RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx1100 %s | FileCheck %s --check-prefixes=CHECK,GFX11
44// RUN: mlir-opt -split-input-file -amdgpu-emulate-atomics=chipset=gfx1200 %s | FileCheck %s --check-prefixes=CHECK,GFX12
@@ -14,16 +14,16 @@ func.func @atomic_fmax(%val: f32, %buffer: memref<?xf32>, %idx: i32) {
1414// GFX10: amdgpu.raw_buffer_atomic_fmax {foo, indexOffset = 4 : i32} [[val]] -> [[buffer]][[[idx]]]
1515// GFX11: amdgpu.raw_buffer_atomic_fmax {foo, indexOffset = 4 : i32} [[val]] -> [[buffer]][[[idx]]]
1616// GFX12: amdgpu.raw_buffer_atomic_fmax {foo, indexOffset = 4 : i32} [[val]] -> [[buffer]][[[idx]]]
17- // GFX9: [[ld:%.+]] = amdgpu.raw_buffer_load {foo, indexOffset = 4 : i32} [[buffer]][[[idx]]]
18- // GFX9: cf.br [[loop:\^.+]]([[ld]] : f32)
19- // GFX9: [[loop]]([[arg:%.+]]: f32):
20- // GFX9: [[operated:%.+]] = arith.maximumf [[val]], [[arg]]
21- // GFX9: [[atomicRes:%.+]] = amdgpu.raw_buffer_atomic_cmpswap {foo, indexOffset = 4 : i32} [[operated]], [[arg]] -> [[buffer]][[[idx]]]
22- // GFX9: [[argCast:%.+]] = arith.bitcast [[arg]] : f32 to i32
23- // GFX9: [[resCast:%.+]] = arith.bitcast [[atomicRes]] : f32 to i32
24- // GFX9: [[test:%.+]] = arith.cmpi eq, [[resCast]], [[argCast]]
25- // GFX9: cf.cond_br [[test]], [[post:\^.+]], [[loop]]([[atomicRes]] : f32)
26- // GFX9: [[post]]:
17+ // GFX90A: [[ld:%.+]] = amdgpu.raw_buffer_load {foo, indexOffset = 4 : i32} [[buffer]][[[idx]]]
18+ // GFX90A: cf.br [[loop:\^.+]]([[ld]] : f32)
19+ // GFX90A: [[loop]]([[arg:%.+]]: f32):
20+ // GFX90A: [[operated:%.+]] = arith.maximumf [[val]], [[arg]]
21+ // GFX90A: [[atomicRes:%.+]] = amdgpu.raw_buffer_atomic_cmpswap {foo, indexOffset = 4 : i32} [[operated]], [[arg]] -> [[buffer]][[[idx]]]
22+ // GFX90A: [[argCast:%.+]] = arith.bitcast [[arg]] : f32 to i32
23+ // GFX90A: [[resCast:%.+]] = arith.bitcast [[atomicRes]] : f32 to i32
24+ // GFX90A: [[test:%.+]] = arith.cmpi eq, [[resCast]], [[argCast]]
25+ // GFX90A: cf.cond_br [[test]], [[post:\^.+]], [[loop]]([[atomicRes]] : f32)
26+ // GFX90A: [[post]]:
2727// GFX942: [[ld:%.+]] = amdgpu.raw_buffer_load {foo, indexOffset = 4 : i32} [[buffer]][[[idx]]]
2828// GFX942: cf.br [[loop:\^.+]]([[ld]] : f32)
2929// GFX942: [[loop]]([[arg:%.+]]: f32):
@@ -57,7 +57,7 @@ func.func @atomic_fmax_f64(%val: f64, %buffer: memref<?xf64>, %idx: i32) {
5757// CHECK: func @atomic_fmax_f64
5858// CHECK-SAME: ([[val:%.+]]: f64, [[buffer:%.+]]: memref<?xf64>, [[idx:%.+]]: i32)
5959// CHECK: gpu.printf "Begin\0A"
60- // GFX9: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
60+ // GFX90A: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
6161// GFX10: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
6262// GFX11: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
6363// GFX12: amdgpu.raw_buffer_atomic_fmax [[val]] -> [[buffer]][[[idx]]]
@@ -74,7 +74,7 @@ func.func @atomic_fmax_f64(%val: f64, %buffer: memref<?xf64>, %idx: i32) {
7474
7575func.func @atomic_fadd(%val: f32, %buffer: memref<?xf32>, %idx: i32) {
7676// CHECK: func @atomic_fadd
77- // GFX9: amdgpu.raw_buffer_atomic_fadd
77+ // GFX90A: amdgpu.raw_buffer_atomic_fadd
7878// GFX10: amdgpu.raw_buffer_load
7979// GFX10: amdgpu.raw_buffer_atomic_cmpswap
8080// GFX11: amdgpu.raw_buffer_atomic_fadd
@@ -87,7 +87,7 @@ func.func @atomic_fadd(%val: f32, %buffer: memref<?xf32>, %idx: i32) {
8787
8888// CHECK: func @atomic_fadd_v2f16
8989func.func @atomic_fadd_v2f16(%val: vector<2xf16>, %buffer: memref<?xf16>, %idx: i32) {
90- // GFX9: amdgpu.raw_buffer_atomic_fadd
90+ // GFX90A: amdgpu.raw_buffer_atomic_fadd
9191// GFX10: amdgpu.raw_buffer_load
9292// GFX10: amdgpu.raw_buffer_atomic_cmpswap
9393// Note: the atomic operation itself will be done over i32, and then we use bitcasts
@@ -108,8 +108,8 @@ func.func @atomic_fadd_v2f16(%val: vector<2xf16>, %buffer: memref<?xf16>, %idx:
108108
109109// CHECK: func @atomic_fadd_v2bf16
110110func.func @atomic_fadd_v2bf16(%val: vector<2xbf16>, %buffer: memref<?xbf16>, %idx: i32) {
111- // GFX9: amdgpu.raw_buffer_load
112- // GFX9: amdgpu.raw_buffer_atomic_cmpswap
111+ // GFX90A: amdgpu.raw_buffer_load
112+ // GFX90A: amdgpu.raw_buffer_atomic_cmpswap
113113// GFX10: amdgpu.raw_buffer_load
114114// GFX10: amdgpu.raw_buffer_atomic_cmpswap
115115// GFX11: amdgpu.raw_buffer_load
0 commit comments