Skip to content

Conversation

@harrisonGPU
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented May 22, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Harrison Hao (harrisonGPU)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/141026.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/amdpal.ll (+83-35)
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 2e47b0163aa8c..a97732b2f39a5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -1,9 +1,23 @@
-; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
-; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tonga | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1200 | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
 
-; PAL-NOT: .AMDGPU.config
-; PAL-LABEL: {{^}}simple:
 define amdgpu_kernel void @simple(ptr addrspace(1) %out) {
+; CI-LABEL: simple:
+; CI:       ; %bb.0: ; %entry
+; CI-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CI-NEXT:    v_mov_b32_e32 v0, 0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: simple:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; VI-NEXT:    v_mov_b32_e32 v0, 0
+; VI-NEXT:    s_wait_kmcnt 0x0
+; VI-NEXT:    global_store_b32 v0, v0, s[0:1]
+; VI-NEXT:    s_endpgm
 entry:
   store i32 0, ptr addrspace(1) %out
   ret void
@@ -12,13 +26,28 @@ entry:
 ; Check code sequence for amdpal use of scratch for alloca. This is the case
 ; where the high half of the address comes from s_getpc.
 
-; PAL-LABEL: {{^}}scratch:
-; PAL: s_getpc_b64 s[[[GITPTR:[0-9]+]]:
-; PAL: s_mov_b32 s[[GITPTR]], s0
-; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
 define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) {
+; CI-LABEL: scratch:
+; CI:       ; %bb.0: ; %entry
+; CI-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_cmp_eq_u32 s2, 1
+; CI-NEXT:    s_cselect_b32 s0, s1, s0
+; CI-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CI-NEXT:    v_mov_b32_e32 v0, s0
+; CI-NEXT:    scratch_store_b32 off, v0, s3
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: scratch:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; VI-NEXT:    s_wait_kmcnt 0x0
+; VI-NEXT:    s_cmp_eq_u32 s2, 1
+; VI-NEXT:    s_cselect_b32 s0, s1, s0
+; VI-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    scratch_store_b32 off, v0, s3
+; VI-NEXT:    s_endpgm
 entry:
   %v = alloca [2 x i32], addrspace(5)
   store <2 x i32> %in, ptr addrspace(5) %v
@@ -35,13 +64,28 @@ entry:
 ; that the s_movk_i32 is into a reg that is one more than the following
 ; s_mov_b32.
 
-; PAL-LABEL: {{^}}scratch2:
-; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
-; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
-; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
 define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) #0 {
+; CI-LABEL: scratch2:
+; CI:       ; %bb.0: ; %entry
+; CI-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_cmp_eq_u32 s2, 1
+; CI-NEXT:    s_cselect_b32 s0, s1, s0
+; CI-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CI-NEXT:    v_mov_b32_e32 v0, s0
+; CI-NEXT:    scratch_store_b32 off, v0, s3
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: scratch2:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; VI-NEXT:    s_wait_kmcnt 0x0
+; VI-NEXT:    s_cmp_eq_u32 s2, 1
+; VI-NEXT:    s_cselect_b32 s0, s1, s0
+; VI-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    scratch_store_b32 off, v0, s3
+; VI-NEXT:    s_endpgm
 entry:
   %v = alloca [2 x i32], addrspace(5)
   store <2 x i32> %in, ptr addrspace(5) %v
@@ -56,14 +100,28 @@ entry:
 ; 0 in a graphics shader.
 ; Prior to GCN3 s_load_dword offsets are dwords, so the offset will be 0x4.
 
-; PAL-LABEL: {{^}}scratch2_cs:
-; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
-; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
-; CI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x4
-; VI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x10
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
 define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) #0 {
+; CI-LABEL: scratch2_cs:
+; CI:       ; %bb.0: ; %entry
+; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v6
+; CI-NEXT:    v_mov_b32_e32 v2, v5
+; CI-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; CI-NEXT:    v_add3_u32 v0, 0, v0, 4
+; CI-NEXT:    scratch_store_b96 off, v[2:4], off
+; CI-NEXT:    scratch_load_b32 v0, v0, off
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: scratch2_cs:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    v_mov_b32_e32 v2, v5
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v6
+; VI-NEXT:    scratch_store_b96 off, v[2:4], off
+; VI-NEXT:    scratch_load_b32 v0, v0, off offset:4
+; VI-NEXT:    s_wait_loadcnt 0x0
+; VI-NEXT:    buffer_store_b32 v0, off, s[0:3], null
+; VI-NEXT:    s_endpgm
 entry:
   %v = alloca [3 x i32], addrspace(5)
   %v1 = getelementptr [3 x i32], ptr addrspace(5) %v, i32 0, i32 1
@@ -79,15 +137,5 @@ entry:
 attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" }
 
 declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-
-
-; PAL:         .amdgpu_pal_metadata
-; PAL-NEXT: ---
-; PAL-NEXT: amdpal.pipelines:
-; PAL-NEXT:   - .hardware_stages:
-; PAL-NEXT:       .cs:
-; PAL-NEXT:         .entry_point:    _amdgpu_cs_main
-; PAL-NEXT:         .entry_point_symbol:    scratch2_cs
-; PAL-NEXT:         .scratch_memory_size: 0x10
-; PAL-NEXT:         .sgpr_count:     0x
-; PAL-NEXT:         .vgpr_count:     0x
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; PAL: {{.*}}

Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test cannot be converted to generated checks. The generated checks only include the instructions in the function body, and none of the metadata. This test only really cares about the metadata.

@harrisonGPU
Copy link
Contributor Author

harrisonGPU commented May 22, 2025

This test cannot be converted to generated checks. The generated checks only include the instructions in the function body, and none of the metadata. This test only really cares about the metadata.

Okay, can I add these metadata checks back after running update_llc_test_checks.py? I have a patch that will change this lit test, which is why I updated it.

@arsenm
Copy link
Contributor

arsenm commented May 22, 2025

Okay, can I add these metadata checks back after running update_llc_test_checks.py? I have a patch that will change this lit test, which is why I updated it.

The next time someone runs it, the checks will be lost again. There isn't that much in this test, you can probably just manually do it?

@harrisonGPU
Copy link
Contributor Author

Okay, can I add these metadata checks back after running update_llc_test_checks.py? I have a patch that will change this lit test, which is why I updated it.

The next time someone runs it, the checks will be lost again. There isn't that much in this test, you can probably just manually do it?

Okay thanks !

@harrisonGPU harrisonGPU deleted the amdgpu/autoGenAmdPal branch May 22, 2025 09:53
@harrisonGPU
Copy link
Contributor Author

Okay, can I add these metadata checks back after running update_llc_test_checks.py? I have a patch that will change this lit test, which is why I updated it.

The next time someone runs it, the checks will be lost again. There isn't that much in this test, you can probably just manually do it?

I think the mcpu is tahiti or tonga; maybe we can update it? I know these are old architectures.

@arsenm
Copy link
Contributor

arsenm commented May 22, 2025

I think the mcpu is tahiti or tonga; maybe we can update it? I know these are old architectures.

Add more run lines or a separate test

@harrisonGPU
Copy link
Contributor Author

I think the mcpu is tahiti or tonga; maybe we can update it? I know these are old architectures.

Add more run lines or a separate test

Okay thanks. :-)

Comment on lines +2 to +3
; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1200 | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was also sneaking in a target change, and the prefix names don't match.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants