Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Jan 13, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Jan 13, 2025

@llvmbot
Copy link
Member

llvmbot commented Jan 13, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/122659.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll (+93-11)
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
index e8f86a6ce63ff6..949e6f38e9b423 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI
-; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX89,VI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -| FileCheck %s --check-prefixes=GFX89,GFX9
 
 ; XXX - Why the packing?
 define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
@@ -43,6 +44,27 @@ define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr add
 ; VI-NEXT:    v_mov_b32_e32 v1, v0
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalar_to_vector_v2i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_mov_b32 s10, s6
+; GFX9-NEXT:    s_mov_b32 s11, s7
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s2
+; GFX9-NEXT:    s_mov_b32 s9, s3
+; GFX9-NEXT:    buffer_load_dword v0, off, s[8:11], 0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff0000
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
+; GFX9-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
   %tmp1 = load i32, ptr addrspace(1) %in, align 4
   %bc = bitcast i32 %tmp1 to <2 x i16>
   %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -90,6 +112,27 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(ptr addrspace(1) %out, ptr add
 ; VI-NEXT:    v_mov_b32_e32 v1, v0
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalar_to_vector_v2f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_mov_b32 s10, s6
+; GFX9-NEXT:    s_mov_b32 s11, s7
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s2
+; GFX9-NEXT:    s_mov_b32 s9, s3
+; GFX9-NEXT:    buffer_load_dword v0, off, s[8:11], 0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff0000
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
+; GFX9-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
   %tmp1 = load float, ptr addrspace(1) %in, align 4
   %bc = bitcast float %tmp1 to <2 x i16>
   %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -130,6 +173,23 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() {
 ; VI-NEXT:    v_mov_b32_e32 v1, s0
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalar_to_vector_v4i16:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    s_lshl_b32 s1, s0, 8
+; GFX9-NEXT:    s_or_b32 s0, s0, s1
+; GFX9-NEXT:    s_and_b32 s1, s0, 0xffff
+; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
+; GFX9-NEXT:    s_or_b32 s0, s1, s0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
 bb:
   %tmp = load <2 x i8>, ptr addrspace(1) undef, align 1
   %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -176,6 +236,28 @@ define amdgpu_kernel void @scalar_to_vector_v4f16() {
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalar_to_vector_v4f16:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    s_lshl_b32 s1, s0, 8
+; GFX9-NEXT:    s_or_b32 s0, s1, s0
+; GFX9-NEXT:    s_and_b32 s1, s0, 0xff00
+; GFX9-NEXT:    s_bfe_u32 s4, s0, 0x80008
+; GFX9-NEXT:    s_or_b32 s1, s4, s1
+; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX9-NEXT:    s_and_b32 s4, s1, 0xffff
+; GFX9-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX9-NEXT:    s_or_b32 s4, s4, s1
+; GFX9-NEXT:    s_or_b32 s0, s0, s1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s4
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
 bb:
   %load = load half, ptr addrspace(1) undef, align 1
   %tmp = bitcast half %load to <2 x i8>
@@ -235,16 +317,16 @@ define amdgpu_kernel void @scalar_to_vector_test6(ptr addrspace(1) %out, i8 zero
 ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
-; VI-LABEL: scalar_to_vector_test6:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    s_mov_b32 s3, 0xf000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s6
-; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; GFX89-LABEL: scalar_to_vector_test6:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GFX89-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX89-NEXT:    s_mov_b32 s3, 0xf000
+; GFX89-NEXT:    s_mov_b32 s2, -1
+; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX89-NEXT:    v_mov_b32_e32 v0, s6
+; GFX89-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX89-NEXT:    s_endpgm
   %newvec0 = insertelement <4 x i8> undef, i8 %val, i32 0
   %bc = bitcast <4 x i8> %newvec0 to <2 x half>
   store <2 x half> %bc, ptr addrspace(1) %out

@arsenm arsenm marked this pull request as ready for review January 13, 2025 03:05
Copy link
Contributor Author

arsenm commented Jan 13, 2025

Merge activity

  • Jan 13, 7:31 AM EST: A user started a stack merge that includes this pull request via Graphite.
  • Jan 13, 7:34 AM EST: Graphite rebased this pull request as part of a merge.
  • Jan 13, 7:35 AM EST: A user merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/amdgpu/add-gfx9-run-line-scalar-to-vector-test branch from a71b462 to 8581518 Compare January 13, 2025 12:33
@arsenm arsenm merged commit e9a5577 into main Jan 13, 2025
5 of 7 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/add-gfx9-run-line-scalar-to-vector-test branch January 13, 2025 12:35
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 13, 2025

LLVM Buildbot has detected a new failure on builder lldb-x86_64-debian running on lldb-x86_64-debian while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/13964

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: python_api/symbol-context/two-files/TestSymbolContextTwoFiles.py (167 of 2738)
PASS: lldb-api :: functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py (168 of 2738)
PASS: lldb-shell :: Subprocess/clone-follow-parent-wp.test (169 of 2738)
PASS: lldb-shell :: Subprocess/vfork-follow-child-wp.test (170 of 2738)
PASS: lldb-api :: lang/cpp/multiple-inheritance/TestCppMultipleInheritance.py (171 of 2738)
PASS: lldb-api :: lang/cpp/class_types/TestClassTypes.py (172 of 2738)
PASS: lldb-api :: lang/c/forward/TestForwardDeclaration.py (173 of 2738)
PASS: lldb-api :: functionalities/stop-on-sharedlibrary-load/TestStopOnSharedlibraryEvents.py (174 of 2738)
PASS: lldb-api :: lang/cpp/step-through-trampoline/TestStepThroughTrampoline.py (175 of 2738)
PASS: lldb-api :: commands/expression/nested/TestNestedExpressions.py (176 of 2738)
FAIL: lldb-api :: functionalities/thread/thread_specific_break_plus_condition/TestThreadSpecificBpPlusCondition.py (177 of 2738)
******************** TEST 'lldb-api :: functionalities/thread/thread_specific_break_plus_condition/TestThreadSpecificBpPlusCondition.py' FAILED ********************
Script:
--
/usr/bin/python3 /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./lib --env LLVM_INCLUDE_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/include --env LLVM_TOOLS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./bin --arch x86_64 --build-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex --lldb-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/lldb --compiler /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/clang --dsymutil /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./bin --lldb-obj-root /home/worker/2.0.1/lldb-x86_64-debian/build/tools/lldb --lldb-libs-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./lib -t /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/functionalities/thread/thread_specific_break_plus_condition -p TestThreadSpecificBpPlusCondition.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 20.0.0git (https://github.com/llvm/llvm-project.git revision e9a55770dcee48a3c28b71720db383762049a778)
  clang revision e9a55770dcee48a3c28b71720db383762049a778
  llvm revision e9a55770dcee48a3c28b71720db383762049a778
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
Change dir to: /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/functionalities/thread/thread_specific_break_plus_condition
UNSUPPORTED: LLDB (/home/worker/2.0.1/lldb-x86_64-debian/build/bin/clang-x86_64) :: test_python_dsym (TestThreadSpecificBpPlusCondition.ThreadSpecificBreakPlusConditionTestCase.test_python_dsym) (test case does not fall in any category of interest for this run) 
runCmd: settings clear -all

output: 

runCmd: settings set symbols.enable-external-lookup false

output: 

runCmd: settings set target.inherit-tcc true

output: 

runCmd: settings set target.disable-aslr false

output: 

runCmd: settings set target.detach-on-error false

output: 

kazutakahirata pushed a commit to kazutakahirata/llvm-project that referenced this pull request Jan 13, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants