AMDGPU: Add gfx9 run line to scalar_to_vector test #122659

arsenm · 2025-01-13T03:04:18Z

No description provided.

arsenm · 2025-01-13T03:04:33Z

DAG: Fold bitcast of scalar_to_vector to anyext #122660
AMDGPU: Add gfx9 run line to scalar_to_vector test #122659 👈 (View in Graphite)
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-01-13T03:04:56Z

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/122659.diff

1 Files Affected:

(modified) llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll (+93-11)

diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
index e8f86a6ce63ff6..949e6f38e9b423 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI
-; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX89,VI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX89,GFX9
 
 ; XXX - Why the packing?
 define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
@@ -43,6 +44,27 @@ define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr add
 ; VI-NEXT:    v_mov_b32_e32 v1, v0
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalar_to_vector_v2i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_mov_b32 s10, s6
+; GFX9-NEXT:    s_mov_b32 s11, s7
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s2
+; GFX9-NEXT:    s_mov_b32 s9, s3
+; GFX9-NEXT:    buffer_load_dword v0, off, s[8:11], 0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff0000
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
+; GFX9-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
   %tmp1 = load i32, ptr addrspace(1) %in, align 4
   %bc = bitcast i32 %tmp1 to <2 x i16>
   %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -90,6 +112,27 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(ptr addrspace(1) %out, ptr add
 ; VI-NEXT:    v_mov_b32_e32 v1, v0
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalar_to_vector_v2f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_mov_b32 s10, s6
+; GFX9-NEXT:    s_mov_b32 s11, s7
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s2
+; GFX9-NEXT:    s_mov_b32 s9, s3
+; GFX9-NEXT:    buffer_load_dword v0, off, s[8:11], 0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff0000
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
+; GFX9-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
   %tmp1 = load float, ptr addrspace(1) %in, align 4
   %bc = bitcast float %tmp1 to <2 x i16>
   %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -130,6 +173,23 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() {
 ; VI-NEXT:    v_mov_b32_e32 v1, s0
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalar_to_vector_v4i16:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    s_lshl_b32 s1, s0, 8
+; GFX9-NEXT:    s_or_b32 s0, s0, s1
+; GFX9-NEXT:    s_and_b32 s1, s0, 0xffff
+; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
+; GFX9-NEXT:    s_or_b32 s0, s1, s0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
 bb:
   %tmp = load <2 x i8>, ptr addrspace(1) undef, align 1
   %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -176,6 +236,28 @@ define amdgpu_kernel void @scalar_to_vector_v4f16() {
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalar_to_vector_v4f16:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    s_lshl_b32 s1, s0, 8
+; GFX9-NEXT:    s_or_b32 s0, s1, s0
+; GFX9-NEXT:    s_and_b32 s1, s0, 0xff00
+; GFX9-NEXT:    s_bfe_u32 s4, s0, 0x80008
+; GFX9-NEXT:    s_or_b32 s1, s4, s1
+; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX9-NEXT:    s_and_b32 s4, s1, 0xffff
+; GFX9-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX9-NEXT:    s_or_b32 s4, s4, s1
+; GFX9-NEXT:    s_or_b32 s0, s0, s1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s4
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
 bb:
   %load = load half, ptr addrspace(1) undef, align 1
   %tmp = bitcast half %load to <2 x i8>
@@ -235,16 +317,16 @@ define amdgpu_kernel void @scalar_to_vector_test6(ptr addrspace(1) %out, i8 zero
 ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
-; VI-LABEL: scalar_to_vector_test6:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; VI-NEXT:    s_mov_b32 s3, 0xf000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s6
-; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; GFX89-LABEL: scalar_to_vector_test6:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GFX89-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX89-NEXT:    s_mov_b32 s3, 0xf000
+; GFX89-NEXT:    s_mov_b32 s2, -1
+; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX89-NEXT:    v_mov_b32_e32 v0, s6
+; GFX89-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX89-NEXT:    s_endpgm
   %newvec0 = insertelement <4 x i8> undef, i8 %val, i32 0
   %bc = bitcast <4 x i8> %newvec0 to <2 x half>
   store <2 x half> %bc, ptr addrspace(1) %out

arsenm · 2025-01-13T12:31:41Z

Merge activity

Jan 13, 7:31 AM EST: A user started a stack merge that includes this pull request via Graphite.
Jan 13, 7:34 AM EST: Graphite rebased this pull request as part of a merge.
Jan 13, 7:35 AM EST: A user merged this pull request with Graphite.

llvm-ci · 2025-01-13T12:39:30Z

LLVM Buildbot has detected a new failure on builder lldb-x86_64-debian running on lldb-x86_64-debian while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/13964

Here is the relevant piece of the build log for the reference

Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: python_api/symbol-context/two-files/TestSymbolContextTwoFiles.py (167 of 2738)
PASS: lldb-api :: functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py (168 of 2738)
PASS: lldb-shell :: Subprocess/clone-follow-parent-wp.test (169 of 2738)
PASS: lldb-shell :: Subprocess/vfork-follow-child-wp.test (170 of 2738)
PASS: lldb-api :: lang/cpp/multiple-inheritance/TestCppMultipleInheritance.py (171 of 2738)
PASS: lldb-api :: lang/cpp/class_types/TestClassTypes.py (172 of 2738)
PASS: lldb-api :: lang/c/forward/TestForwardDeclaration.py (173 of 2738)
PASS: lldb-api :: functionalities/stop-on-sharedlibrary-load/TestStopOnSharedlibraryEvents.py (174 of 2738)
PASS: lldb-api :: lang/cpp/step-through-trampoline/TestStepThroughTrampoline.py (175 of 2738)
PASS: lldb-api :: commands/expression/nested/TestNestedExpressions.py (176 of 2738)
FAIL: lldb-api :: functionalities/thread/thread_specific_break_plus_condition/TestThreadSpecificBpPlusCondition.py (177 of 2738)
******************** TEST 'lldb-api :: functionalities/thread/thread_specific_break_plus_condition/TestThreadSpecificBpPlusCondition.py' FAILED ********************
Script:
--
/usr/bin/python3 /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./lib --env LLVM_INCLUDE_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/include --env LLVM_TOOLS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./bin --arch x86_64 --build-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex --lldb-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/lldb --compiler /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/clang --dsymutil /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./bin --lldb-obj-root /home/worker/2.0.1/lldb-x86_64-debian/build/tools/lldb --lldb-libs-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./lib -t /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/functionalities/thread/thread_specific_break_plus_condition -p TestThreadSpecificBpPlusCondition.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 20.0.0git (https://github.com/llvm/llvm-project.git revision e9a55770dcee48a3c28b71720db383762049a778)
  clang revision e9a55770dcee48a3c28b71720db383762049a778
  llvm revision e9a55770dcee48a3c28b71720db383762049a778
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
Change dir to: /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/functionalities/thread/thread_specific_break_plus_condition
UNSUPPORTED: LLDB (/home/worker/2.0.1/lldb-x86_64-debian/build/bin/clang-x86_64) :: test_python_dsym (TestThreadSpecificBpPlusCondition.ThreadSpecificBreakPlusConditionTestCase.test_python_dsym) (test case does not fall in any category of interest for this run) 
runCmd: settings clear -all

output: 

runCmd: settings set symbols.enable-external-lookup false

output: 

runCmd: settings set target.inherit-tcc true

output: 

runCmd: settings set target.disable-aslr false

output: 

runCmd: settings set target.detach-on-error false

output:

arsenm mentioned this pull request Jan 13, 2025

DAG: Fold bitcast of scalar_to_vector to anyext #122660

Merged

arsenm added the backend:AMDGPU label Jan 13, 2025 — with Graphite App

arsenm requested review from Pierre-vh, cdevadas, jayfoad, rampitec and shiltian January 13, 2025 03:04

arsenm marked this pull request as ready for review January 13, 2025 03:05

shiltian approved these changes Jan 13, 2025

View reviewed changes

AMDGPU: Add gfx9 run line to scalar_to_vector test

8581518

arsenm force-pushed the users/arsenm/amdgpu/add-gfx9-run-line-scalar-to-vector-test branch from a71b462 to 8581518 Compare January 13, 2025 12:33

arsenm merged commit e9a5577 into main Jan 13, 2025
5 of 7 checks passed

arsenm deleted the users/arsenm/amdgpu/add-gfx9-run-line-scalar-to-vector-test branch January 13, 2025 12:35

kazutakahirata pushed a commit to kazutakahirata/llvm-project that referenced this pull request Jan 13, 2025

AMDGPU: Add gfx9 run line to scalar_to_vector test (llvm#122659)

67cdf2a

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

AMDGPU: Add gfx9 run line to scalar_to_vector test #122659

AMDGPU: Add gfx9 run line to scalar_to_vector test #122659

Uh oh!

arsenm commented Jan 13, 2025

Uh oh!

arsenm commented Jan 13, 2025

Uh oh!

llvmbot commented Jan 13, 2025

Uh oh!

arsenm commented Jan 13, 2025 •

edited

Loading

Uh oh!

Uh oh!

llvm-ci commented Jan 13, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

AMDGPU: Add gfx9 run line to scalar_to_vector test #122659

AMDGPU: Add gfx9 run line to scalar_to_vector test #122659

Uh oh!

Conversation

arsenm commented Jan 13, 2025

Uh oh!

arsenm commented Jan 13, 2025

Uh oh!

llvmbot commented Jan 13, 2025

Uh oh!

arsenm commented Jan 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Merge activity

Uh oh!

Uh oh!

llvm-ci commented Jan 13, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

arsenm commented Jan 13, 2025 •

edited

Loading