Skip to content

Conversation

@nhaehnle
Copy link
Collaborator

We previously got a duplicate implicit $exec operand. It didn't really
hurt anything (other than being a slight drag on compile-time
performance). Still, let's keep things clean.

We previously got a duplicate implicit $exec operand. It didn't really
hurt anything (other than being a slight drag on compile-time
performance). Still, let's keep things clean.

commit-id:203b6f66
@llvmbot
Copy link
Member

llvmbot commented Nov 17, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Nicolai Hähnle (nhaehnle)

Changes

We previously got a duplicate implicit $exec operand. It didn't really
hurt anything (other than being a slight drag on compile-time
performance). Still, let's keep things clean.


Full diff: https://github.com/llvm/llvm-project/pull/168426.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/twoaddr-wmma.mir (+12-12)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7cb7f47ddb220..a7333e3373f38 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4187,7 +4187,7 @@ SIInstrInfo::convertToThreeAddressImpl(MachineInstr &MI,
   if (NewMFMAOpc != -1) {
     MachineInstrBuilder MIB =
         BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
-    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
       MIB.add(MI.getOperand(I));
     return MIB;
   }
@@ -4196,7 +4196,7 @@ SIInstrInfo::convertToThreeAddressImpl(MachineInstr &MI,
     unsigned NewOpc = AMDGPU::mapWMMA2AddrTo3AddrOpcode(MI.getOpcode());
     MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
                                   .setMIFlags(MI.getFlags());
-    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
       MIB->addOperand(MI.getOperand(I));
     return MIB;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-wmma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-wmma.mir
index 98d2eca213aae..3aacfcd57b2de 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-wmma.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-wmma.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 %s --passes=two-address-instruction -verify-each -o - | FileCheck -check-prefix=GCN %s
 
 # GCN-LABEL: name: test_v_wmma_f32_16x16x16_f16_twoaddr_w32
-# GCN: early-clobber %2:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_f32_16x16x16_f16_twoaddr_w32
@@ -20,7 +20,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_f32_16x16x16_bf16_twoaddr_w32
-# GCN: early-clobber %2:vreg_256 = V_WMMA_F32_16X16X16_BF16_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_256 = V_WMMA_F32_16X16X16_BF16_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_f32_16x16x16_bf16_twoaddr_w32
@@ -38,7 +38,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_f16_16x16x16_f16_twoaddr_w32
-# GCN: early-clobber %2:vreg_256 = V_WMMA_F16_16X16X16_F16_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_256 = V_WMMA_F16_16X16X16_F16_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_f16_16x16x16_f16_twoaddr_w32
@@ -56,7 +56,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_bf16_16x16x16_bf16_twoaddr_w32
-# GCN: early-clobber %2:vreg_256 = V_WMMA_BF16_16X16X16_BF16_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_256 = V_WMMA_BF16_16X16X16_BF16_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_bf16_16x16x16_bf16_twoaddr_w32
@@ -74,7 +74,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_i32_16x16x16_iu8_twoaddr_w32
-# GCN: early-clobber %2:vreg_256 = V_WMMA_I32_16X16X16_IU8_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_256 = V_WMMA_I32_16X16X16_IU8_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_i32_16x16x16_iu8_twoaddr_w32
@@ -92,7 +92,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_i32_16x16x16_iu4_twoaddr_w32
-# GCN: early-clobber %2:vreg_256 = V_WMMA_I32_16X16X16_IU4_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_256 = V_WMMA_I32_16X16X16_IU4_threeaddr_w32 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_i32_16x16x16_iu4_twoaddr_w32
@@ -110,7 +110,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_f32_16x16x16_f16_twoaddr_w64
-# GCN: early-clobber %2:vreg_128 = V_WMMA_F32_16X16X16_F16_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_128 = V_WMMA_F32_16X16X16_F16_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_f32_16x16x16_f16_twoaddr_w64
@@ -128,7 +128,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_f32_16x16x16_bf16_twoaddr_w64
-# GCN: early-clobber %2:vreg_128 = V_WMMA_F32_16X16X16_BF16_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_128 = V_WMMA_F32_16X16X16_BF16_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_f32_16x16x16_bf16_twoaddr_w64
@@ -146,7 +146,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_f16_16x16x16_f16_twoaddr_w64
-# GCN: early-clobber %2:vreg_128 = V_WMMA_F16_16X16X16_F16_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_128 = V_WMMA_F16_16X16X16_F16_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_f16_16x16x16_f16_twoaddr_w64
@@ -164,7 +164,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_bf16_16x16x16_bf16_twoaddr_w64
-# GCN: early-clobber %2:vreg_128 = V_WMMA_BF16_16X16X16_BF16_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_128 = V_WMMA_BF16_16X16X16_BF16_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_bf16_16x16x16_bf16_twoaddr_w64
@@ -182,7 +182,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_i32_16x16x16_iu8_twoaddr_w64
-# GCN: early-clobber %2:vreg_128 = V_WMMA_I32_16X16X16_IU8_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_128 = V_WMMA_I32_16X16X16_IU8_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_i32_16x16x16_iu8_twoaddr_w64
@@ -200,7 +200,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_v_wmma_i32_16x16x16_iu4_twoaddr_w64
-# GCN: early-clobber %2:vreg_128 = V_WMMA_I32_16X16X16_IU4_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, implicit $exec
+# GCN: early-clobber %2:vreg_128 = V_WMMA_I32_16X16X16_IU4_threeaddr_w64 8, killed %1, 8, killed %1, 8, %0, 0, 0, 0, implicit $exec{{$}}
 
 ---
 name:            test_v_wmma_i32_16x16x16_iu4_twoaddr_w64

@github-actions
Copy link

🐧 Linux x64 Test Results

  • 186265 tests passed
  • 4848 tests skipped

@nhaehnle nhaehnle requested review from Sisyph and cmc-rep November 20, 2025 03:26
@nhaehnle nhaehnle merged commit ac55d78 into main Nov 21, 2025
12 checks passed
@nhaehnle nhaehnle deleted the users/nhaehnle/spr/main/203b6f66 branch November 21, 2025 00:25
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 21, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime-2 running on rocm-worker-hw-02 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/17744

Here is the relevant piece of the build log for the reference
Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libarcher :: races/lock-nested-unrelated.c' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 13
/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp  -gdwarf-4 -O1 -fsanitize=thread  -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src   /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp -latomic && env TSAN_OPTIONS='ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1' /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/deflake.bash /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp 2>&1 | tee /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp.log | /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/FileCheck /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp -gdwarf-4 -O1 -fsanitize=thread -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp -latomic
# note: command had no output on stdout or stderr
# executed command: env TSAN_OPTIONS=ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1 /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/deflake.bash /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp
# note: command had no output on stdout or stderr
# executed command: tee /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp.log
# note: command had no output on stdout or stderr
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/FileCheck /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c
# note: command had no output on stdout or stderr
# RUN: at line 14
/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp  -gdwarf-4 -O1 -fsanitize=thread  -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src   /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp -latomic && env ARCHER_OPTIONS="ignore_serial=1 report_data_leak=1" env TSAN_OPTIONS='ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1' /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/deflake.bash /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp 2>&1 | tee /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp.log | /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/FileCheck /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp -gdwarf-4 -O1 -fsanitize=thread -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp -latomic
# note: command had no output on stdout or stderr
# executed command: env 'ARCHER_OPTIONS=ignore_serial=1 report_data_leak=1' env TSAN_OPTIONS=ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1 /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/deflake.bash /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp
# note: command had no output on stdout or stderr
# executed command: tee /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/lock-nested-unrelated.c.tmp.log
# note: command had no output on stdout or stderr
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/FileCheck /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c
# .---command stderr------------
# | /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c:47:11: error: CHECK: expected string not found in input
# | // CHECK: ThreadSanitizer: reported {{[1-7]}} warnings
# |           ^
# | <stdin>:23:5: note: scanning from here
# | DONE
# |     ^
# | <stdin>:24:1: note: possible intended match here
# | ThreadSanitizer: thread T4 finished with ignores enabled, created at:
# | ^
# | 
# | Input file: <stdin>
# | Check file: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             .
# |             .
# |             .
# |            18:  #0 pthread_create /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1078:3 (lock-nested-unrelated.c.tmp+0xa416a) 
# |            19:  #1 __kmp_create_worker z_Linux_util.cpp (libomp.so+0xcb922) 
# |            20:  
# |            21: SUMMARY: ThreadSanitizer: data race /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/lock-nested-unrelated.c:33:8 in main.omp_outlined_debug__ 
# |            22: ================== 
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

6 participants