Skip to content

Conversation

@changpeng
Copy link
Contributor

No description provided.

@llvmbot llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels Jun 25, 2025
@llvmbot
Copy link
Member

llvmbot commented Jun 25, 2025

@llvm/pr-subscribers-mc

Author: Changpeng Fang (changpeng)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/145781.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+14-12)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s (+3)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 88b694862d376..5f575fc9fd588 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1036,9 +1036,11 @@ defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwo
 defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx4">;
 }
 
+let SubtargetPredicate = isGFX12PlusNot12_50 in
+  defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;
+
 let SubtargetPredicate = isGFX12Plus in {
   defm GLOBAL_ATOMIC_COND_SUB_U32    : FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>;
-  defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;
 
   def GLOBAL_INV    : FLAT_Global_Invalidate_Writeback<"global_inv">;
   def GLOBAL_WB     : FLAT_Global_Invalidate_Writeback<"global_wb">;
@@ -1827,19 +1829,19 @@ let SubtargetPredicate = isGFX12Plus in {
     defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace  <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace",  i32>;
 }
 
-let OtherPredicates = [isGFX12Plus] in {
+let OtherPredicates = [isGFX12PlusNot12_50] in
   defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>;
 
-  let WaveSizePredicate = isWave32 in {
-    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32, int_amdgcn_global_load_tr_b64, v2i32>;
-    foreach vt = [v8i16, v8f16, v8bf16] in
-      defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, vt>;
-  }
-  let WaveSizePredicate = isWave64 in {
-    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64, int_amdgcn_global_load_tr_b64, i32>;
-    foreach vt = [v4i16, v4f16, v4bf16] in
-      defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, vt>;
-  }
+let WaveSizePredicate = isWave32, OtherPredicates = [isGFX12Plus] in {
+  defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32, int_amdgcn_global_load_tr_b64, v2i32>;
+  foreach vt = [v8i16, v8f16, v8bf16] in
+    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, vt>;
+}
+
+let WaveSizePredicate = isWave64, OtherPredicates = [isGFX12PlusNot12_50] in {
+  defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64, int_amdgcn_global_load_tr_b64, i32>;
+  foreach vt = [v4i16, v4f16, v4bf16] in
+    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, vt>;
 }
 
 let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s b/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s
index 2111e91cd5ef2..89bd507942a22 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s
@@ -1,5 +1,8 @@
 ; RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
 
+global_atomic_ordered_add_b64 v0, v[2:3], s[0:1] offset:-64
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
 ;; DOT4_F32_*, DOT2_F32_*, DOT2_F16 and DOT2_BF16
 
 v_dot4_f32_fp8_fp8 v0, v1, v2, v3

@llvmbot
Copy link
Member

llvmbot commented Jun 25, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/145781.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+14-12)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s (+3)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 88b694862d376..5f575fc9fd588 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1036,9 +1036,11 @@ defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwo
 defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx4">;
 }
 
+let SubtargetPredicate = isGFX12PlusNot12_50 in
+  defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;
+
 let SubtargetPredicate = isGFX12Plus in {
   defm GLOBAL_ATOMIC_COND_SUB_U32    : FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>;
-  defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;
 
   def GLOBAL_INV    : FLAT_Global_Invalidate_Writeback<"global_inv">;
   def GLOBAL_WB     : FLAT_Global_Invalidate_Writeback<"global_wb">;
@@ -1827,19 +1829,19 @@ let SubtargetPredicate = isGFX12Plus in {
     defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace  <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace",  i32>;
 }
 
-let OtherPredicates = [isGFX12Plus] in {
+let OtherPredicates = [isGFX12PlusNot12_50] in
   defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>;
 
-  let WaveSizePredicate = isWave32 in {
-    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32, int_amdgcn_global_load_tr_b64, v2i32>;
-    foreach vt = [v8i16, v8f16, v8bf16] in
-      defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, vt>;
-  }
-  let WaveSizePredicate = isWave64 in {
-    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64, int_amdgcn_global_load_tr_b64, i32>;
-    foreach vt = [v4i16, v4f16, v4bf16] in
-      defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, vt>;
-  }
+let WaveSizePredicate = isWave32, OtherPredicates = [isGFX12Plus] in {
+  defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32, int_amdgcn_global_load_tr_b64, v2i32>;
+  foreach vt = [v8i16, v8f16, v8bf16] in
+    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, vt>;
+}
+
+let WaveSizePredicate = isWave64, OtherPredicates = [isGFX12PlusNot12_50] in {
+  defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64, int_amdgcn_global_load_tr_b64, i32>;
+  foreach vt = [v4i16, v4f16, v4bf16] in
+    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, vt>;
 }
 
 let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s b/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s
index 2111e91cd5ef2..89bd507942a22 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s
@@ -1,5 +1,8 @@
 ; RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
 
+global_atomic_ordered_add_b64 v0, v[2:3], s[0:1] offset:-64
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
 ;; DOT4_F32_*, DOT2_F32_*, DOT2_F16 and DOT2_BF16
 
 v_dot4_f32_fp8_fp8 v0, v1, v2, v3

@changpeng changpeng requested review from rampitec and shiltian June 25, 2025 21:05
@changpeng
Copy link
Contributor Author

Why does "CI Checks Build and Test Linux" always fail? @shiltian

@shiltian
Copy link
Contributor

I have no idea…I did break it though in one of my previous PRs.

@changpeng
Copy link
Contributor Author

I have no idea…I did break it though in one of my previous PRs.

I meant that I could not find what the error is; all I see in the log is:

ninja: build stopped: cannot make progress due to previous errors.

@changpeng
Copy link
Contributor Author

It could be caused by someone else's change:

2025-06-25T18:50:23.0053861Z FAILED: tools/bolt/lib/Core/CMakeFiles/LLVMBOLTCore.dir/BinarySection.cpp.o
2025-06-25T18:50:23.0064673Z CCACHE_CPP2=yes CCACHE_HASHDIR=yes CCACHE_SLOPPINESS=pch_defines,time_macros /usr/bin/ccache /opt/llvm/bin/clang++ -DCMAKE_INSTALL_FULL_LIBDIR="/home/gha/actions-runner/_work/llvm-project/llvm-project/build/install/lib" -DGTEST_HAS_RTTI=0 -DLLVM_BUILD_STATIC -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/bolt/lib/Core -I/home/gha/actions-runner/_work/llvm-project/llvm-project/bolt/lib/Core -I/home/gha/actions-runner/_work/llvm-project/llvm-project/build/include -I/home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/include -I/home/gha/actions-runner/_work/llvm-project/llvm-project/bolt/include -I/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/bolt/include -gmlt -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -std=c++17 -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT tools/bolt/lib/Core/CMakeFiles/LLVMBOLTCore.dir/BinarySection.cpp.o -MF tools/bolt/lib/Core/CMakeFiles/LLVMBOLTCore.dir/BinarySection.cpp.o.d -o tools/bolt/lib/Core/CMakeFiles/LLVMBOLTCore.dir/BinarySection.cpp.o -c /home/gha/actions-runner/_work/llvm-project/llvm-project/bolt/lib/Core/BinarySection.cpp
2025-06-25T18:50:23.0077017Z /home/gha/actions-runner/_work/llvm-project/llvm-project/bolt/lib/Core/BinarySection.cpp:298:44: error: call to 'copyByteArray' is ambiguous
2025-06-25T18:50:23.0078434Z 298 | auto *NewData = reinterpret_cast<char *>(copyByteArray(OS.str()));
2025-06-25T18:50:23.0079197Z | ^~~~~~~~~~~~~
2025-06-25T18:50:23.0080455Z /home/gha/actions-runner/_work/llvm-project/llvm-project/bolt/include/bolt/Core/BinarySection.h:526:17: note: candidate function
2025-06-25T18:50:23.0081623Z 526 | inline uint8_t *copyByteArray(StringRef Buffer) {
2025-06-25T18:50:23.0082217Z | ^
2025-06-25T18:50:23.0083216Z /home/gha/actions-runner/_work/llvm-project/llvm-project/bolt/include/bolt/Core/BinarySection.h:531:17: note: candidate function
2025-06-25T18:50:23.0084346Z 531 | inline uint8_t *copyByteArray(ArrayRef<char> Buffer) {
2025-06-25T18:50:23.0084918Z | ^
2025-06-25T18:50:23.0085253Z 1 error generated.

@changpeng changpeng merged commit ac29858 into llvm:main Jun 25, 2025
8 of 10 checks passed
@changpeng changpeng deleted the atom branch June 25, 2025 22:09
anthonyhatran pushed a commit to anthonyhatran/llvm-project that referenced this pull request Jun 26, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:mc Machine (object) code

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants