Skip to content

Conversation

@davemgreen
Copy link
Collaborator

This helps with shuffles a little, and one of the AMDGPU tests is now equivalent to the SDAG version.

This helps with shuffles a little, and one of the AMDGPU tests is now
equivalent to the SDAG version.
@llvmbot
Copy link
Member

llvmbot commented Dec 7, 2024

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

This helps with shuffles a little, and one of the AMDGPU tests is now equivalent to the SDAG version.


Full diff: https://github.com/llvm/llvm-project/pull/119073.diff

4 Files Affected:

  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir (+24)
  • (modified) llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll (+4-12)
  • (modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+21-55)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b0c63fc7c7b806..ec6b16b70a96cd 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -428,7 +428,7 @@ def unary_undef_to_zero: GICombineRule<
 // replaced with undef.
 def propagate_undef_any_op: GICombineRule<
   (defs root:$root),
-  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root,
+  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT, G_PTRTOINT, G_INTTOPTR):$root,
          [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
   (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index b045deebc56e03..8eab47171e0d11 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -217,3 +217,27 @@ body:             |
     %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
     $q0 = COPY %large(<2 x s64>)
     $d0 = COPY %bv(<2 x s32>)
+...
+---
+name:            test_undef_ptrtoint
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_undef_ptrtoint
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $q0 = COPY [[DEF]](<2 x s64>)
+    %0:_(<2 x p0>) = G_IMPLICIT_DEF
+    %1:_(<2 x s64>) = G_PTRTOINT %0(<2 x p0>)
+    $q0 = COPY %1(<2 x s64>)
+...
+---
+name:            test_undef_inttoptr
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_undef_inttoptr
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x p0>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $q0 = COPY [[DEF]](<2 x p0>)
+    %0:_(<2 x s64>) = G_IMPLICIT_DEF
+    %1:_(<2 x p0>) = G_INTTOPTR %0(<2 x s64>)
+    $q0 = COPY %1(<2 x p0>)
diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index 89838391956f29..abdfb996fa1668 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -13,18 +13,10 @@ entry:
 }
 
 define <2 x ptr> @v2p0(<2 x ptr> %a) {
-; CHECK-SD-LABEL: v2p0:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: v2p0:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI1_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: v2p0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
 entry:
   %V128 = shufflevector <2 x ptr> %a, <2 x ptr> undef, <2 x i32> <i32 1, i32 0>
   ret <2 x ptr> %V128
diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
index 397502711283e5..e70dc8f7a6576a 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
@@ -100,59 +100,29 @@ define void @func_use_lds_global() {
 
 ; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (ptr addrspace(1)): local memory global used by non-kernel function
 define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) {
-; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b64 s[4:5], 0xc8
-; GFX8-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX8-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_trap 2
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: func_use_lds_global_constexpr_cast:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc8
-; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-GISEL-NEXT:    s_trap 2
-; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: func_use_lds_global_constexpr_cast:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    s_trap 2
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: func_use_lds_global_constexpr_cast:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_mov_b64 s[4:5], 0xc8
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_trap 2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: func_use_lds_global_constexpr_cast:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    s_trap 2
-; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: func_use_lds_global_constexpr_cast:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_trap 2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-LABEL: func_use_lds_global_constexpr_cast:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    s_cbranch_execnz .LBB1_2
-; SDAG-NEXT:  ; %bb.1:
-; SDAG-NEXT:    s_setpc_b64 s[30:31]
-; SDAG-NEXT:  .LBB1_2:
-; SDAG-NEXT:    s_endpgm
-;
-; GISEL-LABEL: func_use_lds_global_constexpr_cast:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_cbranch_execnz .LBB1_2
-; GISEL-NEXT:  ; %bb.1:
-; GISEL-NEXT:    global_store_dword v[0:1], v0, off
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-; GISEL-NEXT:  .LBB1_2:
-; GISEL-NEXT:    s_endpgm
+; CHECK-LABEL: func_use_lds_global_constexpr_cast:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_cbranch_execnz .LBB1_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    s_endpgm
   store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) %out, align 4
   ret void
 }
@@ -611,7 +581,3 @@ ret:
   ret i32 %phi
 }
 
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
-; GFX8: {{.*}}
-; GFX9: {{.*}}

@llvmbot
Copy link
Member

llvmbot commented Dec 7, 2024

@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

Changes

This helps with shuffles a little, and one of the AMDGPU tests is now equivalent to the SDAG version.


Full diff: https://github.com/llvm/llvm-project/pull/119073.diff

4 Files Affected:

  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir (+24)
  • (modified) llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll (+4-12)
  • (modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+21-55)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b0c63fc7c7b806..ec6b16b70a96cd 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -428,7 +428,7 @@ def unary_undef_to_zero: GICombineRule<
 // replaced with undef.
 def propagate_undef_any_op: GICombineRule<
   (defs root:$root),
-  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root,
+  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT, G_PTRTOINT, G_INTTOPTR):$root,
          [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
   (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index b045deebc56e03..8eab47171e0d11 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -217,3 +217,27 @@ body:             |
     %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
     $q0 = COPY %large(<2 x s64>)
     $d0 = COPY %bv(<2 x s32>)
+...
+---
+name:            test_undef_ptrtoint
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_undef_ptrtoint
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $q0 = COPY [[DEF]](<2 x s64>)
+    %0:_(<2 x p0>) = G_IMPLICIT_DEF
+    %1:_(<2 x s64>) = G_PTRTOINT %0(<2 x p0>)
+    $q0 = COPY %1(<2 x s64>)
+...
+---
+name:            test_undef_inttoptr
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_undef_inttoptr
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x p0>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $q0 = COPY [[DEF]](<2 x p0>)
+    %0:_(<2 x s64>) = G_IMPLICIT_DEF
+    %1:_(<2 x p0>) = G_INTTOPTR %0(<2 x s64>)
+    $q0 = COPY %1(<2 x p0>)
diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index 89838391956f29..abdfb996fa1668 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -13,18 +13,10 @@ entry:
 }
 
 define <2 x ptr> @v2p0(<2 x ptr> %a) {
-; CHECK-SD-LABEL: v2p0:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: v2p0:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI1_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: v2p0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
 entry:
   %V128 = shufflevector <2 x ptr> %a, <2 x ptr> undef, <2 x i32> <i32 1, i32 0>
   ret <2 x ptr> %V128
diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
index 397502711283e5..e70dc8f7a6576a 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
@@ -100,59 +100,29 @@ define void @func_use_lds_global() {
 
 ; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (ptr addrspace(1)): local memory global used by non-kernel function
 define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) {
-; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_mov_b64 s[4:5], 0xc8
-; GFX8-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX8-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-SDAG-NEXT:    s_trap 2
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: func_use_lds_global_constexpr_cast:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc8
-; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-GISEL-NEXT:    s_trap 2
-; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: func_use_lds_global_constexpr_cast:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    s_trap 2
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: func_use_lds_global_constexpr_cast:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_mov_b64 s[4:5], 0xc8
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_trap 2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: func_use_lds_global_constexpr_cast:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    s_trap 2
-; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: func_use_lds_global_constexpr_cast:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_trap 2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-LABEL: func_use_lds_global_constexpr_cast:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    s_cbranch_execnz .LBB1_2
-; SDAG-NEXT:  ; %bb.1:
-; SDAG-NEXT:    s_setpc_b64 s[30:31]
-; SDAG-NEXT:  .LBB1_2:
-; SDAG-NEXT:    s_endpgm
-;
-; GISEL-LABEL: func_use_lds_global_constexpr_cast:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_cbranch_execnz .LBB1_2
-; GISEL-NEXT:  ; %bb.1:
-; GISEL-NEXT:    global_store_dword v[0:1], v0, off
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-; GISEL-NEXT:  .LBB1_2:
-; GISEL-NEXT:    s_endpgm
+; CHECK-LABEL: func_use_lds_global_constexpr_cast:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_cbranch_execnz .LBB1_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    s_endpgm
   store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) %out, align 4
   ret void
 }
@@ -611,7 +581,3 @@ ret:
   ret i32 %phi
 }
 
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
-; GFX8: {{.*}}
-; GFX9: {{.*}}

@davemgreen davemgreen merged commit 9a415f6 into llvm:main Dec 9, 2024
12 checks passed
@davemgreen davemgreen deleted the gh-gi-undefptrtoint branch December 9, 2024 08:52
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants