Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Mar 4, 2025

No description provided.

Some of these demonstrate miscompiles that we just happen to not
hit yet.
Copy link
Contributor Author

arsenm commented Mar 4, 2025

@llvmbot
Copy link
Member

llvmbot commented Mar 4, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/129663.diff

2 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir (+34)
  • (added) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir (+129)
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
index 6ab1395a0dcca..413408b417c5a 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
@@ -393,3 +393,37 @@ body:             |
     SI_RETURN implicit $vgpr0, implicit $vgpr1
 
 ...
+
+---
+name:  fold_frame_index__through_reg_sequence_to_user_subreg
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $sgpr8
+    ; CHECK-LABEL: name: fold_frame_index__through_reg_sequence_to_user_subreg
+    ; CHECK: liveins: $sgpr8
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def $scc
+    ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], 123, implicit-def $scc
+    ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
+    ; CHECK-NEXT: $sgpr5 = COPY [[S_ADD_I32_1]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4, implicit $sgpr5
+    %0:sreg_32 = COPY $sgpr8
+    %1:sreg_32 = S_MOV_B32 123
+    %2:sreg_32 = S_MOV_B32 %stack.0
+    %3:sreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
+    %4:sreg_32 = S_ADD_I32 %0, %3.sub1, implicit-def $scc
+    %5:sreg_32 = S_ADD_I32 %0, %3.sub0, implicit-def $scc
+    $sgpr4 = COPY %4
+    $sgpr5 = COPY %5
+    SI_RETURN implicit $sgpr4, implicit $sgpr5
+...
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
new file mode 100644
index 0000000000000..591bda2b22f12
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
@@ -0,0 +1,129 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=si-fold-operands -o - %s | FileCheck %s
+
+# Make sure materializes of 64-bit immediates fold the correct value
+# into subregister uses.
+
+---
+name:            s_mov_b64_sub1_folds_wrong_value_0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    ; CHECK-LABEL: name: s_mov_b64_sub1_folds_wrong_value_0
+    ; CHECK: liveins: $sgpr8_sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
+    ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[REG_SEQUENCE]].sub0, 8, implicit-def $scc
+    ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[REG_SEQUENCE]].sub1, 8, implicit-def $scc, implicit $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
+    %2:sreg_64 = S_MOV_B64 8
+    %3:sreg_32 = S_ADD_U32 %1.sub0, %2.sub0, implicit-def $scc
+    %4:sreg_32 = S_ADDC_U32 %1.sub1, %2.sub1, implicit-def $scc, implicit $scc
+    %5:sreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+    S_ENDPGM 0, implicit %5
+
+...
+
+---
+name:            v_mov_b64_pseudo_sub1_folds_wrong_value
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr8_vgpr9
+
+    ; CHECK-LABEL: name: v_mov_b64_pseudo_sub1_folds_wrong_value
+    ; CHECK: liveins: $vgpr8_vgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub0, 30064771075, 0, implicit $exec
+    ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[REG_SEQUENCE]].sub1, 30064771075, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+    %0:vreg_64 = COPY $vgpr8_vgpr9
+    %1:vreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
+    %2:vreg_64 = V_MOV_B64_PSEUDO 30064771075, implicit $exec
+    %3:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %2.sub0, 0, implicit $exec
+    %4:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %1.sub1, %2.sub1, %6, 0, implicit $exec
+    %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+    S_ENDPGM 0, implicit %5
+
+...
+
+---
+name:            subreg_fold_imm
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9, $vgpr8_vgpr9
+
+    ; CHECK-LABEL: name: subreg_fold_imm
+    ; CHECK: liveins: $sgpr8_sgpr9, $vgpr8_vgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub1, [[COPY1]].sub1, 0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
+    %2:vreg_64 = COPY %0
+    %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub1, %2.sub1, 0, implicit $exec
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            s_mov_b64_into_reg_sequence_user
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    ; CHECK-LABEL: name: s_mov_b64_into_reg_sequence_user
+    ; CHECK: liveins: $sgpr8_sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 8
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B64_]].sub1, %subreg.sub0, [[S_MOV_B64_]].sub0, %subreg.sub1
+    ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sreg_64 = S_MOV_B64 8
+    %2:sreg_64 = REG_SEQUENCE %1.sub1, %subreg.sub0, %1.sub0, %subreg.sub1
+    %3:sreg_64 = S_AND_B64 %0, %2, implicit-def $scc
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            s_mov_b64_into_reg_sequence_user_with_subregs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    ; CHECK-LABEL: name: s_mov_b64_into_reg_sequence_user_with_subregs
+    ; CHECK: liveins: $sgpr8_sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 8
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B64_]].sub1, %subreg.sub0, [[S_MOV_B64_]].sub0, %subreg.sub1
+    ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 8, implicit-def $scc
+    ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]].sub1, 8, implicit-def $scc, implicit $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADDC_U32_]]
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sreg_64 = S_MOV_B64 8
+    %2:sreg_64 = REG_SEQUENCE %1.sub1, %subreg.sub0, %1.sub0, %subreg.sub1
+    %3:sreg_32 = S_ADD_U32 %0.sub0, %2.sub0, implicit-def $scc
+    %4:sreg_32 = S_ADDC_U32 %0.sub1, %2.sub1, implicit-def $scc, implicit $scc
+    S_ENDPGM 0, implicit %3, implicit %4
+
+...
+

@arsenm arsenm marked this pull request as ready for review March 4, 2025 07:32
@arsenm arsenm merged commit 0247a75 into main Mar 4, 2025
15 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu-add-tests-subreg-with-imm-si-fold-operands branch March 4, 2025 16:12
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants