Skip to content

Conversation

@jrbyrnes
Copy link
Contributor

Precommit test for #131229

Change-Id: I836d088f954181709c7460eabb5e474654cd7132
@llvmbot
Copy link
Member

llvmbot commented Mar 13, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Jeffrey Byrnes (jrbyrnes)

Changes

Precommit test for #131229


Full diff: https://github.com/llvm/llvm-project/pull/131236.diff

1 File Affected:

  • (added) llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll (+150)
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
new file mode 100644
index 0000000000000..67ef5ba3732f4
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=slp-vectorizer -S  | FileCheck %s --check-prefix=GFX9
+
+define protected amdgpu_kernel void @myfun(i32 %in, ptr addrspace(1) %aptr1, ptr addrspace(1) %bptr1, ptr addrspace(1) %aptr2, ptr addrspace(1) %bptr2)  {
+; GFX9-LABEL: define protected amdgpu_kernel void @myfun(
+; GFX9-SAME: i32 [[IN:%.*]], ptr addrspace(1) [[APTR1:%.*]], ptr addrspace(1) [[BPTR1:%.*]], ptr addrspace(1) [[APTR2:%.*]], ptr addrspace(1) [[BPTR2:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX9-NEXT:  [[ENTRY:.*]]:
+; GFX9-NEXT:    [[VEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR1]], align 16
+; GFX9-NEXT:    [[EL0:%.*]] = extractelement <8 x i16> [[VEC1]], i64 0
+; GFX9-NEXT:    [[EL3:%.*]] = extractelement <8 x i16> [[VEC1]], i64 3
+; GFX9-NEXT:    [[BVEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR1]], align 16
+; GFX9-NEXT:    [[BEL0:%.*]] = extractelement <8 x i16> [[BVEC1]], i64 0
+; GFX9-NEXT:    [[BEL3:%.*]] = extractelement <8 x i16> [[BVEC1]], i64 3
+; GFX9-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    br label %[[DO_BODY:.*]]
+; GFX9:       [[DO_BODY]]:
+; GFX9-NEXT:    [[A_THREAD_BUF3:%.*]] = phi i16 [ [[EL3]], %[[ENTRY]] ], [ [[NEWEL3:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[B_THREAD_BUF3:%.*]] = phi i16 [ [[BEL3]], %[[ENTRY]] ], [ [[BNEWEL3:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[ADD:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEWADD:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[A_THREAD_BUF0:%.*]] = phi i16 [ [[EL0]], %[[ENTRY]] ], [ [[NEWEL0:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[B_THREAD_BUF0:%.*]] = phi i16 [ [[BEL0]], %[[ENTRY]] ], [ [[BNEWEL0:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP9:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP25:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], %[[ENTRY]] ], [ [[TMP32:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP11:%.*]] = phi <2 x i16> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP33:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP13:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP14:%.*]] = phi <2 x i16> [ [[TMP6]], %[[ENTRY]] ], [ [[TMP36:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP15:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP37:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[A_THREAD_VEC0:%.*]] = insertelement <8 x i16> poison, i16 [[A_THREAD_BUF0]], i64 0
+; GFX9-NEXT:    [[TMP17:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP16:%.*]] = shufflevector <8 x i16> [[A_THREAD_VEC0]], <8 x i16> [[TMP17]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[A_THREAD_VEC3:%.*]] = insertelement <8 x i16> [[TMP16]], i16 [[A_THREAD_BUF3]], i64 3
+; GFX9-NEXT:    [[TMP19:%.*]] = shufflevector <2 x i16> [[TMP10]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP20:%.*]] = shufflevector <8 x i16> [[A_THREAD_VEC3]], <8 x i16> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; GFX9-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i16> [[TMP11]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP22:%.*]] = shufflevector <8 x i16> [[TMP20]], <8 x i16> [[TMP21]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; GFX9-NEXT:    [[B_THREAD_VEC0:%.*]] = insertelement <8 x i16> poison, i16 [[B_THREAD_BUF0]], i64 0
+; GFX9-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i16> [[TMP13]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP23:%.*]] = shufflevector <8 x i16> [[B_THREAD_VEC0]], <8 x i16> [[TMP24]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[B_THREAD_VEC3:%.*]] = insertelement <8 x i16> [[TMP23]], i16 [[B_THREAD_BUF3]], i64 3
+; GFX9-NEXT:    [[TMP26:%.*]] = shufflevector <2 x i16> [[TMP14]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i16> [[B_THREAD_VEC3]], <8 x i16> [[TMP26]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; GFX9-NEXT:    [[TMP28:%.*]] = shufflevector <2 x i16> [[TMP15]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP29:%.*]] = shufflevector <8 x i16> [[TMP27]], <8 x i16> [[TMP28]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; GFX9-NEXT:    [[RES:%.*]] = add <8 x i16> [[TMP22]], [[TMP29]]
+; GFX9-NEXT:    [[VEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR2]], align 16
+; GFX9-NEXT:    [[NEWEL0]] = extractelement <8 x i16> [[VEC2]], i64 0
+; GFX9-NEXT:    [[NEWEL3]] = extractelement <8 x i16> [[VEC2]], i64 3
+; GFX9-NEXT:    [[BVEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR2]], align 16
+; GFX9-NEXT:    [[BNEWEL0]] = extractelement <8 x i16> [[BVEC2]], i64 0
+; GFX9-NEXT:    [[BNEWEL3]] = extractelement <8 x i16> [[BVEC2]], i64 3
+; GFX9-NEXT:    [[NEWADD]] = add i32 [[ADD]], 1
+; GFX9-NEXT:    [[COND:%.*]] = icmp sgt i32 [[NEWADD]], [[IN]]
+; GFX9-NEXT:    [[TMP25]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT:    [[TMP32]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP33]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    [[TMP30]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT:    [[TMP36]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP37]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    br i1 [[COND]], label %[[DO_BODY]], label %[[END:.*]]
+; GFX9:       [[END]]:
+; GFX9-NEXT:    ret void
+;
+entry: ; Precommit SLP-vectorizer input: load the initial A/B <8 x i16> vectors and extract every lane as a scalar.
+  %vec1 = load <8 x i16>, ptr addrspace(1) %aptr1
+  %el0 = extractelement <8 x i16> %vec1, i64 0
+  %el1 = extractelement <8 x i16> %vec1, i64 1
+  %el2 = extractelement <8 x i16> %vec1, i64 2
+  %el3 = extractelement <8 x i16> %vec1, i64 3
+  %el4 = extractelement <8 x i16> %vec1, i64 4
+  %el5 = extractelement <8 x i16> %vec1, i64 5
+  %el6 = extractelement <8 x i16> %vec1, i64 6
+  %el7 = extractelement <8 x i16> %vec1, i64 7
+  %bvec1 = load <8 x i16>, ptr addrspace(1) %bptr1
+  %bel0 = extractelement <8 x i16> %bvec1, i64 0
+  %bel1 = extractelement <8 x i16> %bvec1, i64 1
+  %bel2 = extractelement <8 x i16> %bvec1, i64 2
+  %bel3 = extractelement <8 x i16> %bvec1, i64 3
+  %bel4 = extractelement <8 x i16> %bvec1, i64 4
+  %bel5 = extractelement <8 x i16> %bvec1, i64 5
+  %bel6 = extractelement <8 x i16> %bvec1, i64 6
+  %bel7 = extractelement <8 x i16> %bvec1, i64 7
+  br label %do.body
+
+do.body: ; Each lane is carried by its own i16 phi, re-inserted in lane order into an <8 x i16>, then refreshed from %aptr2/%bptr2.
+  %a_thread_buf1 = phi i16 [%el1, %entry], [%newel1, %do.body]
+  %a_thread_buf2 = phi i16 [%el2, %entry], [%newel2, %do.body]
+  %a_thread_buf3 = phi i16 [%el3, %entry], [%newel3, %do.body]
+  %a_thread_buf4 = phi i16 [%el4, %entry], [%newel4, %do.body]
+  %a_thread_buf5 = phi i16 [%el5, %entry], [%newel5, %do.body]
+  %a_thread_buf6 = phi i16 [%el6, %entry], [%newel6, %do.body]
+  %a_thread_buf7 = phi i16 [%el7, %entry], [%newel7, %do.body]
+  %b_thread_buf1 = phi i16 [%bel1, %entry], [%bnewel1, %do.body]
+  %b_thread_buf2 = phi i16 [%bel2, %entry], [%bnewel2, %do.body]
+  %b_thread_buf3 = phi i16 [%bel3, %entry], [%bnewel3, %do.body]
+  %b_thread_buf4 = phi i16 [%bel4, %entry], [%bnewel4, %do.body]
+  %b_thread_buf5 = phi i16 [%bel5, %entry], [%bnewel5, %do.body]
+  %b_thread_buf6 = phi i16 [%bel6, %entry], [%bnewel6, %do.body]
+  %b_thread_buf7 = phi i16 [%bel7, %entry], [%bnewel7, %do.body]
+  %add = phi i32 [0, %entry], [%newadd, %do.body] ; iteration counter, declared between the lane 1-7 phis and the lane 0 phis
+  %a_thread_buf0 = phi i16 [%el0, %entry], [%newel0, %do.body]
+  %b_thread_buf0 = phi i16 [%bel0, %entry], [%bnewel0, %do.body]
+  %a_thread_vec0 = insertelement <8 x i16> poison, i16 %a_thread_buf0, i64 0
+  %a_thread_vec1 = insertelement <8 x i16> %a_thread_vec0, i16 %a_thread_buf1, i64 1
+  %a_thread_vec2 = insertelement <8 x i16> %a_thread_vec1, i16 %a_thread_buf2, i64 2
+  %a_thread_vec3 = insertelement <8 x i16> %a_thread_vec2, i16 %a_thread_buf3, i64 3
+  %a_thread_vec4 = insertelement <8 x i16> %a_thread_vec3, i16 %a_thread_buf4, i64 4
+  %a_thread_vec5 = insertelement <8 x i16> %a_thread_vec4, i16 %a_thread_buf5, i64 5
+  %a_thread_vec6 = insertelement <8 x i16> %a_thread_vec5, i16 %a_thread_buf6, i64 6
+  %a_thread_vec7 = insertelement <8 x i16> %a_thread_vec6, i16 %a_thread_buf7, i64 7
+  %b_thread_vec0 = insertelement <8 x i16> poison, i16 %b_thread_buf0, i64 0
+  %b_thread_vec1 = insertelement <8 x i16> %b_thread_vec0, i16 %b_thread_buf1, i64 1
+  %b_thread_vec2 = insertelement <8 x i16> %b_thread_vec1, i16 %b_thread_buf2, i64 2
+  %b_thread_vec3 = insertelement <8 x i16> %b_thread_vec2, i16 %b_thread_buf3, i64 3
+  %b_thread_vec4 = insertelement <8 x i16> %b_thread_vec3, i16 %b_thread_buf4, i64 4
+  %b_thread_vec5 = insertelement <8 x i16> %b_thread_vec4, i16 %b_thread_buf5, i64 5
+  %b_thread_vec6 = insertelement <8 x i16> %b_thread_vec5, i16 %b_thread_buf6, i64 6
+  %b_thread_vec7 = insertelement <8 x i16> %b_thread_vec6, i16 %b_thread_buf7, i64 7
+  %res = add <8 x i16> %a_thread_vec7, %b_thread_vec7 ; the sum has no users in this function
+  %vec2 = load <8 x i16>, ptr addrspace(1) %aptr2
+  %newel0 = extractelement <8 x i16> %vec2, i64 0
+  %newel1 = extractelement <8 x i16> %vec2, i64 1
+  %newel2 = extractelement <8 x i16> %vec2, i64 2
+  %newel3 = extractelement <8 x i16> %vec2, i64 3
+  %newel4 = extractelement <8 x i16> %vec2, i64 4
+  %newel5 = extractelement <8 x i16> %vec2, i64 5
+  %newel6 = extractelement <8 x i16> %vec2, i64 6
+  %newel7 = extractelement <8 x i16> %vec2, i64 7
+  %bvec2 = load <8 x i16>, ptr addrspace(1) %bptr2
+  %bnewel0 = extractelement <8 x i16> %bvec2, i64 0
+  %bnewel1 = extractelement <8 x i16> %bvec2, i64 1
+  %bnewel2 = extractelement <8 x i16> %bvec2, i64 2
+  %bnewel3 = extractelement <8 x i16> %bvec2, i64 3
+  %bnewel4 = extractelement <8 x i16> %bvec2, i64 4
+  %bnewel5 = extractelement <8 x i16> %bvec2, i64 5
+  %bnewel6 = extractelement <8 x i16> %bvec2, i64 6
+  %bnewel7 = extractelement <8 x i16> %bvec2, i64 7
+  %newadd = add i32 %add, 1
+  %cond = icmp sgt i32 %newadd, %in
+  br i1 %cond, label %do.body, label %end ; loop again while %newadd > %in (signed compare)
+
+end:
+  ret void
+}
+
+

@llvmbot
Copy link
Member

llvmbot commented Mar 13, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Jeffrey Byrnes (jrbyrnes)

Changes

Precommit test for #131229


Full diff: https://github.com/llvm/llvm-project/pull/131236.diff

1 File Affected:

  • (added) llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll (+150)
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
new file mode 100644
index 0000000000000..67ef5ba3732f4
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=slp-vectorizer -S  | FileCheck %s --check-prefix=GFX9
+
+define protected amdgpu_kernel void @myfun(i32 %in, ptr addrspace(1) %aptr1, ptr addrspace(1) %bptr1, ptr addrspace(1) %aptr2, ptr addrspace(1) %bptr2)  {
+; GFX9-LABEL: define protected amdgpu_kernel void @myfun(
+; GFX9-SAME: i32 [[IN:%.*]], ptr addrspace(1) [[APTR1:%.*]], ptr addrspace(1) [[BPTR1:%.*]], ptr addrspace(1) [[APTR2:%.*]], ptr addrspace(1) [[BPTR2:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX9-NEXT:  [[ENTRY:.*]]:
+; GFX9-NEXT:    [[VEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR1]], align 16
+; GFX9-NEXT:    [[EL0:%.*]] = extractelement <8 x i16> [[VEC1]], i64 0
+; GFX9-NEXT:    [[EL3:%.*]] = extractelement <8 x i16> [[VEC1]], i64 3
+; GFX9-NEXT:    [[BVEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR1]], align 16
+; GFX9-NEXT:    [[BEL0:%.*]] = extractelement <8 x i16> [[BVEC1]], i64 0
+; GFX9-NEXT:    [[BEL3:%.*]] = extractelement <8 x i16> [[BVEC1]], i64 3
+; GFX9-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    br label %[[DO_BODY:.*]]
+; GFX9:       [[DO_BODY]]:
+; GFX9-NEXT:    [[A_THREAD_BUF3:%.*]] = phi i16 [ [[EL3]], %[[ENTRY]] ], [ [[NEWEL3:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[B_THREAD_BUF3:%.*]] = phi i16 [ [[BEL3]], %[[ENTRY]] ], [ [[BNEWEL3:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[ADD:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEWADD:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[A_THREAD_BUF0:%.*]] = phi i16 [ [[EL0]], %[[ENTRY]] ], [ [[NEWEL0:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[B_THREAD_BUF0:%.*]] = phi i16 [ [[BEL0]], %[[ENTRY]] ], [ [[BNEWEL0:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP9:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP25:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], %[[ENTRY]] ], [ [[TMP32:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP11:%.*]] = phi <2 x i16> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP33:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP13:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP14:%.*]] = phi <2 x i16> [ [[TMP6]], %[[ENTRY]] ], [ [[TMP36:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP15:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP37:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[A_THREAD_VEC0:%.*]] = insertelement <8 x i16> poison, i16 [[A_THREAD_BUF0]], i64 0
+; GFX9-NEXT:    [[TMP17:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP16:%.*]] = shufflevector <8 x i16> [[A_THREAD_VEC0]], <8 x i16> [[TMP17]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[A_THREAD_VEC3:%.*]] = insertelement <8 x i16> [[TMP16]], i16 [[A_THREAD_BUF3]], i64 3
+; GFX9-NEXT:    [[TMP19:%.*]] = shufflevector <2 x i16> [[TMP10]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP20:%.*]] = shufflevector <8 x i16> [[A_THREAD_VEC3]], <8 x i16> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; GFX9-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i16> [[TMP11]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP22:%.*]] = shufflevector <8 x i16> [[TMP20]], <8 x i16> [[TMP21]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; GFX9-NEXT:    [[B_THREAD_VEC0:%.*]] = insertelement <8 x i16> poison, i16 [[B_THREAD_BUF0]], i64 0
+; GFX9-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i16> [[TMP13]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP23:%.*]] = shufflevector <8 x i16> [[B_THREAD_VEC0]], <8 x i16> [[TMP24]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[B_THREAD_VEC3:%.*]] = insertelement <8 x i16> [[TMP23]], i16 [[B_THREAD_BUF3]], i64 3
+; GFX9-NEXT:    [[TMP26:%.*]] = shufflevector <2 x i16> [[TMP14]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i16> [[B_THREAD_VEC3]], <8 x i16> [[TMP26]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; GFX9-NEXT:    [[TMP28:%.*]] = shufflevector <2 x i16> [[TMP15]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP29:%.*]] = shufflevector <8 x i16> [[TMP27]], <8 x i16> [[TMP28]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; GFX9-NEXT:    [[RES:%.*]] = add <8 x i16> [[TMP22]], [[TMP29]]
+; GFX9-NEXT:    [[VEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR2]], align 16
+; GFX9-NEXT:    [[NEWEL0]] = extractelement <8 x i16> [[VEC2]], i64 0
+; GFX9-NEXT:    [[NEWEL3]] = extractelement <8 x i16> [[VEC2]], i64 3
+; GFX9-NEXT:    [[BVEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR2]], align 16
+; GFX9-NEXT:    [[BNEWEL0]] = extractelement <8 x i16> [[BVEC2]], i64 0
+; GFX9-NEXT:    [[BNEWEL3]] = extractelement <8 x i16> [[BVEC2]], i64 3
+; GFX9-NEXT:    [[NEWADD]] = add i32 [[ADD]], 1
+; GFX9-NEXT:    [[COND:%.*]] = icmp sgt i32 [[NEWADD]], [[IN]]
+; GFX9-NEXT:    [[TMP25]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT:    [[TMP32]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP33]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    [[TMP30]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT:    [[TMP36]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP37]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    br i1 [[COND]], label %[[DO_BODY]], label %[[END:.*]]
+; GFX9:       [[END]]:
+; GFX9-NEXT:    ret void
+;
+entry: ; Precommit SLP-vectorizer input: load the initial A/B <8 x i16> vectors and extract every lane as a scalar.
+  %vec1 = load <8 x i16>, ptr addrspace(1) %aptr1
+  %el0 = extractelement <8 x i16> %vec1, i64 0
+  %el1 = extractelement <8 x i16> %vec1, i64 1
+  %el2 = extractelement <8 x i16> %vec1, i64 2
+  %el3 = extractelement <8 x i16> %vec1, i64 3
+  %el4 = extractelement <8 x i16> %vec1, i64 4
+  %el5 = extractelement <8 x i16> %vec1, i64 5
+  %el6 = extractelement <8 x i16> %vec1, i64 6
+  %el7 = extractelement <8 x i16> %vec1, i64 7
+  %bvec1 = load <8 x i16>, ptr addrspace(1) %bptr1
+  %bel0 = extractelement <8 x i16> %bvec1, i64 0
+  %bel1 = extractelement <8 x i16> %bvec1, i64 1
+  %bel2 = extractelement <8 x i16> %bvec1, i64 2
+  %bel3 = extractelement <8 x i16> %bvec1, i64 3
+  %bel4 = extractelement <8 x i16> %bvec1, i64 4
+  %bel5 = extractelement <8 x i16> %bvec1, i64 5
+  %bel6 = extractelement <8 x i16> %bvec1, i64 6
+  %bel7 = extractelement <8 x i16> %bvec1, i64 7
+  br label %do.body
+
+do.body: ; Each lane is carried by its own i16 phi, re-inserted in lane order into an <8 x i16>, then refreshed from %aptr2/%bptr2.
+  %a_thread_buf1 = phi i16 [%el1, %entry], [%newel1, %do.body]
+  %a_thread_buf2 = phi i16 [%el2, %entry], [%newel2, %do.body]
+  %a_thread_buf3 = phi i16 [%el3, %entry], [%newel3, %do.body]
+  %a_thread_buf4 = phi i16 [%el4, %entry], [%newel4, %do.body]
+  %a_thread_buf5 = phi i16 [%el5, %entry], [%newel5, %do.body]
+  %a_thread_buf6 = phi i16 [%el6, %entry], [%newel6, %do.body]
+  %a_thread_buf7 = phi i16 [%el7, %entry], [%newel7, %do.body]
+  %b_thread_buf1 = phi i16 [%bel1, %entry], [%bnewel1, %do.body]
+  %b_thread_buf2 = phi i16 [%bel2, %entry], [%bnewel2, %do.body]
+  %b_thread_buf3 = phi i16 [%bel3, %entry], [%bnewel3, %do.body]
+  %b_thread_buf4 = phi i16 [%bel4, %entry], [%bnewel4, %do.body]
+  %b_thread_buf5 = phi i16 [%bel5, %entry], [%bnewel5, %do.body]
+  %b_thread_buf6 = phi i16 [%bel6, %entry], [%bnewel6, %do.body]
+  %b_thread_buf7 = phi i16 [%bel7, %entry], [%bnewel7, %do.body]
+  %add = phi i32 [0, %entry], [%newadd, %do.body] ; iteration counter, declared between the lane 1-7 phis and the lane 0 phis
+  %a_thread_buf0 = phi i16 [%el0, %entry], [%newel0, %do.body]
+  %b_thread_buf0 = phi i16 [%bel0, %entry], [%bnewel0, %do.body]
+  %a_thread_vec0 = insertelement <8 x i16> poison, i16 %a_thread_buf0, i64 0
+  %a_thread_vec1 = insertelement <8 x i16> %a_thread_vec0, i16 %a_thread_buf1, i64 1
+  %a_thread_vec2 = insertelement <8 x i16> %a_thread_vec1, i16 %a_thread_buf2, i64 2
+  %a_thread_vec3 = insertelement <8 x i16> %a_thread_vec2, i16 %a_thread_buf3, i64 3
+  %a_thread_vec4 = insertelement <8 x i16> %a_thread_vec3, i16 %a_thread_buf4, i64 4
+  %a_thread_vec5 = insertelement <8 x i16> %a_thread_vec4, i16 %a_thread_buf5, i64 5
+  %a_thread_vec6 = insertelement <8 x i16> %a_thread_vec5, i16 %a_thread_buf6, i64 6
+  %a_thread_vec7 = insertelement <8 x i16> %a_thread_vec6, i16 %a_thread_buf7, i64 7
+  %b_thread_vec0 = insertelement <8 x i16> poison, i16 %b_thread_buf0, i64 0
+  %b_thread_vec1 = insertelement <8 x i16> %b_thread_vec0, i16 %b_thread_buf1, i64 1
+  %b_thread_vec2 = insertelement <8 x i16> %b_thread_vec1, i16 %b_thread_buf2, i64 2
+  %b_thread_vec3 = insertelement <8 x i16> %b_thread_vec2, i16 %b_thread_buf3, i64 3
+  %b_thread_vec4 = insertelement <8 x i16> %b_thread_vec3, i16 %b_thread_buf4, i64 4
+  %b_thread_vec5 = insertelement <8 x i16> %b_thread_vec4, i16 %b_thread_buf5, i64 5
+  %b_thread_vec6 = insertelement <8 x i16> %b_thread_vec5, i16 %b_thread_buf6, i64 6
+  %b_thread_vec7 = insertelement <8 x i16> %b_thread_vec6, i16 %b_thread_buf7, i64 7
+  %res = add <8 x i16> %a_thread_vec7, %b_thread_vec7 ; the sum has no users in this function
+  %vec2 = load <8 x i16>, ptr addrspace(1) %aptr2
+  %newel0 = extractelement <8 x i16> %vec2, i64 0
+  %newel1 = extractelement <8 x i16> %vec2, i64 1
+  %newel2 = extractelement <8 x i16> %vec2, i64 2
+  %newel3 = extractelement <8 x i16> %vec2, i64 3
+  %newel4 = extractelement <8 x i16> %vec2, i64 4
+  %newel5 = extractelement <8 x i16> %vec2, i64 5
+  %newel6 = extractelement <8 x i16> %vec2, i64 6
+  %newel7 = extractelement <8 x i16> %vec2, i64 7
+  %bvec2 = load <8 x i16>, ptr addrspace(1) %bptr2
+  %bnewel0 = extractelement <8 x i16> %bvec2, i64 0
+  %bnewel1 = extractelement <8 x i16> %bvec2, i64 1
+  %bnewel2 = extractelement <8 x i16> %bvec2, i64 2
+  %bnewel3 = extractelement <8 x i16> %bvec2, i64 3
+  %bnewel4 = extractelement <8 x i16> %bvec2, i64 4
+  %bnewel5 = extractelement <8 x i16> %bvec2, i64 5
+  %bnewel6 = extractelement <8 x i16> %bvec2, i64 6
+  %bnewel7 = extractelement <8 x i16> %bvec2, i64 7
+  %newadd = add i32 %add, 1
+  %cond = icmp sgt i32 %newadd, %in
+  br i1 %cond, label %do.body, label %end ; loop again while %newadd > %in (signed compare)
+
+end:
+  ret void
+}
+
+

@jrbyrnes jrbyrnes merged commit fc28f83 into llvm:main Mar 13, 2025
9 of 13 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants