Scalarize dynamically-indexed insertelement

Icohedron · Icohedron · commit 15cf98cfec40 · 2025-05-27T23:38:08.000Z
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -68,7 +68,7 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
   bool visitCastInst(CastInst &CI) { return false; }
   bool visitBitCastInst(BitCastInst &BCI) { return false; }
-  bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
+  bool visitInsertElementInst(InsertElementInst &IEI);
   bool visitExtractElementInst(ExtractElementInst &EEI);
   bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
   bool visitPHINode(PHINode &PHI) { return false; }
@@ -172,6 +172,62 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
   return false;
 }
 
+/// Rewrite an insertelement whose index is not a compile-time constant.
+///
+/// The source vector is spilled lane by lane into a stack array of the
+/// equivalent array type and the new scalar is stored through a GEP that uses
+/// the dynamic index. If the result has users, the updated vector is rebuilt
+/// from the array with constant-index inserts and the users are redirected.
+///
+/// \returns true if \p IEI was replaced, false if it was left untouched.
+bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
+  // If the index is a constant then we don't need to scalarize it
+  Value *Index = IEI.getOperand(2);
+  if (isa<ConstantInt>(Index))
+    return false;
+
+  Value *Vec = IEI.getOperand(0);
+  Value *Val = IEI.getOperand(1);
+  Type *IndexTy = Index->getType();
+  Type *VecTy = Vec->getType();
+
+  IRBuilder<> Builder(&IEI);
+  Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
+  Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
+  Value *Zero = ConstantInt::get(IndexTy, 0);
+  const uint64_t NumElts = ArrTy->getArrayNumElements();
+
+  // Copy every lane of the source vector into the array.
+  for (unsigned I = 0; I < NumElts; ++I) {
+    Value *EE = Builder.CreateExtractElement(Vec, I);
+    Value *GEP = Builder.CreateInBoundsGEP(
+        ArrTy, ArrAlloca, {Zero, ConstantInt::get(IndexTy, I)});
+    Builder.CreateStore(EE, GEP);
+  }
+
+  // Overwrite the dynamically selected slot with the inserted scalar.
+  Value *DynGEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca, {Zero, Index});
+  Builder.CreateStore(Val, DynGEP);
+
+  // The insertelement yields a vector, so erasing it while it still has users
+  // would leave them dangling. Read the array back into a vector and redirect
+  // the users to it. Nothing extra is emitted when the result is unused.
+  if (!IEI.use_empty()) {
+    Type *EltTy = ArrTy->getArrayElementType();
+    Value *NewVec = PoisonValue::get(VecTy);
+    for (unsigned I = 0; I < NumElts; ++I) {
+      Value *GEP = Builder.CreateInBoundsGEP(
+          ArrTy, ArrAlloca, {Zero, ConstantInt::get(IndexTy, I)});
+      Value *Elt = Builder.CreateLoad(EltTy, GEP);
+      NewVec = Builder.CreateInsertElement(NewVec, Elt, I);
+    }
+    IEI.replaceAllUsesWith(NewVec);
+  }
+
+  IEI.eraseFromParent();
+  return true;
+}
+
 bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
   // If the index is a constant then we don't need to scalarize it
   Value *Index = EEI.getIndexOperand();
diff --git a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
@@ -1,38 +1,70 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
-define float @extract_float_vec_dynamic(<4 x float> %0, i32 %1) {
+define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
 ; CHECK-LABEL: define float @extract_float_vec_dynamic(
-; CHECK-SAME: <4 x float> [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT:    [[TMP3:%.*]] = alloca [4 x float], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 0
+; CHECK-SAME: <4 x float> [[V:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [4 x float], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[V]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 0
+; CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[V]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 1
 ; CHECK-NEXT:    store float [[TMP4]], ptr [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[V]], i64 2
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 2
 ; CHECK-NEXT:    store float [[TMP6]], ptr [[TMP7]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[V]], i64 3
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 3
 ; CHECK-NEXT:    store float [[TMP8]], ptr [[TMP9]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 3
-; CHECK-NEXT:    store float [[TMP10]], ptr [[TMP11]], align 4
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 [[TMP1]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
-; CHECK-NEXT:    ret float [[TMP13]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4
+; CHECK-NEXT:    ret float [[TMP11]]
 ;
-  %e = extractelement <4 x float> %0, i32 %1
-  ret float %e
+  %ee = extractelement <4 x float> %v, i32 %i
+  ret float %ee
+}
+
+define void @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
+; CHECK-LABEL: define void @insert_i32_vec_dynamic(
+; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [3 x i32], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i32> [[V]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <3 x i32> [[V]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <3 x i32> [[V]], i64 2
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 2
+; CHECK-NEXT:    store i32 [[TMP6]], ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 [[I]]
+; CHECK-NEXT:    store i32 [[A]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    ret void
+;
+  insertelement <3 x i32> %v, i32 %a, i32 %i
+  ret void
 }
 
 ; An extractelement with a constant index should not be converted to array form
-define i16 @extract_i16_vec_constant(<4 x i16> %0) {
+define i16 @extract_i16_vec_constant(<4 x i16> %v) {
 ; CHECK-LABEL: define i16 @extract_i16_vec_constant(
-; CHECK-SAME: <4 x i16> [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
-; CHECK-NEXT:    ret i16 [[E]]
+; CHECK-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-NEXT:    [[EE:%.*]] = extractelement <4 x i16> [[V]], i32 1
+; CHECK-NEXT:    ret i16 [[EE]]
+;
+  %ee = extractelement <4 x i16> %v, i32 1
+  ret i16 %ee
+}
+
+; An insertelement with a constant index should not be converted to array form
+define void @insert_half_vec_constant(<2 x half> %v, half %a) {
+; CHECK-LABEL: define void @insert_half_vec_constant(
+; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
+; CHECK-NEXT:    ret void
 ;
-  %e = extractelement <4 x i16> %0, i32 1
-  ret i16 %e
+  insertelement <2 x half> %v, half %a, i32 1
+  ret void
 }