llvm · quic-rb10 · Feb 21, 2025 · dcaballe · Feb 21, 2025 · quic-rb10
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
@@ -1414,6 +1414,9 @@ def ConvertVectorToLLVMPass : Pass<"convert-vector-to-llvm"> {
            "vector::VectorTransformsOptions",
            /*default=*/"vector::VectorTransformsOptions()",
            "Options to lower some operations like contractions and transposes.">,
+    Option<"indexBitwidth", "index-bitwidth", "unsigned",
+           /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
+           "Bitwidth of the index type, 0 to use size of machine word">,
   ];
 }
 

@@ -1439,8 +1439,6 @@ class VectorTypeCastOpConversion
     if (llvm::any_of(*targetStrides, ShapedType::isDynamic))
       return failure();
 
-    auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
-
     // Create descriptor.
     auto desc = MemRefDescriptor::poison(rewriter, loc, llvmTargetDescriptorTy);
     // Set allocated ptr.
@@ -1451,21 +1449,26 @@ class VectorTypeCastOpConversion
     Value ptr = sourceMemRef.alignedPtr(rewriter, loc);
     desc.setAlignedPtr(rewriter, loc, ptr);
     // Fill offset 0.
-    auto attr = rewriter.getIntegerAttr(rewriter.getIndexType(), 0);
-    auto zero = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, attr);
+
+    auto idxType = rewriter.getIndexType();
+    auto zero = rewriter.create<LLVM::ConstantOp>(
+        loc, typeConverter->convertType(idxType),
+        rewriter.getIntegerAttr(idxType, 0));
     desc.setOffset(rewriter, loc, zero);
 
     // Fill size and stride descriptors in memref.
     for (const auto &indexedSize :
          llvm::enumerate(targetMemRefType.getShape())) {
       int64_t index = indexedSize.index();
-      auto sizeAttr =
-          rewriter.getIntegerAttr(rewriter.getIndexType(), indexedSize.value());
-      auto size = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, sizeAttr);
+
+      auto size = rewriter.create<LLVM::ConstantOp>(
+          loc, typeConverter->convertType(idxType),
+          rewriter.getIntegerAttr(idxType, indexedSize.value()));
       desc.setSize(rewriter, loc, index, size);
-      auto strideAttr = rewriter.getIntegerAttr(rewriter.getIndexType(),
-                                                (*targetStrides)[index]);
-      auto stride = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, strideAttr);
+
+      auto stride = rewriter.create<LLVM::ConstantOp>(
+          loc, typeConverter->convertType(idxType),
+          rewriter.getIntegerAttr(idxType, (*targetStrides)[index]));
       desc.setStride(rewriter, loc, index, stride);
     }
 

@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
-
+#include "mlir/Analysis/DataLayoutAnalysis.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Dialect/AMX/AMXDialect.h"
@@ -64,6 +64,8 @@ void ConvertVectorToLLVMPass::runOnOperation() {
   // Perform progressive lowering of operations on slices and all contraction
   // operations. Also materializes masks, lowers vector.step, rank-reduces FMA,
   // applies folding and DCE.
+  Operation *op = getOperation();
+  const auto &dataLayoutAnalysis = getAnalysis<DataLayoutAnalysis>();
   {
     RewritePatternSet patterns(&getContext());
     populateVectorToVectorCanonicalizationPatterns(patterns);
@@ -83,10 +85,12 @@ void ConvertVectorToLLVMPass::runOnOperation() {
     populateVectorRankReducingFMAPattern(patterns);
     (void)applyPatternsGreedily(getOperation(), std::move(patterns));
   }
-
   // Convert to the LLVM IR dialect.
-  LowerToLLVMOptions options(&getContext());
-  LLVMTypeConverter converter(&getContext(), options);
+  LowerToLLVMOptions options(&getContext(),
+                             dataLayoutAnalysis.getAtOrAbove(op));
+  if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
+    options.overrideIndexBitwidth(indexBitwidth);
+  LLVMTypeConverter converter(&getContext(), options, &dataLayoutAnalysis);
   RewritePatternSet patterns(&getContext());
   populateVectorTransferLoweringPatterns(patterns);
   populateVectorToLLVMMatrixConversionPatterns(converter, patterns);

@@ -0,0 +1,25 @@
+// RUN: mlir-opt %s -convert-vector-to-llvm -split-input-file | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec< #dlti.dl_entry<index, 32>>} {
+// CHECK-LABEL:   func.func @broadcast_vec2d_from_vec0d(
+// CHECK-SAME:                                          %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: vector<f32>) -> vector<3x2xf32> {
+// CHECK:           %[[VAL_1:.*]] = builtin.unrealized_conversion_cast %[[VAL_0]] : vector<f32> to vector<1xf32>
+// CHECK:           %[[VAL_2:.*]] = ub.poison : vector<3x2xf32>
+// CHECK:           %[[VAL_3:.*]] = builtin.unrealized_conversion_cast %[[VAL_2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK:           %[[VAL_4:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK:           %[[VAL_5:.*]] = llvm.extractelement %[[VAL_1]]{{\[}}%[[VAL_4]] : i32] : vector<1xf32>
+// CHECK:           %[[VAL_6:.*]] = llvm.mlir.poison : vector<2xf32>
+// CHECK:           %[[VAL_7:.*]] = llvm.mlir.constant(0 : i32) : i32
+// CHECK:           %[[VAL_8:.*]] = llvm.insertelement %[[VAL_5]], %[[VAL_6]]{{\[}}%[[VAL_7]] : i32] : vector<2xf32>
+// CHECK:           %[[VAL_9:.*]] = llvm.shufflevector %[[VAL_8]], %[[VAL_6]] [0, 0] : vector<2xf32>
+// CHECK:           %[[VAL_10:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_3]][0] : !llvm.array<3 x vector<2xf32>>
+// CHECK:           %[[VAL_11:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_10]][1] : !llvm.array<3 x vector<2xf32>>
+// CHECK:           %[[VAL_12:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_11]][2] : !llvm.array<3 x vector<2xf32>>
+// CHECK:           %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.array<3 x vector<2xf32>> to vector<3x2xf32>
+// CHECK:           return %[[VAL_13]] : vector<3x2xf32>
+// CHECK:         }
+func.func @broadcast_vec2d_from_vec0d(%arg0: vector<f32>) -> vector<3x2xf32> {
+  %0 = vector.broadcast %arg0 : vector<f32> to vector<3x2xf32>
+  return %0 : vector<3x2xf32>
+}
+}