intel
diff --git a/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp b/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
Lines changed: 23 additions & 22 deletions
diff --git a/test/Conversion/XeGPUToXeVM/loadstore_nd.mlir b/test/Conversion/XeGPUToXeVM/loadstore_nd.mlir
Lines changed: 28 additions & 1 deletion
diff --git a/test/Integration/Dialect/XeGPUToXeVM/gemm_4kx4kx4k_f16_f16_f16_simt.mlir b/test/Integration/Dialect/XeGPU/SIMT/gemm_4kx4kx4k_f16_f16_f16.mlir
rename from test/Integration/Dialect/XeGPUToXeVM/gemm_4kx4kx4k_f16_f16_f16_simt.mlir
rename to test/Integration/Dialect/XeGPU/SIMT/gemm_4kx4kx4k_f16_f16_f16.mlir
@@ -179,48 +179,49 @@ class CreateNdDescToXeVMPattern
     Value baseShapeH;
     Value offsetW;
     Value offsetH;
+    auto convertToValue = [&](OpFoldResult ofr) -> Value {
+      Value val;
+      if (auto v = llvm::dyn_cast_if_present<Value>(ofr)) {
+        val = rewriter.create<arith::IndexCastOp>(loc, i64Ty, v);
+        val = rewriter.create<arith::TruncIOp>(loc, payloadElemTy, val);
+      } else {
+        int32_t off = llvm::cast<IntegerAttr>(cast<Attribute>(ofr)).getInt();
+        val = rewriter.create<arith::ConstantIntOp>(loc, payloadElemTy, off);
+      }
+      return val;
+    };
+
+    int rank = op.getMixedOffsets().size();
+    if (rank != 2) {
+      op.emitError() << "Expected 2D offsets, got " << rank << "D offsets.";
+      return mlir::failure();
+    }
+    offsetW = convertToValue(op.getMixedOffsets()[rank - 1]);
+    offsetH = convertToValue(op.getMixedOffsets()[rank - 2]);
 
     if (auto sourceTy = source.getType(); isa<MemRefType>(sourceTy)) {
       baseAddr =
           rewriter.create<memref::ExtractAlignedPointerAsIndexOp>(loc, source);
+      baseAddr = rewriter.create<arith::IndexCastUIOp>(loc, i64Ty, baseAddr);
       auto sourceMemrefTy = cast<MemRefType>(sourceTy);
       if (!sourceMemrefTy.hasStaticShape()) {
         op.emitError() << "Expected static memref shape.";
         return mlir::failure();
       }
       auto rank = sourceMemrefTy.getRank();
-      if (rank != 2) {
-        op.emitError() << "Expected a 2D memref.";
-        return mlir::failure();
-      }
-      auto createOffset = [&](unsigned idx) -> Value {
-        Value val;
-        OpFoldResult ofr = op.getMixedOffsets()[idx];
-        if (auto v = llvm::dyn_cast_if_present<Value>(ofr)) {
-          val = rewriter.create<arith::IndexCastOp>(loc, i64Ty, v);
-          val = rewriter.create<arith::TruncIOp>(loc, payloadElemTy, val);
-        } else {
-          int32_t off = llvm::cast<IntegerAttr>(cast<Attribute>(ofr)).getInt();
-          val = rewriter.create<arith::ConstantIntOp>(loc, payloadElemTy, off);
-        }
-        return val;
-      };
-      offsetW = createOffset(rank - 1);
-      offsetH = createOffset(rank - 2);
       baseShapeW = rewriter.create<arith::ConstantIntOp>(
           loc, payloadElemTy, sourceMemrefTy.getDimSize(rank - 1));
       baseShapeH = rewriter.create<arith::ConstantIntOp>(
           loc, payloadElemTy, sourceMemrefTy.getDimSize(rank - 2));
     } else if (isa<IntegerType>(sourceTy)) {
-      op.emitError()
-          << "Integer as source are currently not supported by the pass.";
-      return mlir::failure();
+      baseAddr = source;
+      baseShapeW = convertToValue(op.getMixedSizes()[rank - 1]);
+      baseShapeH = convertToValue(op.getMixedSizes()[rank - 2]);
     } else {
       op.emitError() << "Unknown source type.";
       return mlir::failure();
     }
 
-    baseAddr = rewriter.create<arith::IndexCastUIOp>(loc, i64Ty, baseAddr);
     Value payLoadAsI64 =
         rewriter.create<vector::BitCastOp>(loc, payloadI64Ty, payload);
     payLoadAsI64 = rewriter.create<vector::InsertOp>(
 
@@ -1,4 +1,4 @@
-// RUN: imex-opt -convert-xegpu-to-xevm %s | FileCheck %s
+// RUN: imex-opt -convert-xegpu-to-xevm -allow-unregistered-dialect %s | FileCheck %s
 
 gpu.module @load_store_check {
     gpu.func @load_store(%src: memref<8x16xf32, 1>, %dst: memref<8x16xf32, 1>) kernel {
@@ -66,4 +66,31 @@ gpu.module @load_store_check {
         xegpu.store_nd %loaded_modified, %dst_tdesc <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>: vector<8xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<memory_space = global>>
         gpu.return
     }
+
+    gpu.func @create_nd_tdesc_integer_source(%src: i64, %src_h : index, %src_w : index)  kernel {
+        %c1 = arith.constant 1 : index
+        %c4 = arith.constant 4 : index
+        %c8 = arith.constant 8 : index
+        %c0 = arith.constant 0 : index
+        // CHECK: %[[PAYLOAD:.*]] = arith.constant dense<0> : vector<8xi32>
+        // CHECK: %[[T0:.*]] = arith.index_cast %{{.*}} : index to i64
+        // CHECK: %[[T1:.*]] = arith.trunci %[[T0]] : i64 to i32
+        // CHECK: %[[T2:.*]] = arith.index_cast %{{.*}} : index to i64
+        // CHECK: %[[T3:.*]] = arith.trunci %[[T2]] : i64 to i32
+        // CHECK: %[[T4:.*]] = arith.index_cast %{{.*}} : index to i64
+        // CHECK: %[[T5:.*]] = arith.trunci %[[T4]] : i64 to i32
+        // CHECK: %[[T6:.*]] = arith.index_cast %{{.*}} : index to i64
+        // CHECK: %[[T7:.*]] = arith.trunci %[[T6]] : i64 to i32
+        // CHECK: %[[T8:.*]] = vector.bitcast %[[PAYLOAD]] : vector<8xi32> to vector<4xi64>
+        // CHECK: %[[T9:.*]] = vector.insert %{{.*}}, %[[T8]] [0] : i64 into vector<4xi64>
+        // CHECK: %[[T10:.*]] = vector.bitcast %[[T9]] : vector<4xi64> to vector<8xi32>
+        // CHECK: %[[T11:.*]] = vector.insert %[[T5]], %[[T10]] [2] : i32 into vector<8xi32>
+        // CHECK: %[[T12:.*]] = vector.insert %[[T7]], %[[T11]] [3] : i32 into vector<8xi32>
+        // CHECK: %[[T13:.*]] = vector.insert %[[T1]], %[[T12]] [4] : i32 into vector<8xi32>
+        // CHECK: %[[T14:.*]] = vector.insert %[[T3]], %[[T13]] [5] : i32 into vector<8xi32>
+        %src_tdesc = xegpu.create_nd_tdesc %src [%c4, %c8], [%src_h, %src_w], [%src_w, %c1] : i64
+            -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<memory_space = global>>
+        "some_op"(%src_tdesc) : (!xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<memory_space = global>>) -> ()
+        gpu.return
+    }
 }