@@ -813,6 +813,32 @@ func.func @create_tensor_map(%devicePtr2d : memref<64x128xf32>, %devicePtr1d : m
813813 func.return
814814}
815815
// Lowering of `nvgpu.tma.create.descriptor` when the box sizes are written as
// integer literals directly in the op. The directives below expect every
// literal to be stored into consecutive slots of a 5 x i64 stack buffer, and
// that buffer to be passed as the last operand of the
// `mgpuTensorMapEncodeTiledMemref` runtime call.
//
// The label pins the matches to this function so they cannot be satisfied by
// the output of an earlier function that lowers the same op.
// CHECK-LABEL: @create_tensor_map_constant_box_dim(
func.func @create_tensor_map_constant_box_dim(%devicePtr2d : memref<64x128xf32>, %devicePtr1d : memref<128xf32>) {
  // 2-D descriptor: box sizes 64 and 128 go to slots 0 and 1.
  %devicePtr2d_unranked = memref.cast %devicePtr2d : memref<64x128xf32> to memref<*xf32>
  // CHECK: %[[C5_0:.*]] = llvm.mlir.constant(5 : i32) : i64
  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[C5_0]] x i64 : (i64) -> !llvm.ptr
  // CHECK: %[[C0_0:.*]] = llvm.mlir.constant(0 : i32) : i64
  // CHECK: %[[GEP_0:.*]] = llvm.getelementptr %[[ALLOCA]]{{\[}}%[[C0_0]]] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.ptr
  // CHECK: %[[C64:.*]] = llvm.mlir.constant(64 : i32) : i64
  // CHECK: llvm.store %[[C64]], %[[GEP_0]] : i64, !llvm.ptr
  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i64
  // CHECK: %[[GEP_1:.*]] = llvm.getelementptr %[[ALLOCA]]{{\[}}%[[C1]]] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.ptr
  // CHECK: %[[C128_0:.*]] = llvm.mlir.constant(128 : i32) : i64
  // CHECK: llvm.store %[[C128_0]], %[[GEP_1]] : i64, !llvm.ptr
  // CHECK: llvm.call @mgpuTensorMapEncodeTiledMemref({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[ALLOCA]])
  %tensorMap2d = nvgpu.tma.create.descriptor %devicePtr2d_unranked box[64, 128] : memref<*xf32> -> !tensorMap2d

  // 1-D descriptor: a fresh buffer is expected, with the single box size 128
  // in slot 0.
  %devicePtr1d_unranked = memref.cast %devicePtr1d : memref<128xf32> to memref<*xf32>
  // CHECK: %[[C5_1:.*]] = llvm.mlir.constant(5 : i32) : i64
  // CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %[[C5_1]] x i64 : (i64) -> !llvm.ptr
  // CHECK: %[[C0_1:.*]] = llvm.mlir.constant(0 : i32) : i64
  // CHECK: %[[GEP_2:.*]] = llvm.getelementptr %[[ALLOCA_1]]{{\[}}%[[C0_1]]] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.ptr
  // CHECK: %[[C128_1:.*]] = llvm.mlir.constant(128 : i32) : i64
  // CHECK: llvm.store %[[C128_1]], %[[GEP_2]] : i64, !llvm.ptr
  // CHECK: llvm.call @mgpuTensorMapEncodeTiledMemref({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[ALLOCA_1]])
  %tensorMap1d = nvgpu.tma.create.descriptor %devicePtr1d_unranked box[128] : memref<*xf32> -> !tensorMap1d
  func.return
}
841+
816842// CHECK-LABEL: @tma_prefetch(
817843// CHECK-SAME: %[[arg0:[a-zA-Z0-9_]+]]: !nvgpu.tensormap.descriptor<tensor = memref<128xf32, 3>, swizzle = none, l2promo = none, oob = nan, interleave = none>, %[[arg1:[a-zA-Z0-9_]+]]: i1
818844func.func @tma_prefetch (%tensorMap1d: !tensorMap1d , %p : i1 ) {
0 commit comments