@@ -31,10 +31,6 @@ def run(f):
3131def testManualReconstructedKernel ():
3232 module = Module .create ()
3333
34- # Add required module attributes
35- module .operation .attributes ["dlti.dl_spec" ] = Attribute .parse ("#dlti.dl_spec<>" )
36- module .operation .attributes ["gpu.container_module" ] = UnitAttr .get ()
37-
3834 i32 = IntegerType .get_signless (32 )
3935 i64 = IntegerType .get_signless (64 )
4036 f32 = F32Type .get ()
@@ -102,14 +98,9 @@ def testManualReconstructedKernel():
10298 loop_block = Block .create_at_start (parent = loop_op .region , arg_types = [i64 ])
10399
104100 with InsertionPoint (loop_block ):
105- idx0 = arith .index_cast (
106- out = IndexType .get (), in_ = loop_block .arguments [0 ]
107- )
108- val = memref .load (memref = f .arguments [1 ], indices = [idx0 ])
109- idx1 = arith .index_cast (
110- out = IndexType .get (), in_ = loop_block .arguments [0 ]
111- )
112- memref .store (value = val , memref = f .arguments [0 ], indices = [idx1 ])
101+ idx = arith .index_cast (out = IndexType .get (), in_ = loop_block .arguments [0 ])
102+ val = memref .load (memref = f .arguments [1 ], indices = [idx ])
103+ memref .store (value = val , memref = f .arguments [0 ], indices = [idx ])
113104 openacc .YieldOp ([])
114105
115106 openacc .YieldOp ([])
@@ -118,20 +109,19 @@ def testManualReconstructedKernel():
118109
119110 print (module )
120111
121- # CHECK-LABEL: func.func public @memcpy_idiom
122- # CHECK-SAME: ( %[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: i64) {
112+ # CHECK-LABEL: func.func public @memcpy_idiom(
113+ # CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: i64) {
123114 # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1024 : i32
124115 # CHECK: %[[CONSTANT_1:.*]] = arith.constant 128 : i32
125116 # CHECK: acc.parallel num_gangs({%[[CONSTANT_0]] : i32}) vector_length(%[[CONSTANT_1]] : i32) {
126117 # CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i64
127118 # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i64
128119 # CHECK: acc.loop gang control(%[[VAL_0:.*]] : i64) = (%[[CONSTANT_2]] : i64) to (%[[ARG2]] : i64) step (%[[CONSTANT_3]] : i64) {
129120 # CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[VAL_0]] : i64 to index
130- # CHECK: %[[LOAD_0:.*]] = memref.load %[[ARG1]][%[[INDEX_CAST_0]]] : memref<?xf32>
131- # CHECK: %[[INDEX_CAST_1:.*]] = arith.index_cast %[[VAL_0]] : i64 to index
132- # CHECK: memref.store %[[LOAD_0]], %[[ARG0]][%[[INDEX_CAST_1]]] : memref<?xf32>
121+ # CHECK: %[[LOAD_0:.*]] = memref.load %[[ARG1]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32>
122+ # CHECK: memref.store %[[LOAD_0]], %[[ARG0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32>
133123 # CHECK: acc.yield
134- # CHECK: }
124+ # CHECK: } attributes {independent = [#acc.device_type<none>]}
135125 # CHECK: acc.yield
136126 # CHECK: }
137127 # CHECK: return
0 commit comments