@@ -331,42 +331,6 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 32 : i32, ttg.sha
331331
332332// -----
333333
334- #blocked = #ttg.blocked <{sizePerThread = [1 ], threadsPerWarp = [64 ], warpsPerCTA = [1 ], order = [0 ]}>
335- #shared = #ttg.swizzled_shared <{vec = 1 , perPhase = 1 , maxPhase = 1 , order = [0 ]}>
336- #smem = #ttg.shared_memory
337- module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 1 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
338- // COMMON-LABEL: buffer_load_to_local_wave_id
339- tt.func public @buffer_load_to_local_wave_id (%arg0: !tt.ptr <f32 > {tt.divisibility = 16 : i32 , tt.pointer_range = 32 : i32 },
340- %arg2: !ttg.memdesc <64 xf32 , #shared , #smem , mutable >, %arg3: i32 ) {
341- // COMMON: %[[C64:.+]] = llvm.mlir.constant(64 : i32) : i32
342- // COMMON-NEXT: %[[IDX:.+]] = rocdl.workitem.id.x : i32
343- // COMMON-NEXT: %[[C63:.+]] = llvm.mlir.constant(63 : i32) : i32
344- // COMMON-NEXT: %[[AND:.+]] = llvm.and %[[IDX]], %[[C63]] : i32
345- // COMMON-NEXT: %[[DIV:.+]] = llvm.udiv %[[AND]], %[[C64]] : i32
346- // COMMON-NEXT: %{{.+}} = rocdl.readfirstlane %[[DIV]] : i32
347-
348- // COMMON: %[[C64:.+]] = llvm.mlir.constant(64 : i32) : i32
349- // COMMON-NEXT: %[[IDX:.+]] = rocdl.workitem.id.x : i32
350- // COMMON-NEXT: %[[C63:.+]] = llvm.mlir.constant(63 : i32) : i32
351- // COMMON-NEXT: %[[AND:.+]] = llvm.and %[[IDX]], %[[C63]] : i32
352- // COMMON-NEXT: %[[DIV:.+]] = llvm.udiv %[[AND]], %[[C64]] : i32
353- // COMMON-NEXT: %{{.+}} = rocdl.readfirstlane %[[DIV]] : i32
354-
355- %0 = tt.make_range {end = 64 : i32 , start = 0 : i32 } : tensor <64 xi32 , #blocked >
356- %1 = amdg.buffer_load_to_local %arg0 [%0 ] into %arg2: <f32 >[tensor <64 xi32 , #blocked >] -> <64 xf32 , #shared , #smem , mutable >
357- %c0_i32 = arith.constant 0 : i32
358- %cond = llvm.icmp " eq" %arg3 , %c0_i32 : i32
359- cf.cond_br %cond , ^bb1 , ^bb2
360- ^bb1 :
361- amdg.buffer_load_to_local %arg0 [%0 ] into %arg2: <f32 >[tensor <64 xi32 , #blocked >] -> <64 xf32 , #shared , #smem , mutable >
362- cf.br ^bb1
363- ^bb2 :
364- tt.return
365- }
366- }
367-
368- // -----
369-
370334#blocked = #ttg.blocked <{sizePerThread = [4 ], threadsPerWarp = [32 ], warpsPerCTA = [4 ], order = [0 ]}>
371335#shared1D = #ttg.swizzled_shared <{vec = 2 , perPhase = 1 , maxPhase = 8 , order = [0 ]}>
372336#smem = #ttg.shared_memory
0 commit comments