|
59 | 59 | # Expr(:call, :+, q, incr)
|
60 | 60 | # end
|
61 | 61 | # end
|
62 |
| -function lower_load_scalar!( |
"""
    pushscalarload!(q::Expr, op, var, u, U)

Append to `q.args` the assignment `##<var> = <lv(:load)>(ptr, offset)`, where `ptr` is
`refname(op)` and `offset` is `mem_offset(op)`.

`u` and `U` are accepted for signature parity with the other load helpers but are not
read by this method. Returns the result of the final `push!` (i.e. `q.args`).
"""
function pushscalarload!(q::Expr, op, var, u, U)
    ptr = refname(op)
    target = Symbol("##", var)
    loadcall = Expr(:call, lv(:load), ptr, mem_offset(op))
    push!(q.args, Expr(:(=), target, loadcall))
end
"""
    pushvectorload!(q::Expr, op, var, u, U, W, mask = nothing)

Append to `q.args` the assignment `##<var>_<u> = <lv(:vload)>(W, ptr, offset)`, where
`ptr` is `refname(op)` and `offset` is `mem_offset(op, u, W)`.

# Arguments
- `q`: expression whose `args` receive the generated assignment.
- `op`: operation passed to `refname` and `mem_offset`.
- `var`: base name of the destination variable.
- `u`: index used in the destination name and in the offset computation.
- `U`: compared against `u` to decide whether the mask applies (`u == U - 1`).
- `W`: forwarded as the first argument of the generated call and to `mem_offset`.
- `mask`: optional; when it is not `nothing` and `u == U - 1`, it is appended as the
  trailing argument of the generated call.

`mask` used to be read from a name that was neither a parameter nor a local, so the
function could not run without a global of that name. It is now an optional trailing
parameter, which keeps existing six-argument calls valid.

Returns the result of the final `push!` (i.e. `q.args`).
"""
function pushvectorload!(q::Expr, op, var, u, U, W, mask = nothing)
    ptr = refname(op)
    instrcall = Expr(:call, lv(:vload), W, ptr, mem_offset(op, u, W))
    # The mask is attached only when `u` is the last index of a `0:U-1` range.
    if mask !== nothing && u == U - 1
        push!(instrcall.args, mask)
    end
    push!(q.args, Expr(:(=), Symbol("##", var, :_, u), instrcall))
end
"""
    lower_load_scalar!(q, op, vectorized, W, unrolled, U, suffix, mask = nothing)

Append scalar load assignments for `op` to `q.args`.

Asserts that `vectorized` is not among `loopdependencies(op)`. The destination base name
is `op.variable`, extended with `_<suffix>` when `suffix` is not `nothing`.

- If `unrolled` is among the loop dependencies, one assignment
  `##<name>_<u> = <lv(:load)>(ptr, mem_offset(op, u))` is emitted for each `u` in `0:U-1`.
- Otherwise a single assignment `##<name> = <lv(:load)>(ptr, mem_offset(op))` is emitted.

`W` and `mask` are accepted but not read by this method. Returns `nothing`.
"""
function lower_load_scalar!(
    q::Expr, op::Operation, vectorized::Symbol, W::Symbol, unrolled::Symbol, U::Int,
    suffix::Union{Nothing,Int}, mask::Union{Nothing,Symbol,Unsigned} = nothing
)
    deps = loopdependencies(op)
    @assert vectorized ∉ deps
    name = suffix === nothing ? op.variable : Symbol(op.variable, :_, suffix)
    ptr = refname(op)
    if !(unrolled ∈ deps)
        # Not unrolled: a single load with no per-iteration suffix.
        loadcall = Expr(:call, lv(:load), ptr, mem_offset(op))
        push!(q.args, Expr(:(=), Symbol("##", name), loadcall))
        return nothing
    end
    # Unrolled: one load per unroll index, each into its own `_<u>`-suffixed variable.
    for u in 0:U-1
        loadcall = Expr(:call, lv(:load), ptr, mem_offset(op, u))
        push!(q.args, Expr(:(=), Symbol("##", name, :_, u), loadcall))
    end
    return nothing
end
|
76 | 94 | function lower_load_unrolled!(
|
77 | 95 | q::Expr, op::Operation, vectorized::Symbol, W::Symbol, unrolled::Symbol, U::Int,
|
78 | 96 | suffix::Union{Nothing,Int}, mask::Union{Nothing,Symbol,Unsigned} = nothing
|
79 | 97 | )
|
80 | 98 | loopdeps = loopdependencies(op)
|
81 |
| - @assert unrolled ∈ loopdeps |
| 99 | + @assert vectorized ∈ loopdeps |
82 | 100 | var = op.variable
|
83 | 101 | if suffix !== nothing
|
84 | 102 | var = Symbol(var, :_, suffix)
|
@@ -116,10 +134,10 @@ function lower_load!(
|
116 | 134 | )
|
117 | 135 | # @show op.instruction
|
118 | 136 | # @show unrolled, loopdependencies(op)
|
119 |
| - if unrolled ∈ loopdependencies(op) |
120 |
| - lower_load_unrolled!(q, op, W, unrolled, U, suffix, mask) |
| 137 | + if vectorized ∈ loopdependencies(op) |
| 138 | + lower_load_unrolled!(q, op, vectorized, W, unrolled, U, suffix, mask) |
121 | 139 | else
|
122 |
| - lower_load_scalar!(q, op, W, unrolled, U, suffix, mask) |
| 140 | + lower_load_scalar!(q, op, vectorized, W, unrolled, U, suffix, mask) |
123 | 141 | end
|
124 | 142 | end
|
125 | 143 | function reduce_range!(q::Expr, toreduct::Symbol, instr::Symbol, Uh::Int, Uh2::Int)
|
|
0 commit comments