Skip to content

Commit 78e0cc1

Browse files
committed
add *_nsw to costs. Fixes #295.
1 parent 1d67f0d commit 78e0cc1

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

src/modeling/costs.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ const COST = Dict{Symbol,InstructionCost}(
132132
# :vsub! => InstructionCost(4,0.5),
133133
# :vmul! => InstructionCost(4,0.5),
134134
:vmul => InstructionCost(4,0.5),
135+
:vmul_nsw => InstructionCost(4,0.5),
136+
:vadd_nsw => InstructionCost(4,0.5),
137+
:vsub_nsw => InstructionCost(4,0.5),
135138
:mul_fast => InstructionCost(4,0.5),
136139
# :vfdiv => InstructionCost(13,4.0,-2.0),
137140
# :vfdiv! => InstructionCost(13,4.0,-2.0),

test/offsetarrays.jl

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ using LoopVectorization: Static
214214
px = parent(x)
215215
px === x ? x : pparent(px)
216216
end
217+
218+
217219
for T ∈ (Float32, Float64)
218220
@show T, @__LINE__
219221
Abase = fill(T(NaN), 200, 200);
@@ -277,6 +279,27 @@ using LoopVectorization: Static
277279
fill!(out4, NaN); @test pparent(avxgeneric2!(out4', A, skern)')' ≈ pparent(out1)
278280
fill!(out4, NaN); @test pparent(avxgeneric2!(out4', At', kern)')' ≈ pparent(out1)
279281
fill!(out4, NaN); @test pparent(avxgeneric2!(out4', At', skern)')' ≈ pparent(out1)
280-
end
282+
end
283+
end
284+
function issue_295!(R, A, B)
285+
@turbo for j in 1:4
286+
for i in 0:1
287+
R[i, j] = A[2i + 2j] + 0 * B[j]
288+
end
289+
end
290+
R
291+
end
292+
293+
A = [i^2 for i in 1:10];
294+
B = [1,2,3,4];
295+
R = OffsetArray(zeros(Int, 2,4), 0:1, 1:4);
296+
function issue_295!(R, A, B)
297+
@turbo for j in 1:4
298+
for i in 0:1
299+
R[i, j] = A[2i + 2j] + 0 * B[j]
300+
end
281301
end
302+
R
303+
end
304+
@test issue_295!(R, A, B) == OffsetArray([4 16 36 64; 16 36 64 100], -1, 0)
282305
end

0 commit comments

Comments
 (0)