Skip to content

Commit 8c37cae

Browse files
committed
Bump version.
1 parent 029948c commit 8c37cae

File tree

9 files changed

+82
-54
lines changed

9 files changed

+82
-54
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
- julia: nightly
1717
include:
1818
- stage: "Documentation"
19-
julia: 1.3
19+
julia: 1.4
2020
os: linux
2121
script:
2222
- julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()));

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.7.6"
4+
version = "0.7.7"
55

66
[deps]
77
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"

src/condense_loopset.jl

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,14 @@ function ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta, arraysymbolinds::
4343
indices |= getloopid(ls, ind)
4444
else
4545
parent = get(ls.opdict, ind, nothing)
46-
if parent === nothing
47-
index_types |= SymbolicIndex
48-
indices |= findindoradd!(arraysymbolinds, ind)
49-
else
50-
index_types |= ComputedIndex
51-
indices |= identifier(parent)
52-
end
46+
@assert !isnothing(parent) # Symbolic indices should have been subset
47+
# if parent === nothing
48+
# index_types |= SymbolicIndex
49+
# indices |= findindoradd!(arraysymbolinds, ind)
50+
# else
51+
index_types |= ComputedIndex
52+
indices |= identifier(parent)
53+
# end
5354
end
5455
end
5556
ArrayRefStruct{mref.ref.array,mref.ptr}( index_types, indices, offsets )
@@ -154,13 +155,14 @@ function argmeta_and_consts_description(ls::LoopSet, arraysymbolinds)
154155
end
155156

156157
function loopset_return_value(ls::LoopSet, ::Val{extract}) where {extract}
157-
if length(ls.outer_reductions) == 1
158+
@assert !iszero(length(ls.outer_reductions))
159+
if isone(length(ls.outer_reductions))
158160
if extract
159161
Expr(:call, :extract_data, Symbol(mangledvar(getop(ls, ls.outer_reductions[1])), 0))
160162
else
161163
Symbol(mangledvar(getop(ls, ls.outer_reductions[1])), 0)
162164
end
163-
elseif length(ls.outer_reductions) > 1
165+
else#if length(ls.outer_reductions) > 1
164166
ret = Expr(:tuple)
165167
ops = operations(ls)
166168
for or ∈ ls.outer_reductions
@@ -171,8 +173,6 @@ function loopset_return_value(ls::LoopSet, ::Val{extract}) where {extract}
171173
end
172174
end
173175
ret
174-
else
175-
nothing
176176
end
177177
end
178178

@@ -296,8 +296,8 @@ make_fast_and_crashy(q) = q |> make_fast |> make_crashy
296296

297297
function setup_call_inline(ls::LoopSet, inline::Int8 = zero(Int8), U::Int8 = zero(Int8), T::Int8 = zero(Int8))
298298
call = generate_call(ls, (inline,U,T))
299-
hasouterreductions = length(ls.outer_reductions) > 0
300-
if !hasouterreductions
299+
noouterreductions = iszero(length(ls.outer_reductions))
300+
if noouterreductions
301301
q = Expr(:block,gc_preserve(ls, call))
302302
append!(ls.preamble.args, q.args)
303303
return ls.preamble
@@ -315,7 +315,7 @@ function setup_call_inline(ls::LoopSet, inline::Int8 = zero(Int8), U::Int8 = zer
315315
push!(outer_reducts.args, out)
316316
push!(q.args, Expr(:(=), var, Expr(:call, lv(reduction_scalar_combine(instr)), out, var)))
317317
end
318-
hasouterreductions && pushpreamble!(ls, outer_reducts)
318+
pushpreamble!(ls, outer_reducts)
319319
append!(ls.preamble.args, q.args)
320320
ls.preamble
321321
end

src/costs.jl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,19 +91,19 @@ function vector_cost(ic::InstructionCost, Wshift, sizeof_T)
9191
srt, sl, srp
9292
end
9393

94-
const OPAQUE_INSTRUCTION = InstructionCost(50, 50.0, -1.0, VectorizationBase.REGISTER_COUNT)
94+
const OPAQUE_INSTRUCTION = InstructionCost(-1.0, 50, 50.0, VectorizationBase.REGISTER_COUNT)
9595

9696
instruction_cost(instruction::Instruction) = instruction.mod === :LoopVectorization ? COST[instruction.instr] : OPAQUE_INSTRUCTION
9797
instruction_cost(instruction::Symbol) = get(COST, instruction, OPAQUE_INSTRUCTION)
9898
scalar_cost(instr::Instruction) = scalar_cost(instruction_cost(instr))
9999
vector_cost(instr::Instruction, Wshift, sizeof_T) = vector_cost(instruction_cost(instr), Wshift, sizeof_T)
100-
function cost(instruction::InstructionCost, Wshift, sizeof_T)
101-
Wshift == 0 ? scalar_cost(instruction) : vector_cost(instruction, Wshift, sizeof_T)
102-
end
100+
# function cost(instruction::InstructionCost, Wshift, sizeof_T)
101+
# Wshift == 0 ? scalar_cost(instruction) : vector_cost(instruction, Wshift, sizeof_T)
102+
# end
103103

104-
function cost(instruction::Instruction, Wshift, sizeof_T)
105-
cost( instruction_cost(instruction), Wshift, sizeof_T )
106-
end
104+
# function cost(instruction::Instruction, Wshift, sizeof_T)
105+
# cost( instruction_cost(instruction), Wshift, sizeof_T )
106+
# end
107107

108108

109109
# Just a semi-reasonable assumption; should not be that sensitive to anything other than loads
@@ -323,11 +323,11 @@ function reduction_scalar_combine(x::Float64)
323323
x == ADDITIVE_IN_REDUCTIONS ? :reduced_add : x == MULTIPLICATIVE_IN_REDUCTIONS ? :reduced_prod : x == MAX ? :reduced_max : x == MIN ? :reduced_min : throw("Reduction not found.")
324324
end
325325
reduction_scalar_combine(x) = reduction_scalar_combine(reduction_instruction_class(x))
326-
function reduction_combine_to(x::Float64)
327-
# x == 1.0 ? :reduce_to_add : x == 2.0 ? :reduce_to_prod : x == 3.0 ? :reduce_to_any : x == 4.0 ? :reduce_to_all : x == 5.0 ? :reduce_to_max : x == 6.0 ? :reduce_to_min : throw("Reduction not found.")
328-
x == ADDITIVE_IN_REDUCTIONS ? :reduce_to_add : x == MULTIPLICATIVE_IN_REDUCTIONS ? :reduce_to_prod : x == MAX ? :reduce_to_max : x == MIN ? :reduce_to_min : throw("Reduction not found.")
329-
end
330-
reduction_combine_to(x) = reduction_combine_to(reduction_instruction_class(x))
326+
# function reduction_combine_to(x::Float64)
327+
# # x == 1.0 ? :reduce_to_add : x == 2.0 ? :reduce_to_prod : x == 3.0 ? :reduce_to_any : x == 4.0 ? :reduce_to_all : x == 5.0 ? :reduce_to_max : x == 6.0 ? :reduce_to_min : throw("Reduction not found.")
328+
# x == ADDITIVE_IN_REDUCTIONS ? :reduce_to_add : x == MULTIPLICATIVE_IN_REDUCTIONS ? :reduce_to_prod : x == MAX ? :reduce_to_max : x == MIN ? :reduce_to_min : throw("Reduction not found.")
329+
# end
330+
# reduction_combine_to(x) = reduction_combine_to(reduction_instruction_class(x))
331331
function reduction_zero(x::Float64)
332332
# x == 1.0 ? :zero : x == 2.0 ? :one : x == 3.0 ? :false : x == 4.0 ? :true : x == 5.0 ? :typemin : x == 6.0 ? :typemax : throw("Reduction not found.")
333333
x == ADDITIVE_IN_REDUCTIONS ? :zero : x == MULTIPLICATIVE_IN_REDUCTIONS ? :one : x == MAX ? :typemin : x == MIN ? :typemax : throw("Reduction not found.")

src/precompile.jl

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,32 @@
11
function _precompile_()
22
ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
3-
3+
precompile(Tuple{typeof(LoopVectorization.lower),LoopVectorization.LoopSet})
44
precompile(Tuple{Type{LoopVectorization.LoopSet},Expr})
5-
precompile(Tuple{typeof(Base.mapreduce_impl),typeof(LoopVectorization.elsize),typeof(max),Array{LoopVectorization.Operation,1},Int64,Int64})
5+
precompile(Tuple{typeof(Base.mapreduce_impl),typeof(LoopVectorization.elsize),typeof(max),Array{LoopVectorization.Operation,1},Int,Int})
66
precompile(Tuple{typeof(LoopVectorization._avx_loopset),Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Any})
7-
precompile(Tuple{typeof(LoopVectorization.add_broadcast!),LoopVectorization.LoopSet,Symbol,Symbol,Array{Symbol,1},Type{Array{Bool,1}},Int64})
8-
precompile(Tuple{typeof(LoopVectorization.add_ci_call!),Expr,Any,Array{Any,1},Array{Symbol,1},Int64,Symbol})
9-
precompile(Tuple{typeof(LoopVectorization.add_ci_call!),Expr,Any,Array{Any,1},Array{Symbol,1},Int64})
10-
precompile(Tuple{typeof(LoopVectorization.add_constant!),LoopVectorization.LoopSet,Float64,Array{Symbol,1},Symbol,Int64})
11-
precompile(Tuple{typeof(LoopVectorization.add_parent!),Array{LoopVectorization.Operation,1},Array{Symbol,1},Array{Symbol,1},LoopVectorization.LoopSet,Int64,Int64,Int64})
12-
precompile(Tuple{typeof(LoopVectorization.avx_body),LoopVectorization.LoopSet,Tuple{Int8,Int8}})
7+
precompile(Tuple{typeof(LoopVectorization.add_broadcast!),LoopVectorization.LoopSet,Symbol,Symbol,Array{Symbol,1},Type{Array{Bool,1}},Int})
8+
precompile(Tuple{typeof(LoopVectorization.add_ci_call!),Expr,Any,Array{Any,1},Array{Symbol,1},Int,Symbol})
9+
precompile(Tuple{typeof(LoopVectorization.add_ci_call!),Expr,Any,Array{Any,1},Array{Symbol,1},Int})
10+
precompile(Tuple{typeof(LoopVectorization.add_constant!),LoopVectorization.LoopSet,Float64,Array{Symbol,1},Symbol,Int})
11+
precompile(Tuple{typeof(LoopVectorization.add_parent!),Array{LoopVectorization.Operation,1},Array{Symbol,1},Array{Symbol,1},LoopVectorization.LoopSet,Int,Int,Int})
12+
precompile(Tuple{typeof(LoopVectorization.avx_body),LoopVectorization.LoopSet,Tuple{Int8,Int8,Int8}})
1313
precompile(Tuple{typeof(LoopVectorization.avx_loopset),Array{LoopVectorization.Instruction,1},Array{LoopVectorization.OperationStruct,1},Array{LoopVectorization.ArrayRefStruct,1},Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Any})
1414
precompile(Tuple{typeof(LoopVectorization.cost_vec_buf),LoopVectorization.LoopSet})
1515
precompile(Tuple{typeof(LoopVectorization.evaluate_cost_tile),LoopVectorization.LoopSet,Array{Symbol,1},Symbol,Symbol,Symbol})
1616
precompile(Tuple{typeof(LoopVectorization.evaluate_cost_unroll),LoopVectorization.LoopSet,Array{Symbol,1},Symbol,Float64})
17-
precompile(Tuple{typeof(LoopVectorization.lower_block),LoopVectorization.LoopSet,LoopVectorization.UnrollSpecification,Int64,Nothing,Int64})
18-
precompile(Tuple{typeof(LoopVectorization.lower_block),LoopVectorization.LoopSet,LoopVectorization.UnrollSpecification,Int64,Symbol,Int64})
19-
precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Int64,Nothing,Bool})
20-
precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Int64,Symbol,Bool})
21-
precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Nothing,Nothing,Bool})
22-
precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Nothing,Symbol,Bool})
23-
precompile(Tuple{typeof(LoopVectorization.lower_load!),Expr,LoopVectorization.Operation,Symbol,LoopVectorization.LoopSet,Symbol,Symbol,Int64,Int64,Nothing})
24-
precompile(Tuple{typeof(LoopVectorization.lower_load_scalar!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Nothing,Int64})
17+
# precompile(Tuple{typeof(LoopVectorization.lower_block),LoopVectorization.LoopSet,LoopVectorization.UnrollSpecification,Int,Nothing,Int})
18+
# precompile(Tuple{typeof(LoopVectorization.lower_block),LoopVectorization.LoopSet,LoopVectorization.UnrollSpecification,Int,Symbol,Int})
19+
# precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int,Int,Nothing,Bool})
20+
# precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int,Int,Symbol,Bool})
21+
# precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int,Nothing,Nothing,Bool})
22+
# precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int,Nothing,Symbol,Bool})
23+
# precompile(Tuple{typeof(LoopVectorization.lower_load!),Expr,LoopVectorization.Operation,Symbol,LoopVectorization.LoopSet,Symbol,Symbol,Int,Int,Nothing})
24+
# precompile(Tuple{typeof(LoopVectorization.lower_load_scalar!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int,Nothing,Int})
2525
precompile(Tuple{typeof(LoopVectorization.reg_pres_buf),LoopVectorization.LoopSet})
2626
precompile(Tuple{typeof(LoopVectorization.setup_call),LoopVectorization.LoopSet})
27-
precompile(Tuple{typeof(LoopVectorization.solve_unroll),SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},Int64},true},SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},Int64},true},Int64,Int64})
27+
precompile(Tuple{typeof(LoopVectorization.solve_unroll),SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true},SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true},Int,Int})
2828
precompile(Tuple{typeof(LoopVectorization.substitute_broadcast),Expr,Symbol})
29-
precompile(Tuple{typeof(LoopVectorization.vmap_quote),Int64,Type{Float32}})
29+
precompile(Tuple{typeof(LoopVectorization.vmap_quote),Int,Type{Float32}})
3030
precompile(Tuple{typeof(println),Base.GenericIOBuffer{Array{UInt8,1}},Array{LoopVectorization.Operation,1}})
31-
precompile(Tuple{typeof(resize!),LoopVectorization.LoopOrder,Int64})
31+
precompile(Tuple{typeof(resize!),LoopVectorization.LoopOrder,Int})
3232
end

src/reconstruct_loopset.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ function ArrayReferenceMeta(
7575
pushfirst!(offset_vec, offset)
7676
pushfirst!(loopedindex, true)
7777
end
78-
elseif index_types == ComputedIndex
78+
else#if index_types == ComputedIndex
79+
@assert index_types == ComputedIndex
7980
opsym = opsymbols[ind]
8081
if expandedv[ind]
8182
nops = nopsv[ind]
@@ -89,11 +90,11 @@ function ArrayReferenceMeta(
8990
pushfirst!(offset_vec, offset)
9091
pushfirst!(loopedindex, false)
9192
end
92-
else
93-
@assert index_types == SymbolicIndex
94-
pushfirst!(index_vec, arraysymbolinds[ind])
95-
pushfirst!(offset_vec, offset)
96-
pushfirst!(loopedindex, false)
93+
# else
94+
# @assert index_types == SymbolicIndex
95+
# pushfirst!(index_vec, arraysymbolinds[ind])
96+
# pushfirst!(offset_vec, offset)
97+
# pushfirst!(loopedindex, false)
9798
end
9899
index_types >>>= 8
99100
indices >>>= 8

test/copy.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,10 @@ using LoopVectorization, OffsetArrays, Test
5353
end
5454
end
5555
function make2point3avx!(x)
56+
a = 1.742416161578685
57+
b = 1.5
5658
@avx for i ∈ eachindex(x)
57-
x[i] = 2.3
59+
x[i] = a ^ b
5860
end
5961
end
6062
function make2point3_avx!(x)

test/dot.jl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,27 @@ using Test
183183
4acc/length(x)
184184
end
185185

186+
function dotloopinductvarpow(x)
187+
s = zero(eltype(x))
188+
for i ∈ eachindex(x)
189+
s += x[i] * i^3
190+
end
191+
s
192+
end
193+
function dotloopinductvarpowavx(x)
194+
s = zero(eltype(x))
195+
@avx for i ∈ eachindex(x)
196+
s += x[i] * i^3
197+
end
198+
s
199+
end
200+
function dot_from_n_to_100(a, b, n)
201+
s = zero(eltype(a))
202+
@avx for i ∈ n:100
203+
s += a[i] * b[i]
204+
end
205+
s
206+
end
186207
# @macroexpand @_avx for i = 1:length(a_re) - 1
187208
# c_re[i] = b_re[i] * a_re[i + 1] - b_im[i] * a_im[i + 1]
188209
# c_im[i] = b_re[i] * a_im[i + 1] + b_im[i] * a_re[i + 1]
@@ -220,6 +241,9 @@ using Test
220241
@test πest == pi_avx_u4(a, b)
221242
end
222243

244+
@test dotloopinductvarpow(a) ≈ dotloopinductvarpowavx(a)
245+
@test dot_from_n_to_100(a, b, 33) == @views mydotavx(a[33:100], b[33:100])
246+
223247
a_re = rand(R, N); a_im = rand(R, N);
224248
b_re = rand(R, N); b_im = rand(R, N);
225249
ac = Complex.(a_re, a_im);

test/miscellaneous.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,7 @@ using Test
835835
end
836836

837837
@testset "Mixed CartesianIndex/Int indexing" begin
838+
@show T, @__LINE__
838839
# A demo similar to the exponential filtering demo from https://julialang.org/blog/2016/02/iteration/,
839840
# but with no loop-carried dependency.
840841
function smoothdim!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)

0 commit comments

Comments
 (0)