Skip to content

Commit b39791e

Browse files
committed
Merge branch 'graphs' of https://github.com/chriselrod/LoopVectorization.jl into graphs
2 parents 62c20f6 + 873eb30 commit b39791e

File tree

4 files changed

+14
-9
lines changed

4 files changed

+14
-9
lines changed

Manifest.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,15 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
6161

6262
[[SIMDPirates]]
6363
deps = ["MacroTools", "VectorizationBase"]
64-
git-tree-sha1 = "18c27a6a5ad4c0fab6a478aca1947061f087d5fd"
64+
git-tree-sha1 = "72c002943060fe3518b77faf671a986652ca8f44"
6565
repo-rev = "master"
6666
repo-url = "https://github.com/chriselrod/SIMDPirates.jl"
6767
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
6868
version = "0.1.0"
6969

7070
[[SLEEFPirates]]
7171
deps = ["SIMDPirates", "VectorizationBase"]
72-
git-tree-sha1 = "05ab2c6c112a2ac249951f847e5ccf448323e98a"
72+
git-tree-sha1 = "42cbc7f06b1f2063fc08b2aa2f8cd2e70d1e91bc"
7373
repo-rev = "master"
7474
repo-url = "https://github.com/chriselrod/SLEEFPirates.jl"
7575
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
@@ -87,7 +87,7 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
8787

8888
[[VectorizationBase]]
8989
deps = ["CpuId", "LinearAlgebra"]
90-
git-tree-sha1 = "5105ee9720b9f4d90059ef1f99506c99c17b95cd"
90+
git-tree-sha1 = "913138bbc3e1892fbdd379fa48cafffe46a445c2"
9191
repo-rev = "master"
9292
repo-url = "https://github.com/chriselrod/VectorizationBase.jl"
9393
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"

src/determinestrategy.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ function evaluate_cost_tile(
239239
# Add to set of defined symbols
240240
push!(nested_loop_syms, itersym)
241241
if n == 1
242-
iter = length(ls, itersym) * length(ls, order[2]) / N
242+
iter = length(ls, itersym) * length(ls, order[2]) / W
243243
elseif n > 2
244244
iter *= Float64(length(ls, itersym))
245245
end
@@ -279,6 +279,7 @@ function evaluate_cost_tile(
279279
end
280280
Tstatic = isstaticloop(ls, tiled)
281281
Ustatic = isstaticloop(ls, unrolled)
282+
# @show order, cost_vec, reg_pressure
282283
if Tstatic
283284
if Ustatic
284285
solve_tilesize(cost_vec, reg_pressure, looprangehint(ls, tiled), looprangehint(ls, unrolled))

src/precompile.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ function _precompile_()
2020
isdefined(MacroTools, Symbol("#21#22")) && precompile(Tuple{getfield(MacroTools, Symbol("#21#22")),Symbol})
2121
isdefined(MacroTools, Symbol("#21#22")) && precompile(Tuple{getfield(MacroTools, Symbol("#21#22")),Symbol})
2222
isdefined(MacroTools, Symbol("#21#22")) && precompile(Tuple{getfield(MacroTools, Symbol("#21#22")),Symbol})
23-
isdefined(MacroTools, Symbol("#21#22")) && precompile(Tuple{getfield(MacroTools, Symbol("#21#22")),Type})
24-
isdefined(MacroTools, Symbol("#21#22")) && precompile(Tuple{getfield(MacroTools, Symbol("#21#22")),Type})
23+
isdefined(MacroTools, Symbol("#21#22")) && precompile(Tuple{getfield(MacroTools, Symbol("#21#22")),Type{T} where T})
24+
isdefined(MacroTools, Symbol("#21#22")) && precompile(Tuple{getfield(MacroTools, Symbol("#21#22")),Type{T} where T})
2525
precompile(Tuple{Core.kwftype(typeof(LoopVectorization._vectorloads!)),NamedTuple{(:itersym, :declared_iter_sym, :VectorizationDict, :mod),Tuple{Symbol,Symbol,Dict{Symbol,Tuple{Symbol,Symbol}},Module}},typeof(LoopVectorization._vectorloads!),Expr,Expr,Tuple{Dict{Symbol,Symbol},Dict{Tuple{Symbol,Symbol},Symbol},Dict{Expr,Symbol},Dict{Expr,Symbol}},Type{NTuple{8,VecElement{Float64}}},Expr,Expr})
2626
precompile(Tuple{Core.kwftype(typeof(LoopVectorization._vectorloads!)),NamedTuple{(:itersym, :declared_iter_sym, :VectorizationDict, :mod),Tuple{Symbol,Symbol,Dict{Symbol,Tuple{Symbol,Symbol}},Symbol}},typeof(LoopVectorization._vectorloads!),Expr,Expr,Tuple{Dict{Symbol,Symbol},Dict{Tuple{Symbol,Symbol},Symbol},Dict{Expr,Symbol},Dict{Expr,Symbol}},Type{NTuple{8,VecElement{Float64}}},Expr,Expr})
2727
precompile(Tuple{typeof(LoopVectorization.add_masks),Expr,Symbol,Dict{Tuple{Symbol,Symbol},Symbol},Module})
@@ -36,7 +36,7 @@ function _precompile_()
3636
precompile(Tuple{typeof(LoopVectorization.vectorize_body),Int64,Type{Float64},Int64,Symbol,Array{Any,1},Dict{Symbol,Tuple{Symbol,Symbol}},Any,Bool})
3737
precompile(Tuple{typeof(LoopVectorization.vectorize_body),Symbol,Symbol,Type{NTuple{8,VecElement{Float64}}},Int64,Symbol,Array{Any,1},Dict{Symbol,Tuple{Symbol,Symbol}},Bool,Int64,Int64,Symbol})
3838
precompile(Tuple{typeof(LoopVectorization.vectorize_body),Symbol,Type{Float64},Int64,Symbol,Array{Any,1},Dict{Symbol,Tuple{Symbol,Symbol}},Any,Bool})
39-
precompile(Tuple{typeof(LoopVectorization.vectorize_linear_index!),Expr,Dict{Expr,Symbol},Dict{Symbol,Symbol},Symbol,Expr,Symbol,Symbol,Symbol,Type})
40-
precompile(Tuple{typeof(LoopVectorization.vectorize_linear_index!),Expr,Dict{Expr,Symbol},Dict{Symbol,Symbol},Symbol,Symbol,Symbol,Symbol,Module,Type})
41-
precompile(Tuple{typeof(LoopVectorization.vectorize_linear_index!),Expr,Dict{Expr,Symbol},Dict{Symbol,Symbol},Symbol,Symbol,Symbol,Symbol,Symbol,Type})
39+
precompile(Tuple{typeof(LoopVectorization.vectorize_linear_index!),Expr,Dict{Expr,Symbol},Dict{Symbol,Symbol},Symbol,Expr,Symbol,Symbol,Symbol,Type{T} where T})
40+
precompile(Tuple{typeof(LoopVectorization.vectorize_linear_index!),Expr,Dict{Expr,Symbol},Dict{Symbol,Symbol},Symbol,Symbol,Symbol,Symbol,Module,Type{T} where T})
41+
precompile(Tuple{typeof(LoopVectorization.vectorize_linear_index!),Expr,Dict{Expr,Symbol},Dict{Symbol,Symbol},Symbol,Symbol,Symbol,Symbol,Symbol,Type{T} where T})
4242
end

test/runtests.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,13 @@ else
5454
(5,5)
5555
end
5656
@test LoopVectorization.choose_order(lsgemm) == (Symbol[:j,:i,:k], U, T)
57+
LoopVectorization.choose_order(lsgemm)
5758
LoopVectorization.lower(lsgemm)
5859
lsgemm.operations
5960

61+
LoopVectorization.choose_tile(lsgemm)
62+
LoopVectorization.choose_unroll_order(lsgemm)
63+
6064
ops = LoopVectorization.oporder(lsgemm);
6165
findall(length.(ops) .!= 0)
6266

0 commit comments

Comments
 (0)