Skip to content

Commit f9f19e3

Browse files
committed
Reduce unrolling when unnecessary. Additionally, fix #143.
1 parent 4dbdd1a commit f9f19e3

File tree

5 files changed

+6
-5
lines changed

5 files changed

+6
-5
lines changed

src/determinestrategy.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ function solve_unroll(X, R, u₁L, u₂L, u₁step, u₂step)
402402
u₂low = min(u₂low, maxunroll)
403403
u₁high = min(u₁high, maxunroll)
404404
u₂high = min(u₂high, maxunroll)
405-
solve_unroll_iter(X, R, u₁L, u₂L, u₁low:u₁step:u₁high, u₂low:u₂step:u₂high)
405+
solve_unroll_iter(X, R, u₁L, u₂L, u₁high:-u₁step:u₁low, u₂high:-u₂step:u₂low)
406406
end
407407

408408
function solve_unroll_constU(R::AbstractVector, u₁::Int)

src/operations.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ end
324324
function Operation(id::Int, var::Symbol, elementbytes::Int, instr, optype::OperationType, mpref::ArrayReferenceMetaPosition)
325325
Operation( id, var, elementbytes, instr, optype, mpref.loopdependencies, mpref.reduceddeps, mpref.parents, mpref.mref )
326326
end
327-
Base.:(==)(x::ArrayReferenceMetaPosition, y::ArrayReferenceMetaPosition) = x.mref.ref == y.mref.ref
327+
Base.:(==)(x::ArrayReferenceMetaPosition, y::ArrayReferenceMetaPosition) = x.mref == y.mref
328328
# Avoid memory allocations by using this for ops that aren't references
329329
const NOTAREFERENCE = ArrayReferenceMeta(ArrayReference(Symbol(""), Union{Symbol,Int}[]),Bool[],Symbol(""))
330330
const NOTAREFERENCEMP = ArrayReferenceMetaPosition(NOTAREFERENCE, NOPARENTS, Symbol[], Symbol[],Symbol(""))

test/gemv.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ using Test
33
# T = Float32
44
@testset "GEMV" begin
55
# Unum, Tnum = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3, 4) : (4, 6)
6-
Unum, Tnum = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (2, 6) : (2, 8)
6+
Unum, Tnum = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (1, 6) : (1, 8)
77
gemvq = :(for i ∈ eachindex(y)
88
yᵢ = 0.0
99
for j ∈ eachindex(x)

test/miscellaneous.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ using Test
44

55
@testset "Miscellaneous" begin
66

7-
Unum, Tnum = LoopVectorization.REGISTER_COUNT == 16 ? (2, 6) : (2, 8)
7+
Unum, Tnum = LoopVectorization.REGISTER_COUNT == 16 ? (1, 6) : (1, 8)
88
dot3q = :(for m ∈ 1:M, n ∈ 1:N
99
s += x[m] * A[m,n] * y[n]
1010
end);

test/offsetarrays.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,10 @@ using LoopVectorization.VectorizationBase: StaticUnitRange
8787
end
8888

8989

90-
struct SizedOffsetMatrix{T,LR,UR,LC,RC} <: DenseMatrix{T}
90+
struct SizedOffsetMatrix{T,LR,UR,LC,UC} <: DenseMatrix{T}
9191
data::Matrix{T}
9292
end
93+
Base.size(::SizedOffsetMatrix{<:Any,LR,UR,LC,UC}) where {LR,UR,LC,UC} = (UR-LR+1,UC-LC+1)
9394
Base.axes(::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR,UR,LC,UC} = (StaticUnitRange{LR,UR}(),StaticUnitRange{LC,UC}())
9495
Base.parent(A::SizedOffsetMatrix) = A.data
9596
@generated function LoopVectorization.stridedpointer(A::SizedOffsetMatrix{T,LR,UR,LC,RC}) where {T,LR,UR,LC,RC}

0 commit comments

Comments
 (0)