Skip to content

Commit 1081aff

Browse files
committed
Need to wait for non-temporal stores to actually sync, and fix bug where storing reductions was offset by register size, but loading them was hardcoded to 64 (cacheline size)
1 parent 80217c3 commit 1081aff

File tree

3 files changed

+9
-4
lines changed

3 files changed

+9
-4
lines changed

src/codegen/lower_threads.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,8 @@ end
202202

203203
function outer_reduct_combine_expressions(ls::LoopSet, retv)
204204
gf = GlobalRef(Core, :getfield)
205-
q = Expr(:block, :(var"#load#thread#ret#" = $gf(ThreadingUtilities.load(var"#thread#ptr#", typeof($retv), 64),2,false)))
205+
q = Expr(:block, :(var"#load#thread#ret#" = $gf(ThreadingUtilities.load(var"#thread#ptr#", typeof($retv), $(reg_size(ls))),2,false)))
206+
# push!(q.args, :(@show var"#load#thread#ret#"))
206207
for (i,or) ∈ enumerate(ls.outer_reductions)
207208
op = ls.operations[or]
208209
var = name(op)
@@ -559,6 +560,7 @@ function thread_two_loops_expr(
559560
end
560561
# @show $lastboundexpr
561562
$_avx_call_
563+
# @show $retv
562564
var"#thread#id#" = 0x00000000
563565
var"#thread#mask#" = CheapThreads.mask(var"#threads#")
564566
var"#threads#remain#" = true

test/map.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
c2 = vmapnt(foo, a, b);
1313
@test c1 ≈ c2
1414
c2 = vmapntt(foo, a, b);
15+
sleep(1e-3) # non-temporal stores won't be automatically synced/coherent, so need to wait!
1516
@test c1 ≈ c2
1617
fill!(c2, NaN); @views vmapnt!(foo, c2[2:end], a[2:end], b[2:end]);
1718
@test @views c1[2:end] ≈ c2[2:end]
1819
fill!(c2, NaN); @views vmapntt!(foo, c2[2:end], a[2:end], b[2:end]);
20+
sleep(1e-3) # non-temporal stores won't be automatically synced/coherent, so need to wait!
1921
@test @views c1[2:end] ≈ c2[2:end]
2022
end
2123

test/threading.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,17 @@ end
5757

5858
@testset "Threading" begin
5959
for M ∈ 17:399
60+
# @show M
6061
K = M; N = M;
6162
A = rand(M,K); B = rand(K,N);
6263
@test dot(A,B) ≈ mydotavx(A,B)
6364

6465
C1 = A * B; C0 = similar(C1);
6566
@test AmulB!(C0, A, B) ≈ C1
6667

67-
x = randn(Complex{Float64}, 1783);
68-
W = randn(Complex{Float64}, 1783, 1577);
69-
y = randn(Complex{Float64}, 1577);
68+
x = randn(Complex{Float64}, 3M-1);
69+
W = randn(Complex{Float64}, 3M-1, 3M+1);
70+
y = randn(Complex{Float64}, 3M+1);
7071
@test dot(x,W,y) ≈ dot3(x,W,y)
7172

7273
kern = OffsetArray(randn(3,3),-2,-2)

0 commit comments

Comments
 (0)