Skip to content

Commit 90038c8

Browse files
committed
Fix threading when cost is estimated to be high
1 parent 66b6e53 commit 90038c8

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

src/codegen/lower_threads.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,7 @@ function thread_two_loops_expr(
476476
)
477477
looplen = looplengthprod(ls)
478478
c = 0.05460264079015985 * c / looplen
479+
# @show c
479480
if Sys.ARCH !== :x86_64
480481
c *= 0.25
481482
end
@@ -536,6 +537,7 @@ function thread_two_loops_expr(
536537
retexpr = length(ls.outer_reductions) > 0 ? :(return $retv) : :(return nothing)
537538
q = quote
538539
$choose_nthread # UInt
540+
# @show var"#nthreads#"
539541
$loopstart1
540542
$loopstart2
541543
var"##do#thread##" = var"#nthreads#" > one(var"#nthreads#")
@@ -567,7 +569,7 @@ function thread_two_loops_expr(
567569
var"#thread#factor#1#" = min(var"#thread#factor#1#", var"#num#unrolls#thread#1#")
568570
end
569571
# @show (var"#thread#factor#0#", var"#thread#factor#1#")
570-
var"#nrequest#" = vsub_nsw((var"#nthreads#" % UInt32), 0x00000001)
572+
var"#nrequest#" = vsub_nsw(vmul_nsw(var"#thread#factor#0#", var"#thread#factor#1#" % UInt32), 0x00000001)
571573
var"#loop#1#start#init#" = var"#iter#start#0#"
572574
var"##do#thread##" = var"#nrequest#" ≠ 0x00000000
573575
if var"##do#thread##"

src/modeling/costs.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ function vector_cost(ic::InstructionCost, Wshift, sizeof_T)
8888
srt, sl, srp
8989
end
9090

91-
const OPAQUE_INSTRUCTION = InstructionCost(-1.0, 40, 40.0, 32)
91+
# Fallback cost used by `instruction_cost` when an instruction has no entry in `COST`
# (or does not belong to the `:LoopVectorization` module).
# NOTE(review): the meaning of the four positional `InstructionCost` arguments is not
# visible here — confirm against the `InstructionCost` definition before tuning them.
const OPAQUE_INSTRUCTION = InstructionCost(-1.0, 20, 20.0, 16)
9292

9393
# Cost lookup for an `Instruction`: when its `mod` field is `:LoopVectorization`, index
# `COST` by its `instr` field; for any other module, return `OPAQUE_INSTRUCTION`.
instruction_cost(instruction::Instruction) = instruction.mod === :LoopVectorization ? COST[instruction.instr] : OPAQUE_INSTRUCTION
9494
# Cost lookup for a bare `Symbol`: return the matching entry of `COST`, or
# `OPAQUE_INSTRUCTION` when `COST` has no such key.
instruction_cost(instruction::Symbol) = get(COST, instruction, OPAQUE_INSTRUCTION)

0 commit comments

Comments
 (0)