Skip to content
Closed
111 changes: 73 additions & 38 deletions src/GC/GC.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,77 +9,112 @@ module GC

using ..C: C

const ENABLED = Ref(true)
const QUEUE = C.PyPtr[]
# `ENABLED`: whether or not python GC is enabled, or paused to process later
const ENABLED = Threads.Atomic{Bool}(true)
# this event allows us to `wait` in a task until GC is re-enabled
const ENABLED_EVENT = Threads.Event()

# this is the queue to process pointers for GC (`C.Py_DecRef`)
const QUEUE = Channel{C.PyPtr}(Inf)

# this is the task which performs GC from thread 1
const GC_TASK = Ref{Task}()

# This we use in testing to know when our GC is running
const GC_FINISHED = Threads.Condition()

"""
PythonCall.GC.disable()

Disable the PythonCall garbage collector.
Disable the PythonCall garbage collector. This should generally not be required.

This means that whenever a Python object owned by Julia is finalized, it is not immediately
freed but is instead added to a queue of objects to free later when `enable()` is called.

Like most PythonCall functions, you must only call this from the main thread.
"""
function disable()
ENABLED[] = false
reset(ENABLED_EVENT)
return
end

"""
PythonCall.GC.enable()

Re-enable the PythonCall garbage collector.

This frees any Python objects which were finalized while the GC was disabled, and allows
objects finalized in the future to be freed immediately.
Re-enable the PythonCall garbage collector. This should generally not be required.

Like most PythonCall functions, you must only call this from the main thread.
"""
function enable()
ENABLED[] = true
notify(ENABLED_EVENT)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could wait for the GC_finished event here to preserve the semantics that this function call only returns after all GC events are processed

return
end

function enqueue(ptr::C.PyPtr)
if ptr != C.PyNULL && C.CTX.is_initialized
put!(QUEUE, ptr)
end
return
end

function enqueue_all(ptrs)
if C.CTX.is_initialized
for ptr in ptrs
put!(QUEUE, ptr)
end
end
return
end

# must only be called from thread 1 by the task in `GC_TASK[]`
function unsafe_process_queue!()
if !isempty(QUEUE)
C.with_gil(false) do
for ptr in QUEUE
while !isempty(QUEUE) && ENABLED[]
# This should never block, since there should
# only be one consumer
# (we would like to not block while holding the GIL)
ptr = take!(QUEUE)
if ptr != C.PyNULL
C.Py_DecRef(ptr)
end
end
end
end
empty!(QUEUE)
return
return nothing
end

function enqueue(ptr::C.PyPtr)
if ptr != C.PyNULL && C.CTX.is_initialized
if ENABLED[]
C.with_gil(false) do
C.Py_DecRef(ptr)
end
else
push!(QUEUE, ptr)
function gc_loop()
while true
if ENABLED[] && !isempty(QUEUE)
unsafe_process_queue!()
# just for testing purposes
Base.@lock GC_FINISHED notify(GC_FINISHED)
end
# wait until there is both something to process
# and GC is `enabled`
wait(QUEUE)
wait(ENABLED_EVENT)
end
return
end

function enqueue_all(ptrs)
if C.CTX.is_initialized
if ENABLED[]
C.with_gil(false) do
for ptr in ptrs
if ptr != C.PyNULL
C.Py_DecRef(ptr)
end
end
end
else
append!(QUEUE, ptrs)
end
function launch_gc_task()
if isassigned(GC_TASK) && Base.istaskstarted(GC_TASK[]) && !Base.istaskdone(GC_TASK[])
throw(ConcurrencyViolationError("PythonCall GC task already running!"))
end
return
task = Task(gc_loop)
task.sticky = VERSION >= v"1.7" # disallow task migration which was introduced in 1.7
# ensure the task runs from thread 1
ccall(:jl_set_task_tid, Cvoid, (Any, Cint), task, 0)
schedule(task)
Comment on lines +186 to +190
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if isdefined(Base, :errormonitor)
Base.errormonitor(task)
end
GC_TASK[] = task
task
end

function __init__()
launch_gc_task()
enable() # start enabled
nothing
end

end # module GC
54 changes: 53 additions & 1 deletion test/GC.jl
Original file line number Diff line number Diff line change
@@ -1 +1,53 @@
# TODO
@testset "201: GC segfaults" begin
# https://github.com/JuliaPy/PythonCall.jl/issues/201
# This should not segfault!
cmd = Base.julia_cmd()
path = joinpath(@__DIR__, "finalize_test_script.jl")
p = run(`$cmd -t2 --project $path`)
@test p.exitcode == 0
end

@testset "GC.enable() and GC.disable()" begin
PythonCall.GC.disable()
try
# Put some stuff in the GC queue
let
pyobjs = map(pylist, 1:100)
foreach(PythonCall.Core.py_finalizer, pyobjs)
end
# Since GC is disabled, we should have built up entries in the queue
# (there is no race since we push to the queue eagerly)
@test !isempty(PythonCall.GC.QUEUE)
# now, setup a task so we can know once GC has triggered.
# We start waiting *before* enabling GC, so we know we'll catch
# the notification that GC finishes
# We also don't enable GC until we get confirmation via `is_waiting[]`.
# At this point we have acquired the lock for `PythonCall.GC.GC_FINISHED`.
# We won't relinquish it until the next line where we `wait`,
# at which point the GC can trigger it. Therefore we should be certain
# that the ordering is correct and we can't miss the event.
is_waiting = Threads.Atomic{Bool}(false)
rdy = Threads.@spawn begin
Base.@lock PythonCall.GC.GC_FINISHED begin
is_waiting[] = true
wait(PythonCall.GC.GC_FINISHED)
end
end
# Wait until the task starts
ret = timedwait(5) do
istaskstarted(rdy) && is_waiting[]
end
@test ret == :ok
# Now, re-enable GC
PythonCall.GC.enable()
# Wait for GC to run
wait(rdy)
# There should be nothing left in the queue, since we fully process it.
@test isempty(PythonCall.GC.QUEUE)
finally
# Make sure we re-enable GC regardless of whether our tests pass
PythonCall.GC.enable()
end
end

@test_throws ConcurrencyViolationError("PythonCall GC task already running!") PythonCall.GC.launch_gc_task()
9 changes: 9 additions & 0 deletions test/finalize_test_script.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using PythonCall

# This would consistently segfault pre-GC-thread-safety
let
pyobjs = map(pylist, 1:100)
Threads.@threads for obj in pyobjs
PythonCall.Core.py_finalizer(obj)
end
end