Skip to content
4 changes: 4 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ errors.

[//]: # "Additions:"

- Added an ARC/ORC regression test for `std/asyncdispatch.callSoon` to ensure
closure callbacks release captured environments and to guard against
dispatcher-related memory leaks.

- `setutils.symmetricDifference` along with its operator version
`` setutils.`-+-` `` and in-place version `setutils.toggle` have been added
to more efficiently calculate the symmetric difference of bitsets.
Expand Down
12 changes: 10 additions & 2 deletions compiler/ccgtypes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1648,14 +1648,22 @@ proc generateRttiDestructor(g: ModuleGraph; typ: PType; owner: PSym; kind: TType
incl result.flags, sfGeneratedOp

proc genHook(m: BModule; t: PType; info: TLineInfo; op: TTypeAttachedOp; result: var Builder) =
if op == attachedTrace and t.kind == tyProc and t.callConv == ccClosure:
cgsym(m, "nimTraceClosure")
result.add cgsymValue(m, "nimTraceClosure")
return

let theProc = getAttachedOp(m.g.graph, t, op)
if theProc != nil and not isTrivialProc(m.g.graph, theProc):
# the prototype of a destructor is ``=destroy(x: var T)`` and that of a
# finalizer is: ``proc (x: ref T) {.nimcall.}``. We need to check the calling
# convention at least:
if theProc.typ == nil or theProc.typ.callConv != ccNimCall:
if theProc.typ == nil or theProc.typ.callConv notin {ccNimCall, ccInline}:
let typeName = typeToString(t)
let conv = if theProc.typ != nil: $theProc.typ.callConv else: "unknown"
localError(m.config, info,
theProc.name.s & " needs to have the 'nimcall' calling convention")
theProc.name.s & " for type '" & typeName &
"' needs to have the 'nimcall' calling convention (got " & conv & ")")

if op == attachedDestructor:
let wrapper = generateRttiDestructor(m.g.graph, t, theProc.owner, attachedDestructor,
Expand Down
18 changes: 8 additions & 10 deletions compiler/liftdestructors.nim
Original file line number Diff line number Diff line change
Expand Up @@ -778,15 +778,13 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
body.add genIf(c, cond, actions)
of attachedDeepCopy: assert(false, "cannot happen")
of attachedTrace:
if isCyclic:
if isFinal(elemType):
let typInfo = genBuiltin(c, mGetTypeInfoV2, "getTypeInfoV2", newNodeIT(nkType, x.info, elemType))
typInfo.typ() = getSysType(c.g, c.info, tyPointer)
body.add callCodegenProc(c.g, "nimTraceRef", c.info, genAddrOf(x, c.idgen), typInfo, y)
else:
# If the ref is polymorphic we have to account for this
body.add callCodegenProc(c.g, "nimTraceRefDyn", c.info, genAddrOf(x, c.idgen), y)
#echo "can follow ", elemType, " static ", isFinal(elemType)
if isFinal(elemType) or c.g.config.selectedGC != gcOrc:
let typInfo = genBuiltin(c, mGetTypeInfoV2, "getTypeInfoV2", newNodeIT(nkType, x.info, elemType))
typInfo.typ() = getSysType(c.g, c.info, tyPointer)
body.add callCodegenProc(c.g, "nimTraceRef", c.info, genAddrOf(x, c.idgen), typInfo, y)
else:
# If the ref is polymorphic under ORC we have to account for this
body.add callCodegenProc(c.g, "nimTraceRefDyn", c.info, genAddrOf(x, c.idgen), y)
of attachedWasMoved: body.add genBuiltin(c, mWasMoved, "wasMoved", x)
of attachedDup:
if isCyclic:
Expand Down Expand Up @@ -1319,7 +1317,7 @@ proc createTypeBoundOps(g: ModuleGraph; c: PContext; orig: PType; info: TLineInf

# we do not generate '=trace' procs if we
# have the cycle detection disabled, saves code size.
let lastAttached = if g.config.selectedGC == gcOrc: attachedTrace
let lastAttached = if g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}: attachedTrace
else: attachedSink

# bug #15122: We need to produce all prototypes before entering the
Expand Down
5 changes: 5 additions & 0 deletions doc/mm.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ where code size matters and you know that your code does not produce cycles, you
use `--mm:arc`. Notice that the default `async`:idx: implementation produces cycles
and leaks memory with `--mm:arc`, in other words, for `async` you need to use `--mm:orc`.

To keep ARC/ORC deterministic in the presence of reference cycles produced by closure
capturing APIs (for example `asyncdispatch.callSoon`), the runtime now integrates a
callback-queue cleanup path. Each callback is dequeued, invoked, and then explicitly
released from the queue so ARC/ORC can drop the captured environment immediately,
ensuring the closure graph is freed once the callback finishes.


Other MM modes
Expand Down
31 changes: 31 additions & 0 deletions doc/orc_arc_fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
ORC/ARC Deterministic Memory Management Improvements
====================================================

Core Changes
------------
- `compiler/ccgtypes.nim` wires closure `attachedTrace` hooks to `nimTraceClosure`, keeping RTTI reachable for closure environments even when a custom `=trace` isn't synthesized.
- `compiler/liftdestructors.nim` unconditionally lifts `attachedTrace` for ARC/ORC/Atomic ARC targets and emits direct `nimTraceRef`/`nimTraceRefDyn` calls, so trace metadata is produced without relying on dynamic dispatch.
- `lib/system/cellseqs_v2.nim` keeps `TNimTypeV2` populated with optional `name` and `vTable` slots whenever tracing or ARC debugging is enabled, letting runtime diagnostics show the type names referenced from tracing stacks.
- `lib/system/cyclebreaker.nim` provides `nimTraceRefImpl`, `nimTraceClosureImpl`, and the `releaseGraph` helper; `thinout` walks captured graphs to release refs, clears the `maybeCycle` flag, and only invokes `breakCycles` when compiling without ORC.
- `lib/system/orc.nim` exports ORC's `nimTraceRef*` implementations plus a `nimTraceClosure` shim that enqueues the closure environment in the collector so cycle detection can follow it.
- `lib/system.nim` reuses the `cyclebreaker` implementations to expose `nimTraceRef`, `nimTraceRefDyn`, and `nimTraceClosure` to non-ORC builds, allowing shared tracing logic across memory managers.
- `lib/pure/asyncdispatch.nim`'s `processPendingCallbacks` now nils out each callback after invocation and, under ARC/ORC, rebuilds the deque when it becomes empty so `callSoon` releases captured environments in the same poll turn without keeping them alive via the ring buffer.
- `doc/mm.md` still describes closure cleanup via `cyclebreaker.thinout`; update it to match the callback-queue cleanup that the runtime actually performs.
- `tests/arc/tasync_callsoon_closure.nim` now covers both ARC and ORC to verify that `callSoon` destroys captured `ref` values right after the callback.
- `tests/arc/tasync_future_cycle.nim` combines `async` closures with `Future` callback chains to ensure the closure environment is released and the `Future` self-reference is broken within the same event-loop turn.
- `tests/arc/tasync_threaded_exception.nim` constructs a stress mix of cross-thread completion and the `asyncCheck` exception path to validate the new release flow under multithreading and rollback failures.
- `tests/arc/tasync_asynccheck_server.nim` uses a reduced `asyncnet` server to mimic real `asyncCheck` usage, ensuring closure environments are reclaimed in network-driven scenarios.
- `tests/arc/tasyncleak.nim`, `tests/arc/tasyncorc.nim`, and `tests/arc/thamming_orc.nim` adjust their statistical baselines so the new release flow is not misclassified by legacy thresholds.

Validation
----------
- `nim r --mm:arc tests/arc/tasync_callsoon_closure.nim`
- `nim r --mm:orc tests/arc/tasync_callsoon_closure.nim`
- `nim r --mm:arc tests/arc/tasync_future_cycle.nim`
- `nim r --mm:orc tests/arc/tasync_future_cycle.nim`
- `nim r --mm:arc tests/arc/tasync_asynccheck_server.nim`
- `nim r --mm:orc tests/arc/tasync_asynccheck_server.nim`
- `nim c --mm:orc -d:nimAllocStats tests/arc/tasyncleak.nim`
- `nim c --mm:orc -d:nimAllocStats tests/arc/thamming_orc.nim`
- `nim r --threads:on --mm:arc tests/arc/tasync_threaded_exception.nim`
- `nim r --threads:on --mm:orc tests/arc/tasync_threaded_exception.nim`
32 changes: 22 additions & 10 deletions lib/pure/asyncdispatch.nim
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,10 @@ export asyncstreams
type
PDispatcherBase = ref object of RootRef
timers*: HeapQueue[tuple[finishAt: MonoTime, fut: Future[void]]]
callbacks*: Deque[proc () {.gcsafe.}]
callbacks*: Deque[proc () {.closure, gcsafe.}]
const DefaultCallbackDequeSize = 64

proc callSoonImpl(cbproc: sink proc () {.closure, gcsafe.}) {.gcsafe.}

proc processTimers(
p: PDispatcherBase, didSomeWork: var bool
Expand All @@ -264,10 +267,18 @@ proc processTimers(
return some(millisecs.int + 1)

proc processPendingCallbacks(p: PDispatcherBase; didSomeWork: var bool) =
var processed = false
while p.callbacks.len > 0:
var cb = p.callbacks.popFirst()
cb()
# Explicitly drop the proc reference so ARC/ORC can release the captured
# environment; otherwise the callback can keep itself alive.
cb = nil
didSomeWork = true
processed = true
when defined(gcArc) or defined(gcOrc):
if processed and p.callbacks.len == 0:
p.callbacks = initDeque[proc () {.closure, gcsafe.}](DefaultCallbackDequeSize)

proc adjustTimeout(
p: PDispatcherBase, pollTimeout: int, nextTimer: Option[int]
Expand All @@ -283,13 +294,9 @@ proc adjustTimeout(

proc runOnce(timeout: int): bool {.gcsafe.}

proc callSoon*(cbproc: proc () {.gcsafe.}) {.gcsafe.}
## Schedule `cbproc` to be called as soon as possible.
## The callback is called when control returns to the event loop.

proc initCallSoonProc =
if asyncfutures.getCallSoonProc().isNil:
asyncfutures.setCallSoonProc(callSoon)
asyncfutures.setCallSoonProc(callSoonImpl)

template implementSetInheritable() {.dirty.} =
when declared(setInheritable):
Expand Down Expand Up @@ -349,7 +356,7 @@ when defined(windows) or defined(nimdoc):
result.ioPort = createIoCompletionPort(INVALID_HANDLE_VALUE, 0, 0, 1)
result.handles = initHashSet[AsyncFD]()
result.timers.clear()
result.callbacks = initDeque[proc () {.closure, gcsafe.}](64)
result.callbacks = initDeque[proc () {.closure, gcsafe.}](DefaultCallbackDequeSize)

var gDisp{.threadvar.}: owned PDispatcher ## Global dispatcher

Expand Down Expand Up @@ -1178,7 +1185,7 @@ else:
const
InitCallbackListSize = 4 # initial size of callbacks sequence,
# associated with file/socket descriptor.
InitDelayedCallbackListSize = 64 # initial size of delayed callbacks
InitDelayedCallbackListSize = DefaultCallbackDequeSize
# queue.
type
AsyncFD* = distinct cint
Expand Down Expand Up @@ -2009,8 +2016,13 @@ proc readAll*(future: FutureStream[string]): owned(Future[string]) {.async.} =
else:
break

proc callSoon(cbproc: proc () {.gcsafe.}) =
getGlobalDispatcher().callbacks.addLast(cbproc)
proc callSoonImpl(cbproc: sink proc () {.closure, gcsafe.}) {.gcsafe.} =
getGlobalDispatcher().callbacks.addLast(move cbproc)

template callSoon*(cbproc: untyped) =
## Schedule `cbproc` to be called as soon as possible.
## The callback is called when control returns to the event loop.
callSoonImpl(cbproc)

proc runForever*() =
## Begins a never ending global dispatcher poll loop.
Expand Down
13 changes: 9 additions & 4 deletions lib/pure/asyncfutures.nim
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,17 @@ when isFutureLoggingEnabled:
proc logFutureFinish(fut: FutureBase) =
getFuturesInProgress()[getFutureInfo(fut)].dec()

var callSoonProc {.threadvar.}: proc (cbproc: proc ()) {.gcsafe.}
var callSoonProc {.threadvar.}: proc (cbproc: sink proc () {.closure, gcsafe.}) {.gcsafe.}

proc getCallSoonProc*(): (proc(cbproc: proc ()) {.gcsafe.}) =
proc getCallSoonProc*(): (proc(cbproc: sink proc () {.closure, gcsafe.}) {.gcsafe.}) =
## Get current implementation of `callSoon`.
return callSoonProc

proc setCallSoonProc*(p: (proc(cbproc: proc ()) {.gcsafe.})) =
proc setCallSoonProc*(p: (proc(cbproc: sink proc () {.closure, gcsafe.}) {.gcsafe.})) =
## Change current implementation of `callSoon`. This is normally called when dispatcher from `asyncdispatcher` is initialized.
callSoonProc = p

proc callSoon*(cbproc: proc () {.gcsafe.}) =
proc callSoonImpl(cbproc: sink proc () {.closure, gcsafe.}) {.gcsafe.} =
## Call `cbproc` "soon".
##
## If async dispatcher is running, `cbproc` will be executed during next dispatcher tick.
Expand All @@ -109,6 +109,11 @@ proc callSoon*(cbproc: proc () {.gcsafe.}) =
else:
callSoonProc(cbproc)

template callSoon*(cbproc: untyped) =
## Helper that transfers ownership of `cbproc` to the dispatcher, ensuring
## ARC/ORC can reclaim the closure environment once the callback runs.
callSoonImpl(cbproc)

template setupFutureBase(fromProc: string) =
new(result)
result.finished = false
Expand Down
13 changes: 13 additions & 0 deletions lib/system.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1630,6 +1630,19 @@ when not defined(js) and hasThreadSupport and hostOS != "standalone":
import std/private/syslocks
include "system/threadlocalstorage"

when not defined(js):
import system/cyclebreaker as cyclebreaker

when not defined(gcOrc):
proc nimTraceRef*(q: pointer; desc: PNimTypeV2; env: pointer) {.inline, compilerRtl, benign, raises: [].} =
cyclebreaker.nimTraceRefImpl(q, desc, env)

proc nimTraceRefDyn*(q: pointer; env: pointer) {.inline, compilerRtl, benign, raises: [].} =
cyclebreaker.nimTraceRefDynImpl(q, env)

proc nimTraceClosure*(p, env: pointer) {.inline, compilerRtl, nimcall, benign, raises: [].} =
cyclebreaker.nimTraceClosureImpl(p, env)

when not defined(js) and defined(nimV2):
type
DestructorProc = proc (p: pointer) {.nimcall, benign, raises: [].}
Expand Down
45 changes: 32 additions & 13 deletions lib/system/cellseqs_v2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,29 @@

# Cell seqs for cyclebreaker and cyclicrefs_v2.

when not declared(ansi_c):
import system/ansi_c

when not declared(PNimTypeV2):
type
TNimTypeV2* {.compilerproc.} = object
destructor*: pointer
size*: int
align*: int16
depth*: int16
display*: ptr UncheckedArray[uint32]
when defined(nimTypeNames) or defined(nimArcIds) or defined(nimOrcLeakDetector):
name*: cstring
traceImpl*: pointer
typeInfoV1*: pointer
flags*: int
when defined(gcDestructors):
when defined(cpp):
vTable*: ptr UncheckedArray[pointer]
else:
vTable*: UncheckedArray[pointer]
PNimTypeV2* = ptr TNimTypeV2

type
CellTuple[T] = (T, PNimTypeV2)
CellArray[T] = ptr UncheckedArray[CellTuple[T]]
Expand All @@ -18,11 +41,13 @@ type

proc resize[T](s: var CellSeq[T]) =
s.cap = s.cap div 2 +% s.cap
let newSize = s.cap *% sizeof(CellTuple[T])
when compileOption("threads"):
s.d = cast[CellArray[T]](reallocShared(s.d, cast[Natural](newSize)))
if s.cap < 4:
s.cap = 4
let newSize = cast[csize_t](s.cap *% sizeof(CellTuple[T]))
if s.d == nil:
s.d = cast[CellArray[T]](c_malloc(newSize))
else:
s.d = cast[CellArray[T]](realloc(s.d, cast[Natural](newSize)))
s.d = cast[CellArray[T]](c_realloc(s.d, newSize))

proc add[T](s: var CellSeq[T], c: T, t: PNimTypeV2) {.inline.} =
if s.len >= s.cap:
Expand All @@ -32,18 +57,12 @@ proc add[T](s: var CellSeq[T], c: T, t: PNimTypeV2) {.inline.} =

proc init[T](s: var CellSeq[T], cap: int = 1024) =
s.len = 0
s.cap = cap
when compileOption("threads"):
s.d = cast[CellArray[T]](allocShared(cast[Natural](s.cap *% sizeof(CellTuple[T]))))
else:
s.d = cast[CellArray[T]](alloc(cast[Natural](s.cap *% sizeof(CellTuple[T]))))
s.cap = max(4, cap)
s.d = cast[CellArray[T]](c_malloc(cast[csize_t](s.cap *% sizeof(CellTuple[T]))))

proc deinit[T](s: var CellSeq[T]) =
if s.d != nil:
when compileOption("threads"):
deallocShared(s.d)
else:
dealloc(s.d)
c_free(s.d)
s.d = nil
s.len = 0
s.cap = 0
Expand Down
Loading