Skip to content

Commit 17a64ae

Browse files
authored
Fix stack overflow resulting from co-recursive lazy seqs (#632)
* Fix stack overflow resulting from co-recursive lazy seqs * Writing tests is good * Queues need tests too * Documentation * Re-documentation * Slight updates to severals of things * Co-recursion test
1 parent fbe8833 commit 17a64ae

File tree

16 files changed

+186
-43
lines changed

16 files changed

+186
-43
lines changed

src/basilisp/core.lpy

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2302,18 +2302,17 @@
23022302
coll))
23032303

23042304
(defn iterate
2305-
"Returns a lazy sequence of (f x), (f (f x)) and so on."
2305+
"Returns a lazy sequence of x, (f x), (f (f x)) and so on."
23062306
[f x]
23072307
(lazy-seq
2308-
(let [v (f x)]
2309-
(when v
2310-
(cons v (iterate f v))))))
2308+
(when x
2309+
(cons x (iterate f (f x))))))
23112310

23122311
(defn range
23132312
"Return a range of integers from start. If end is specified, the
23142313
sequence will terminate at end."
23152314
([]
2316-
(iterate inc -1))
2315+
(iterate inc 0))
23172316
([end]
23182317
(lazy-seq (cons 0 (range 1 end))))
23192318
([start end]
@@ -2328,8 +2327,12 @@
23282327
"Return a function which returns the logical complement of the return
23292328
value of (apply f args)."
23302329
[f]
2331-
(fn [& args]
2332-
(not (apply f args))))
2330+
(fn
2331+
([x] (not (f x)))
2332+
([x y] (not (f x y)))
2333+
([x y z] (not (f x y z)))
2334+
([x y z & args]
2335+
(not (apply f x y z args)))))
23332336

23342337
(defn constantly
23352338
"Returns a function that accepts any number of arguments and returns x."
@@ -2674,8 +2677,8 @@
26742677
(ensure-reduced result)))))))
26752678
([n coll]
26762679
(lazy-seq
2677-
(when (seq coll)
2678-
(when (> n 0)
2680+
(when (> n 0)
2681+
(when (seq coll)
26792682
(cons (first coll) (take (dec n) (rest coll))))))))
26802683

26812684
(defn take-while

src/basilisp/lang/compiler/generator.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,12 @@ def new_frame(self, name: str, is_context_boundary: bool):
169169
"""Context manager for creating a new stack frame."""
170170
yield SymbolTable(name, is_context_boundary=is_context_boundary, parent=self)
171171

172+
@property
173+
def is_top(self) -> bool:
174+
"""Return true if this is the top-level symbol table (e.g. there is no
175+
parent)."""
176+
return self._parent is None
177+
172178
@property
173179
def context_boundary(self) -> "SymbolTable":
174180
"""Return the nearest context boundary parent symbol table to this one. If the
@@ -1494,10 +1500,18 @@ def _synthetic_do_to_py_ast(ctx: GeneratorContext, node: Do) -> GeneratedPyAST:
14941500
MetaNode = Union[Const, MapNode]
14951501

14961502

1497-
def __fn_name(s: Optional[str]) -> str:
1503+
def __fn_name(ctx: GeneratorContext, s: Optional[str]) -> str:
14981504
"""Generate a safe Python function name from a function name symbol.
1499-
If no symbol is provided, generate a name with a default prefix."""
1500-
return genname("__" + munge(Maybe(s).or_else_get(_FN_PREFIX)))
1505+
1506+
If no symbol is provided, generate a name with a default prefix.
1507+
1508+
Prepends the name of the parent symbol table (if one exists) to help with debugging
1509+
and readability of stack traces."""
1510+
ctx_boundary = ctx.symbol_table.context_boundary
1511+
ctx_boundary_prefix = "" if ctx_boundary.is_top else f"{ctx_boundary.name}__"
1512+
return genname(
1513+
munge("".join(("__", ctx_boundary_prefix, Maybe(s).or_else_get(_FN_PREFIX))))
1514+
)
15011515

15021516

15031517
def __fn_args_to_py_ast(
@@ -1619,7 +1633,7 @@ def __single_arity_fn_to_py_ast(
16191633
assert method.op == NodeOp.FN_ARITY
16201634

16211635
lisp_fn_name = node.local.name if node.local is not None else None
1622-
py_fn_name = __fn_name(lisp_fn_name) if def_name is None else munge(def_name)
1636+
py_fn_name = __fn_name(ctx, lisp_fn_name) if def_name is None else munge(def_name)
16231637
py_fn_node = ast.AsyncFunctionDef if node.is_async else ast.FunctionDef
16241638
with ctx.new_symbol_table(
16251639
py_fn_name, is_context_boundary=True
@@ -1859,7 +1873,7 @@ def __multi_arity_fn_to_py_ast( # pylint: disable=too-many-locals
18591873
assert node.kwarg_support is None, "multi-arity functions do not support kwargs"
18601874

18611875
lisp_fn_name = node.local.name if node.local is not None else None
1862-
py_fn_name = __fn_name(lisp_fn_name) if def_name is None else munge(def_name)
1876+
py_fn_name = __fn_name(ctx, lisp_fn_name) if def_name is None else munge(def_name)
18631877

18641878
py_fn_node = ast.AsyncFunctionDef if node.is_async else ast.FunctionDef
18651879

src/basilisp/lang/interfaces.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ class ISeqable(Iterable[T]):
172172
__slots__ = ()
173173

174174
@abstractmethod
175-
def seq(self) -> "ISeq[T]":
175+
def seq(self) -> "Optional[ISeq[T]]":
176176
raise NotImplementedError()
177177

178178

@@ -297,7 +297,7 @@ def cons(self: T_vec, *elems: T) -> T_vec: # type: ignore[override]
297297
raise NotImplementedError()
298298

299299
@abstractmethod
300-
def seq(self) -> "ISeq[T]": # type: ignore[override]
300+
def seq(self) -> "Optional[ISeq[T]]": # type: ignore[override]
301301
raise NotImplementedError()
302302

303303

@@ -481,7 +481,7 @@ def rest(self) -> "ISeq[T]":
481481
def cons(self, elem: T) -> "ISeq[T]":
482482
raise NotImplementedError()
483483

484-
def seq(self) -> "ISeq[T]":
484+
def seq(self) -> "Optional[ISeq[T]]":
485485
return self
486486

487487
def _lrepr(self, **kwargs):

src/basilisp/lang/list.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ def cons(self, *elems: T) -> "PersistentList[T]":
7373
def empty() -> "PersistentList":
7474
return EMPTY
7575

76+
def seq(self) -> Optional[ISeq[T]]:
77+
if len(self._inner) == 0:
78+
return None
79+
return super().seq()
80+
7681
def peek(self):
7782
return self.first
7883

src/basilisp/lang/map.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,9 @@ def cons( # type: ignore[override]
256256
def empty() -> "PersistentMap":
257257
return EMPTY
258258

259-
def seq(self) -> ISeq[IMapEntry[K, V]]:
259+
def seq(self) -> Optional[ISeq[IMapEntry[K, V]]]:
260+
if len(self._inner) == 0:
261+
return None
260262
return sequence(MapEntry.of(k, v) for k, v in self._inner.items())
261263

262264
def to_transient(self) -> TransientMap[K, V]:

src/basilisp/lang/queue.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,9 @@ def pop(self) -> "PersistentQueue[T]":
7575
raise IndexError("Cannot pop an empty queue")
7676
return PersistentQueue(self._inner.popleft())
7777

78-
def seq(self) -> ISeq[T]:
78+
def seq(self) -> Optional[ISeq[T]]:
79+
if len(self._inner) == 0:
80+
return None
7981
return sequence(self)
8082

8183

src/basilisp/lang/reader.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,9 @@ def _walk(inner_f, outer_f, form):
965965
elif isinstance(form, IPersistentVector):
966966
return outer_f(vec.vector(map(inner_f, form)))
967967
elif isinstance(form, IPersistentMap):
968-
return outer_f(lmap.hash_map(*chain.from_iterable(map(inner_f, form.seq()))))
968+
return outer_f(
969+
lmap.hash_map(*chain.from_iterable(map(inner_f, form.seq() or ())))
970+
)
969971
elif isinstance(form, IPersistentSet):
970972
return outer_f(lset.set(map(inner_f, form)))
971973
else:

src/basilisp/lang/seq.py

Lines changed: 79 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,26 @@
1414

1515

1616
class _EmptySequence(IWithMeta, ISequential, ISeq[T]):
17+
__slots__ = ("_meta",)
18+
19+
def __init__(self, meta: Optional[IPersistentMap] = None):
20+
self._meta = meta
21+
1722
def __repr__(self):
1823
return "()"
1924

2025
def __bool__(self):
2126
return True
2227

28+
def seq(self) -> Optional[ISeq[T]]:
29+
return None
30+
2331
@property
2432
def meta(self) -> Optional[IPersistentMap]:
25-
return None
33+
return self._meta
2634

27-
def with_meta(self, meta: Optional[IPersistentMap]) -> "Cons[T]": # type: ignore
28-
return Cons(meta=meta)
35+
def with_meta(self, meta: Optional[IPersistentMap]) -> "_EmptySequence[T]":
36+
return _EmptySequence(meta=meta)
2937

3038
@property
3139
def is_empty(self) -> bool:
@@ -51,7 +59,7 @@ class Cons(ISeq[T], ISequential, IWithMeta):
5159

5260
def __init__(
5361
self,
54-
first=None,
62+
first,
5563
seq: Optional[ISeq[T]] = None,
5664
meta: Optional[IPersistentMap] = None,
5765
) -> None:
@@ -79,7 +87,7 @@ def meta(self) -> Optional[IPersistentMap]:
7987
return self._meta
8088

8189
def with_meta(self, meta: Optional[IPersistentMap]) -> "Cons[T]":
82-
return Cons(first=self._first, seq=self._rest, meta=meta)
90+
return Cons(self._first, seq=self._rest, meta=meta)
8391

8492

8593
class _Sequence(IWithMeta, ISequential, ISeq[T]):
@@ -141,19 +149,23 @@ def cons(self, elem):
141149
class LazySeq(IWithMeta, ISequential, ISeq[T]):
142150
"""LazySeqs are wrappers for delaying sequence computation. Create a LazySeq
143151
with a function that can either return None or a Seq. If a Seq is returned,
144-
the LazySeq is a proxy to that Seq."""
152+
the LazySeq is a proxy to that Seq.
145153
146-
__slots__ = ("_gen", "_seq", "_meta")
154+
Callers should never provide the `obj` or `seq` arguments -- these are provided
155+
only to support `with_meta` returning a new LazySeq instance."""
156+
157+
__slots__ = ("_gen", "_obj", "_seq", "_meta")
147158

148-
# pylint:disable=assigning-non-slot
149159
def __init__(
150160
self,
151161
gen: Optional[LazySeqGenerator],
162+
obj: Optional[ISeq[T]] = None,
152163
seq: Optional[ISeq[T]] = None,
153164
*,
154165
meta: Optional[IPersistentMap] = None,
155166
) -> None:
156167
self._gen: Optional[LazySeqGenerator] = gen
168+
self._obj: Optional[ISeq[T]] = obj
157169
self._seq: Optional[ISeq[T]] = seq
158170
self._meta = meta
159171

@@ -162,32 +174,72 @@ def meta(self) -> Optional[IPersistentMap]:
162174
return self._meta
163175

164176
def with_meta(self, meta: Optional[IPersistentMap]) -> "LazySeq[T]":
165-
return LazySeq(self._gen, seq=self._seq, meta=meta)
166-
167-
# pylint:disable=assigning-non-slot
168-
def _realize(self):
177+
return LazySeq(self._gen, obj=self._obj, seq=self._seq, meta=meta)
178+
179+
# LazySeqs have a fairly complex inner state, in spite of the simple interface.
180+
# Calls from Basilisp code should be providing the only generator seed function.
181+
# Calls to `(seq ...)` cause the LazySeq to cache the generator function locally
182+
# (as explained in _compute_seq), clear the _gen attribute, and cache the results
183+
# of that generator function call as _obj. _obj may be None, some other ISeq, or
184+
# perhaps another LazySeq. Finally, the LazySeq attempts to consume all returned
185+
# LazySeq objects before calling `(seq ...)` on the result, which is cached in the
186+
# _seq attribute.
187+
188+
def _compute_seq(self) -> Optional[ISeq[T]]:
169189
if self._gen is not None:
170-
self._seq = to_seq(self._gen())
190+
# This local caching of the generator function and clearing of self._gen
191+
# is absolutely critical for supporting co-recursive lazy sequences.
192+
#
193+
# The original example that prompted this change is below:
194+
#
195+
# (def primes (remove
196+
# (fn [x] (some #(zero? (mod x %)) primes))
197+
# (iterate inc 2)))
198+
#
199+
# (take 5 primes) ;; => stack overflow
200+
#
201+
# If we don't clear self._gen, each successive call to (some ... primes)
202+
# will end up forcing the primes LazySeq object to call self._gen, rather
203+
# than caching the results, allowing examination of the partial seq
204+
# computed up to that point.
205+
gen = self._gen
171206
self._gen = None
207+
self._obj = gen()
208+
return self._obj if self._obj is not None else self._seq
209+
210+
def seq(self) -> Optional[ISeq[T]]:
211+
self._compute_seq()
212+
if self._obj is not None:
213+
o = self._obj
214+
self._obj = None
215+
# Consume any additional lazy sequences returned immediately so we have a
216+
# "real" concrete sequence to proxy to.
217+
#
218+
# The common idiom with LazySeqs is to return (cons value (lazy-seq ...))
219+
# from the generator function, so this will only result in evaluating away
220+
# instances where _another_ LazySeq is returned rather than a cons cell
221+
# with a concrete first value. This loop will not consume the LazySeq in
222+
# the rest position of the cons.
223+
while isinstance(o, LazySeq):
224+
o = o._compute_seq() # type: ignore
225+
self._seq = to_seq(o)
226+
return self._seq
172227

173228
@property
174229
def is_empty(self) -> bool:
175-
self._realize()
176-
return self._seq is None
230+
return self.seq() is None
177231

178232
@property
179233
def first(self) -> Optional[T]:
180-
self._realize()
181234
try:
182-
return self._seq.first # type: ignore
235+
return self.seq().first # type: ignore[union-attr]
183236
except AttributeError:
184237
return None
185238

186239
@property
187240
def rest(self) -> "ISeq[T]":
188-
self._realize()
189241
try:
190-
return self._seq.rest # type: ignore
242+
return self.seq().rest # type: ignore[union-attr]
191243
except AttributeError:
192244
return EMPTY
193245

@@ -208,9 +260,9 @@ def sequence(s: Iterable) -> ISeq[Any]:
208260
return EMPTY
209261

210262

211-
def _seq_or_nil(s: ISeq) -> Optional[ISeq]:
263+
def _seq_or_nil(s: Optional[ISeq]) -> Optional[ISeq]:
212264
"""Return None if a ISeq is empty, the ISeq otherwise."""
213-
if s.is_empty:
265+
if s is None or s.is_empty:
214266
return None
215267
return s
216268

@@ -231,6 +283,12 @@ def _to_seq_iseq(o: ISeq) -> Optional[ISeq]:
231283
return _seq_or_nil(o)
232284

233285

286+
@to_seq.register(LazySeq)
287+
def _to_seq_lazyseq(o: LazySeq) -> Optional[ISeq]:
288+
# Force evaluation of the LazySeq by calling o.seq() directly.
289+
return o.seq()
290+
291+
234292
@to_seq.register(ISeqable)
235293
def _to_seq_iseqable(o: ISeqable) -> Optional[ISeq]:
236294
return _seq_or_nil(o.seq())

src/basilisp/lang/set.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,9 @@ def disj(self, *elems: T) -> "PersistentSet[T]":
174174
def empty() -> "PersistentSet":
175175
return EMPTY
176176

177-
def seq(self) -> ISeq[T]:
177+
def seq(self) -> Optional[ISeq[T]]:
178+
if len(self._inner) == 0:
179+
return None
178180
return sequence(self)
179181

180182
def to_transient(self) -> TransientSet:

src/basilisp/lang/vector.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,9 @@ def val_at(self, k, default=None):
165165
def empty() -> "PersistentVector[T]":
166166
return EMPTY
167167

168-
def seq(self) -> ISeq[T]: # type: ignore[override]
168+
def seq(self) -> Optional[ISeq[T]]: # type: ignore[override]
169+
if len(self._inner) == 0:
170+
return None
169171
return sequence(self)
170172

171173
def peek(self) -> Optional[T]:

0 commit comments

Comments
 (0)