Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
* Added support for referring imported Python names as by `from ... import ...` (#1154)

### Changed
* Removed implicit support for single-use iterables in sequences, and introduced `iterator-seq` to expliciltly handle them (#1192)

### Fixed
* Fix a bug where protocols with methods with leading hyphens in the could not be defined (#1230)
* Fix a bug where attempting to `:refer` a non-existent Var from another namespace would throw an unhelpful exception (#1231)
Expand Down
35 changes: 35 additions & 0 deletions docs/pyinterop.rst
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,41 @@ Type hints may be applied to :lpy:form:`def` names, function arguments and retur
Return annotations are combined as by :external:py:obj:`typing.Union`, so ``typing.Union[str, str] == str``.
The annotations for individual arity arguments are preserved in their compiled form, but they are challenging to access programmatically.

.. _python_iterators:

Python Iterators
----------------

In Python, an **iterable** is an object like a list, range, or generator that can be looped over, while an **iterator** is the object that actually yields each item of the iterable one at a time using ``next()``. They are ubiquituous in Python, showing up in ``for`` loops, list comprehensions and many built in functions.

In Basilisp, iterables are treated as first-class sequences and are :lpy:fn:`basilisp.core/seq`-able, except for **single-use** iterables, which must be explicitly converted to a sequence using :lpy:fn:`basilisp.core/iterator-seq` before use.

Single-use iterables are those that return the same iterator every time one is requested.
This becomes problematic when the single-use iterable is coerced to a sequence more than once. For example:

.. code-block:: clojure

(when (> (count iterable-coll) 0)
(first iterable-coll))


Here, both :lpy:fn:`basilisp.core/count` and :lpy:fn:`basilisp.core/first` internally request an iterator from ``iterable-coll``.
If it is **re-iterable**, each call gets a fresh iterator beginning at the start of the collection, and the code behaves as expected.
But if it is a **single-use** iterable, like a generator, both operations share the same iterator.
As a result, ``count`` consumes all elements, and ``first`` returns ``nil``, which is wrong, since the iterator is already exhausted, leading to incorect behavior.

To prevent this subtle bug, Basilisp throws a :external:py:obj:`TypeError` an iterator is requested from such functions.
The correct approach is to use :lpy:fn:`basilisp.core/iterator-seq` to create a sequence from it:

.. code-block:: clojure

(let [s (iterator-seq iterable-coll)]
(when (> (count s) 0)
(first s)))


This ensures ``count`` and ``first`` operate on the same stable sequence rather than consuming a shared iterator.

.. _python_decorators:

Python Decorators
Expand Down
4 changes: 2 additions & 2 deletions src/basilisp/contrib/nrepl_server.lpy
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,8 @@
(create-ns (symbol ns))
eval-ns)
completions (when-not (str/blank? prefix)
(seq (binding [*ns* completion-ns]
(basilisp.lang.runtime/repl_completions prefix))))]
(iterator-seq (binding [*ns* completion-ns]
(basilisp.lang.runtime/repl_completions prefix))))]
(send-fn request {"completions" (->> (map str completions)
sort
(map (fn [completion]
Expand Down
2 changes: 1 addition & 1 deletion src/basilisp/contrib/sphinx/autodoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def can_document_member(
return False

def parse_name(self) -> bool:
v = runtime.first(reader.read_str(self.name))
v = runtime.first(runtime.to_iterator_seq(reader.read_str(self.name)))
if isinstance(v, sym.Symbol) and v.ns is None:
self.modname = v.name
self.objpath: list[str] = []
Expand Down
6 changes: 3 additions & 3 deletions src/basilisp/contrib/sphinx/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def handle_signature(self, sig: str, signode: desc_signature) -> tuple[str, str]

self._add_source_annotations(signode)

sig_sexp = runtime.first(reader.read_str(sig))
sig_sexp = runtime.first(runtime.to_iterator_seq(reader.read_str(sig)))
assert isinstance(sig_sexp, IPersistentList)
fn_sym = runtime.first(sig_sexp)
assert isinstance(fn_sym, sym.Symbol)
Expand Down Expand Up @@ -285,7 +285,7 @@ def get_signature_prefix(self, sig: str) -> str:

def get_index_text(self, modname: str, name: tuple[str, str]) -> str:
sig, prefix = name
sig_sexp = runtime.first(reader.read_str(sig))
sig_sexp = runtime.first(runtime.to_iterator_seq(reader.read_str(sig)))
if isinstance(sig_sexp, IPersistentList):
sig = runtime.first(sig_sexp)
return f"{sig} ({prefix} in {modname})"
Expand Down Expand Up @@ -560,7 +560,7 @@ def resolve_xref( # pylint: disable=too-many-arguments
maybe_obj = self.forms.get(target)
title = target
else:
obj_sym = runtime.first(reader.read_str(target))
obj_sym = runtime.first(runtime.to_iterator_seq(reader.read_str(target)))
assert isinstance(
obj_sym, sym.Symbol
), f"Symbol expected; not {obj_sym.__class__}"
Expand Down
51 changes: 31 additions & 20 deletions src/basilisp/core.lpy
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,15 @@
(fn seq [o]
(basilisp.lang.runtime/to-seq o)))

(def ^{:doc "Coerce the python Iterable ``it`` to a Seq. Supports both re-iterable
(as with ``seq``) as well as single-use iterables that always return
the same iterator."
:arglists '([it])}
iterator-seq
(fn iterator-seq [it]
(basilisp.lang.runtime/to-iterator-seq it)))


(def ^{:doc "Apply function ``f`` to the arguments provided.

The last argument must always be coercible to a Seq. Intermediate
Expand Down Expand Up @@ -326,6 +335,7 @@
(.alter-meta #'vector? assoc :inline ^:safe-py-params (fn [o] `(instance? basilisp.lang.interfaces/IPersistentVector ~o)))
(.alter-meta #'seq? assoc :inline ^:safe-py-params (fn [o] `(instance? basilisp.lang.interfaces/ISeq ~o)))
(.alter-meta #'seq assoc :inline ^:safe-py-params (fn [o] `(basilisp.lang.runtime/to-seq ~o)))
(.alter-meta #'iterator-seq assoc :inline ^:safe-py-params (fn [it] `(basilisp.lang.runtime/to-iterator-seq ~it)))
(.alter-meta #'set assoc :inline ^:safe-py-params (fn [coll] `(basilisp.lang.runtime/to-set ~coll)))
(.alter-meta #'vec assoc :inline ^:safe-py-params (fn [coll] `(basilisp.lang.runtime/vector ~coll)))

Expand Down Expand Up @@ -1484,10 +1494,10 @@
(instance? basilisp.lang.interfaces/IReversible x))

(defn ^:inline seqable?
"Return ``true`` if an :py:class:`basilisp.lang.interfaces.ISeq` can be produced from
``x``\\."
"Return ``true`` if :lpy:fn:``seq`` is supported on``x``\\."
[x]
(instance? basilisp.lang.interfaces/ISeqable x))
(or (instance? basilisp.lang.interfaces/ISeqable x)
(basilisp.lang.runtime/is-reiterable-iterable x)))

(defn ^:inline sequential?
"Return ``true`` if ``x`` implements :py:class:`basilisp.lang.interfaces.ISequential`\\."
Expand Down Expand Up @@ -2514,7 +2524,8 @@
"Return a lazy sequence of the concatenation of ``colls``\\. None of the input
collections will be evaluated until it is needed."
[& colls]
`(concat ~@(map (fn [coll] `(lazy-seq ~coll)) colls)))
`(concat ~@(iterator-seq (python/map (fn [coll] `(lazy-seq ~coll)) colls))))


(defn dorun
"Force a lazy sequence be fully realized. Returns ``nil``\\.
Expand Down Expand Up @@ -3259,7 +3270,7 @@
sequence."
[v]
(lazy-seq
(when (and (or (seq? v) (seqable? v)) (seq v))
(when (and (sequential? v) (seq v))
(let [e (first v)
r (rest v)]
(if (or (seq? e) (seqable? e))
Expand Down Expand Up @@ -4570,17 +4581,17 @@
(read-seq {} stream))
([opts stream]
(let [read (:read opts basilisp.lang.reader/read)]
(seq (read stream
*resolver*
*data-readers*
(:eof opts)
(= (:eof opts) :eofthrow)
(:features opts)
(not= :preserve (:read-cond opts))
*default-data-reader-fn*
**
:init-line (:init-line opts)
:init-column (:init-column opts))))))
(iterator-seq (read stream
*resolver*
*data-readers*
(:eof opts)
(= (:eof opts) :eofthrow)
(:features opts)
(not= :preserve (:read-cond opts))
*default-data-reader-fn*
**
:init-line (:init-line opts)
:init-column (:init-column opts))))))

(defn read-string
"Read a string of Basilisp code.
Expand Down Expand Up @@ -5497,7 +5508,7 @@
string. Otherwise, return a vector with the string in the first position and the
match groups in the following positions."
[pattern s]
(lazy-re-seq (seq (re/finditer pattern s))))
(lazy-re-seq (iterator-seq (re/finditer pattern s))))

;;;;;;;;;;;;;;;;;
;; Hierarchies ;;
Expand Down Expand Up @@ -6125,7 +6136,7 @@
fmeta (merge (meta fn-decl) (meta name))
decorators (:decorators fmeta)]
(if decorators
(loop [wrappers (seq (python/reversed decorators))
(loop [wrappers (iterator-seq (python/reversed decorators))
final fn-decl]
(if (seq wrappers)
(recur (rest wrappers)
Expand Down Expand Up @@ -7640,13 +7651,13 @@
(mapcat (fn [^os/DirEntry entry]
(if (.is-dir entry)
;; immediate subdirectory
(os/scandir (.-path entry))
(iterator-seq (os/scandir (.-path entry)))
;; top level file
[entry])))
(filter #(.is-file %))
(map #(pathlib/Path (.-path %)))
(filter (comp #{"data_readers.lpy" "data_readers.cljc"} name)))
(eduction (os/scandir dir))))))
(eduction (iterator-seq (os/scandir dir)))))))
(group-by #(.-parent %))
vals
;; Only load one data readers file per directory and prefer
Expand Down
4 changes: 2 additions & 2 deletions src/basilisp/lang/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
process_lrepr_kwargs,
)
from basilisp.lang.reduced import Reduced
from basilisp.lang.seq import sequence
from basilisp.lang.seq import iterator_sequence
from basilisp.lang.vector import MapEntry
from basilisp.util import partition

Expand Down Expand Up @@ -354,7 +354,7 @@ def empty(self) -> "PersistentMap":
def seq(self) -> Optional[ISeq[IMapEntry[K, V]]]:
if len(self._inner) == 0:
return None
return sequence(MapEntry.of(k, v) for k, v in self._inner.items())
return iterator_sequence((MapEntry.of(k, v) for k, v in self._inner.items()))

def to_transient(self) -> TransientMap[K, V]:
return TransientMap(self._inner.mutate())
Expand Down
16 changes: 15 additions & 1 deletion src/basilisp/lang/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,10 +1287,14 @@ def cons(o, seq) -> ISeq:

to_seq = lseq.to_seq

to_iterator_seq = lseq.iterator_sequence


def concat(*seqs: Any) -> ISeq:
"""Concatenate the sequences given by seqs into a single ISeq."""
return lseq.sequence(itertools.chain.from_iterable(filter(None, map(to_seq, seqs))))
return lseq.iterator_sequence(
itertools.chain.from_iterable(filter(None, map(to_seq, seqs)))
)


T_reduce_init = TypeVar("T_reduce_init")
Expand Down Expand Up @@ -1391,6 +1395,11 @@ def apply_kw(f, args):

@functools.singledispatch
def count(coll) -> int:
if isinstance(coll, Iterable) and iter(coll) is coll:
raise TypeError(
f"The count function is not supported on single-use iterable objects because it would exhaust them during counting. Please use iterator-seq to coerce them into sequences first. Iterable Object type: {type(coll)}"
)

try:
return sum(1 for _ in coll)
except TypeError as e:
Expand Down Expand Up @@ -2638,3 +2647,8 @@ def get_compiler_opts() -> CompilerOpts:
v = Var.find_in_ns(CORE_NS_SYM, sym.symbol(COMPILER_OPTIONS_VAR_NAME))
assert v is not None, "*compiler-options* Var not defined"
return cast(CompilerOpts, v.value)


def is_reiterable_iterable(x: Any) -> bool:
"""Return ``true`` if x is a re-iterable Iterable object."""
return isinstance(x, Iterable) and iter(x) is not x
19 changes: 17 additions & 2 deletions src/basilisp/lang/seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,20 @@ def empty(self):
return EMPTY


def sequence(s: Iterable[T]) -> ISeq[T]:
"""Create a Sequence from Iterable s."""
def sequence(s: Iterable[T], support_single_use: bool = False) -> ISeq[T]:
"""Create a Sequence from Iterable `s`.

By default, raise a ``TypeError`` if `s` is a single-use
Iterable, unless `fail_single_use` is ``True``.

"""
i = iter(s)

if not support_single_use and i is s:
raise TypeError(
f"Can't create sequence out of single-use iterable object, please use iterator-seq instead. Iterable Object type: {type(s)}"
)

def _next_elem() -> ISeq[T]:
try:
e = next(i)
Expand All @@ -233,6 +243,11 @@ def _next_elem() -> ISeq[T]:
return LazySeq(_next_elem)


def iterator_sequence(s: Iterable[T]) -> ISeq[T]:
"""Create a Sequence from any iterable `s`."""
return sequence(s, support_single_use=True)


@overload
def _seq_or_nil(s: None) -> None: ...

Expand Down
4 changes: 2 additions & 2 deletions src/basilisp/lang/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from basilisp.lang.obj import PrintSettings
from basilisp.lang.obj import seq_lrepr as _seq_lrepr
from basilisp.lang.reduced import Reduced
from basilisp.lang.seq import sequence
from basilisp.lang.seq import iterator_sequence, sequence
from basilisp.util import partition

T = TypeVar("T")
Expand Down Expand Up @@ -213,7 +213,7 @@ def pop(self) -> "PersistentVector[T]":
return self[:-1]

def rseq(self) -> ISeq[T]:
return sequence(reversed(self))
return iterator_sequence(reversed(self))

def to_transient(self) -> TransientVector:
return TransientVector(self._inner.evolver())
Expand Down
19 changes: 19 additions & 0 deletions tests/basilisp/seq_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,25 @@ def test_seq_iterator():
assert 10000 == len(vec.vector(s))


def test_seq_iterables():
iterable1 = range(3)
s1 = lseq.sequence(iterable1)
s2 = lseq.sequence(iterable1)
assert 3 == runtime.count(s1)
assert llist.l(0, 1, 2) == s1 == s2

# A generator is an example of a single-use iterable
iterable2 = (i for i in [4, 5, 6])
with pytest.raises(TypeError):
lseq.sequence(iterable2)
with pytest.raises(TypeError):
runtime.count(iterable2)

s3 = lseq.iterator_sequence(iterable2)
assert 3 == runtime.count(s3)
assert llist.l(4, 5, 6) == s3


def test_seq_equals():
# to_seq should be first to ensure that `ISeq.__eq__` is used
assert runtime.to_seq(vec.v(1, 2, 3)) == llist.l(1, 2, 3)
Expand Down
Loading
Loading