Skip to content

Commit d85df9d

Browse files
authored
Fix Span.sents for edge case of Span being the only Span in the last sentence of a Doc. (#12484)
1 parent 372a908 commit d85df9d

File tree

2 files changed

+19
-0
lines changed

2 files changed

+19
-0
lines changed

spacy/tests/doc/test_span.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,3 +716,18 @@ def test_for_partial_ent_sents():
716716
# equal to the sentences referenced in ent.sents.
717717
for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
718718
assert doc_sent == ent_sent
719+
720+
721+
def test_for_no_ent_sents():
    """Check Span.sents for a trailing entity that is alone in the last sentence.

    The doc has five tokens; per ``sent_starts`` the final token ("ENTITY")
    opens a sentence of its own. The entity span covers only that token, so
    iterating its ``.sents`` must yield exactly one sentence, and that sentence
    must agree with ``.sent``.
    """
    vocab = English().vocab
    tokens = ["This", "is", "a", "test.", "ENTITY"]
    # 1 marks a sentence start: the last token begins the second sentence.
    starts = [1, 0, 0, 0, 1]
    doc = Doc(vocab, words=tokens, sent_starts=starts)

    # Label the trailing token (index 4) as the document's only entity.
    doc.set_ents([Span(doc, 4, 5, "WORK")])

    ent_sents = list(doc.ents[0].sents)
    assert len(ent_sents) == 1
    assert str(ent_sents[0]) == str(doc.ents[0].sent) == "ENTITY"

spacy/tokens/span.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ cdef class Span:
463463
elif i == self.doc.length - 1:
464464
yield Span(self.doc, start, self.doc.length)
465465

466+
# Ensure that trailing parts of the Span instance are included in last element of .sents.
467+
if start == self.doc.length - 1:
468+
yield Span(self.doc, start, self.doc.length)
469+
466470
@property
467471
def ents(self):
468472
"""The named entities that fall completely within the span. Returns

0 commit comments

Comments
 (0)