Skip to content

Commit 082e845

Browse files
committed
fix: doc strings and function name
1 parent 5784834 commit 082e845

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

flair/training_utils.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -463,24 +463,25 @@ def create_labeled_sentence_from_tokens(
463463
return sentence
464464

465465

466-
def create_labeled_sentence(
466+
def create_labeled_sentence_from_entity_offsets(
467467
text: str,
468468
entities: list[CharEntity],
469469
token_limit: float = inf,
470470
) -> Sentence:
471-
"""Chunks and labels a text from a list of entity annotations.
471+
"""Creates a labeled sentence from a text and a list of entity annotations.
472472
473473
The function explicitly tokenizes the text and labels separately, ensuring entity labels are
474-
not partially split across tokens.
474+
not partially split across tokens. The sentence is truncated if a token limit is set.
475475
476476
Args:
477477
text (str): The full text to be tokenized and labeled.
478478
entities (list of tuples): Ordered non-overlapping entity annotations with each tuple in the
479479
format (start_char_index, end_char_index, entity_class, entity_text).
480-
token_limit: numerical value that determines the maximum size of a chunk. use inf to not perform chunking
480+
token_limit: numerical value that determines the maximum token length of the sentence.
481+
Use inf to disable truncation.
481482
482483
Returns:
483-
A list of labeled Sentence objects representing the chunks of the original text
484+
A labeled Sentence object representing the text and entity annotations.
484485
"""
485486
tokens: list[Token] = []
486487
current_index = 0

tests/test_sentence_labeling.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pytest
44

55
from flair.data import Sentence
6-
from flair.training_utils import CharEntity, TokenEntity, create_labeled_sentence
6+
from flair.training_utils import CharEntity, TokenEntity, create_labeled_sentence_from_entity_offsets
77

88

99
@pytest.fixture(params=["resume1.txt"])
@@ -63,7 +63,7 @@ def small_token_limit_response() -> list[Sentence]:
6363

6464
class TestChunking:
6565
def test_empty_string(self):
66-
sentences = create_labeled_sentence("", [])
66+
sentences = create_labeled_sentence_from_entity_offsets("", [])
6767
assert len(sentences) == 0
6868

6969
def check_tokens(self, sentence: Sentence, expected_tokens: list[str]):
@@ -101,11 +101,11 @@ def check_split_entities(self, entity_labels, sentence: Sentence):
101101
)
102102
def test_short_text(self, test_text: str, expected_text: str):
103103
"""Short texts that should fit nicely into a single chunk."""
104-
chunks = create_labeled_sentence(test_text, [])
104+
chunks = create_labeled_sentence_from_entity_offsets(test_text, [])
105105
assert chunks.text == expected_text
106106

107107
def test_create_labeled_sentence(self, parsed_resume_dict: dict):
108-
create_labeled_sentence(parsed_resume_dict["raw_text"], parsed_resume_dict["entities"])
108+
create_labeled_sentence_from_entity_offsets(parsed_resume_dict["raw_text"], parsed_resume_dict["entities"])
109109

110110
@pytest.mark.parametrize(
111111
"test_text, entities, expected_tokens, expected_labels",
@@ -161,7 +161,7 @@ def test_create_labeled_sentence(self, parsed_resume_dict: dict):
161161
def test_contractions_and_hyphens(
162162
self, test_text: str, entities: list[CharEntity], expected_tokens: list[str], expected_labels: list[TokenEntity]
163163
):
164-
sentence = create_labeled_sentence(test_text, entities)
164+
sentence = create_labeled_sentence_from_entity_offsets(test_text, entities)
165165
self.check_tokens(sentence, expected_tokens)
166166
self.check_token_entities(sentence, expected_labels)
167167

@@ -176,7 +176,7 @@ def test_contractions_and_hyphens(
176176
)
177177
def test_long_text(self, test_text: str, entities: list[CharEntity]):
178178
"""Test for handling long texts that should be split into multiple chunks."""
179-
create_labeled_sentence(test_text, entities)
179+
create_labeled_sentence_from_entity_offsets(test_text, entities)
180180

181181
@pytest.mark.parametrize(
182182
"test_text, entities, expected_labels",
@@ -201,5 +201,5 @@ def test_long_text(self, test_text: str, entities: list[CharEntity]):
201201
def test_text_with_punctuation(
202202
self, test_text: str, entities: list[CharEntity], expected_labels: list[TokenEntity]
203203
):
204-
sentence = create_labeled_sentence(test_text, entities)
204+
sentence = create_labeled_sentence_from_entity_offsets(test_text, entities)
205205
self.check_token_entities(sentence, expected_labels)

0 commit comments

Comments
 (0)