Apply Black formatting to the tokenizer regex pattern and the abbreviation tokenizer tests

alanakbik · alanakbik · commit 6e39cfc87882 · 2025-06-11T16:29:46.000+02:00
diff --git a/flair/tokenization.py b/flair/tokenization.py
@@ -521,7 +521,9 @@ def __init__(self):
 
         # Combined pattern for re.findall:
         # Captures abbreviations OR letter sequences OR digit sequences OR Kanji OR punctuation/symbols
-        combined_pattern = f"({self.abbreviations})|({self.alphabet_pattern})|({self.digits})|({self.kanji})|({self.punctuation})"
+        combined_pattern = (
+            f"({self.abbreviations})|({self.alphabet_pattern})|({self.digits})|({self.kanji})|({self.punctuation})"
+        )
         # Pre-compile the regex for efficiency
         self.token_pattern = re.compile(combined_pattern)
 
diff --git a/tests/test_tokenize_sentence.py b/tests/test_tokenize_sentence.py
@@ -599,25 +599,62 @@ def test_staccato_tokenizer_abbreviations():
     text_1 = "The firm is U.S.A. Inc. and i.e. in the U.S. we use e.g. to give examples."
     sentence_1 = Sentence(text_1, use_tokenizer=tokenizer)
     expected_tokens_1 = [
-        "The", "firm", "is", "U.S.A.", "Inc", ".", "and", "i.e.", "in", "the",
-        "U.S.", "we", "use", "e.g.", "to", "give", "examples", ".",
+        "The",
+        "firm",
+        "is",
+        "U.S.A.",
+        "Inc",
+        ".",
+        "and",
+        "i.e.",
+        "in",
+        "the",
+        "U.S.",
+        "we",
+        "use",
+        "e.g.",
+        "to",
+        "give",
+        "examples",
+        ".",
     ]
     assert [token.text for token in sentence_1.tokens] == expected_tokens_1
 
     # Case 2: Single letter/short word with a dot at sentence end should be split
     text_2 = "He wrote on X. Then Dr. Smith arrived."
     sentence_2 = Sentence(text_2, use_tokenizer=tokenizer)
     expected_tokens_2 = [
-        "He", "wrote", "on", "X", ".", "Then", "Dr", ".", "Smith", "arrived", ".",
+        "He",
+        "wrote",
+        "on",
+        "X",
+        ".",
+        "Then",
+        "Dr",
+        ".",
+        "Smith",
+        "arrived",
+        ".",
     ]
     assert [token.text for token in sentence_2.tokens] == expected_tokens_2
 
     # Case 3: A mix of cases
     text_3 = "The item is from the U.K. (i.e. not the U.S.A.)."
     sentence_3 = Sentence(text_3, use_tokenizer=tokenizer)
     expected_tokens_3 = [
-        "The", "item", "is", "from", "the", "U.K.", "(", "i.e.",
-        "not", "the", "U.S.A.", ")", ".",
+        "The",
+        "item",
+        "is",
+        "from",
+        "the",
+        "U.K.",
+        "(",
+        "i.e.",
+        "not",
+        "the",
+        "U.S.A.",
+        ")",
+        ".",
     ]
     assert [token.text for token in sentence_3.tokens] == expected_tokens_3