Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.

Commit 626a825

Browse files
committed
[REF] _group_and_process_chars
1 parent ac221e6 commit 626a825

File tree

1 file changed

+45
-5
lines changed

1 file changed

+45
-5
lines changed

camelot/utils.py

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,25 +1075,65 @@ def _group_and_process_chars(
10751075
flag_size: bool,
10761076
direction: str,
10771077
strip_text: str,
1078-
): # -> List[Tuple[int, int, str]]
1079-
"""Group characters and process them based on size flag."""
1080-
grouped_chars: list[tuple[int, int, str]] = [] # LTChar
1078+
) -> list[tuple[int, int, str]]:
1079+
"""
1080+
Group characters and process them based on size flag.
1081+
1082+
Parameters
1083+
----------
1084+
cut_text : list of tuples
1085+
Each tuple consists of (x0, y0, character), where x0 and y0 are
1086+
coordinates and character can be an instance of LTChar, LTAnno,
1087+
or a list of any type.
1088+
1089+
flag_size : bool
1090+
A flag indicating whether to group by font size.
1091+
1092+
direction : str
1093+
Direction for processing the text (e.g., 'horizontal' or 'vertical').
1094+
1095+
strip_text : str
1096+
Characters to strip from the text.
1097+
1098+
Returns
1099+
-------
1100+
list of tuples
1101+
Each tuple consists of (x0, y0, processed_text), where processed_text
1102+
is the grouped and processed text based on the specified conditions.
1103+
"""
1104+
grouped_chars: list[tuple[int, int, str]] = []
1105+
10811106
for key, chars in groupby(cut_text, itemgetter(0, 1)):
1107+
chars_list = list(chars) # Convert the iterator to a list to reuse it
1108+
10821109
if flag_size:
10831110
grouped_chars.append(
10841111
(
10851112
key[0],
10861113
key[1],
10871114
flag_font_size(
1088-
[t[2] for t in chars], direction, strip_text=strip_text
1115+
[t[2] for t in chars_list], direction, strip_text=strip_text
10891116
),
10901117
)
10911118
)
10921119
else:
1093-
gchars = [t[2].get_text() for t in chars] # .get_text()
1120+
# Check types before calling get_text
1121+
gchars = []
1122+
for t in chars_list:
1123+
if isinstance(
1124+
t[2], (LTChar, LTAnno)
1125+
): # Ensure it's one of the expected types
1126+
gchars.append(t[2].get_text()) # Call get_text() safely
1127+
else:
1128+
# Handle the case where t[2] is a list or other type
1129+
gchars.extend(
1130+
t[2]
1131+
) # Assuming it's iterable and we want to extend the list
1132+
10941133
grouped_chars.append(
10951134
(key[0], key[1], text_strip("".join(gchars), strip_text))
10961135
)
1136+
10971137
return grouped_chars
10981138

10991139

0 commit comments

Comments
 (0)