@@ -1075,25 +1075,65 @@ def _group_and_process_chars(
10751075 flag_size : bool ,
10761076 direction : str ,
10771077 strip_text : str ,
1078- ): # -> List[Tuple[int, int, str]]
1079- """Group characters and process them based on size flag."""
1080- grouped_chars : list [tuple [int , int , str ]] = [] # LTChar
1078+ ) -> list [tuple [int , int , str ]]:
1079+ """
1080+ Group characters and process them based on size flag.
1081+
1082+ Parameters
1083+ ----------
1084+ cut_text : list of tuples
1085+ Each tuple consists of (x0, y0, character), where x0 and y0 are
1086+ coordinates and character can be an instance of LTChar, LTAnno,
1087+ or a list of any type.
1088+
1089+ flag_size : bool
1090+ A flag indicating whether to group by font size.
1091+
1092+ direction : str
1093+ Direction for processing the text (e.g., 'horizontal' or 'vertical').
1094+
1095+ strip_text : str
1096+ Characters to strip from the text.
1097+
1098+ Returns
1099+ -------
1100+ list of tuples
1101+ Each tuple consists of (x0, y0, processed_text), where processed_text
1102+ is the grouped and processed text based on the specified conditions.
1103+ """
1104+ grouped_chars : list [tuple [int , int , str ]] = []
1105+
10811106 for key , chars in groupby (cut_text , itemgetter (0 , 1 )):
1107+ chars_list = list (chars ) # Convert the iterator to a list to reuse it
1108+
10821109 if flag_size :
10831110 grouped_chars .append (
10841111 (
10851112 key [0 ],
10861113 key [1 ],
10871114 flag_font_size (
1088- [t [2 ] for t in chars ], direction , strip_text = strip_text
1115+ [t [2 ] for t in chars_list ], direction , strip_text = strip_text
10891116 ),
10901117 )
10911118 )
10921119 else :
1093- gchars = [t [2 ].get_text () for t in chars ] # .get_text()
1120+ # Check types before calling get_text
1121+ gchars = []
1122+ for t in chars_list :
1123+ if isinstance (
1124+ t [2 ], (LTChar , LTAnno )
1125+ ): # Ensure it's one of the expected types
1126+ gchars .append (t [2 ].get_text ()) # Call get_text() safely
1127+ else :
1128+ # Handle the case where t[2] is a list or other type
1129+ gchars .extend (
1130+ t [2 ]
1131+ ) # Assuming it's iterable and we want to extend the list
1132+
10941133 grouped_chars .append (
10951134 (key [0 ], key [1 ], text_strip ("" .join (gchars ), strip_text ))
10961135 )
1136+
10971137 return grouped_chars
10981138
10991139
0 commit comments