@@ -81,12 +81,11 @@ def __init__(
8181 self .page_obj = page_obj # Reference to the PageObject for font width maps
8282 self .obj = obj
8383 self .pdf = pdf
84- self . orientations = orientations
84+
8585 self .space_width = space_width
8686 self .content_key = content_key
8787 self .visitor_operand_before = visitor_operand_before
8888 self .visitor_operand_after = visitor_operand_after
89- self .visitor_text = visitor_text
9089
9190 # Text state
9291 self .text : str = ""
@@ -96,7 +95,17 @@ def __init__(
9695 # Matrix state
9796 self .cm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
9897 self .tm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
99- self .cm_stack : List [Tuple [Any , ...]] = []
98+ self .cm_stack : List [
99+ Tuple [
100+ List [float ],
101+ Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
102+ float ,
103+ float ,
104+ float ,
105+ float ,
106+ float ,
107+ ]
108+ ] = []
100109
101110 # Previous matrices for tracking changes
102111 self .cm_prev : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
@@ -120,16 +129,16 @@ def __init__(
120129 "NotInitialized" ,
121130 None ,
122131 ) # (encoding, CMAP, font resource name, font)
123-
124- # Actual string size tracking
125- self ._actual_str_size : Dict [str , float ] = {"str_widths" : 0.0 , "space_width" : 0.0 , "str_height" : 0.0 }
126-
127- # Character maps for fonts
132+ self .orientations = orientations
133+ self .visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]] = None
128134 self .cmaps : Dict [
129135 str ,
130136 Tuple [str , float , Union [str , Dict [int , str ]], Dict [str , str ], DictionaryObject ],
131137 ] = {}
132138
139+ # Actual string size tracking
140+ self ._actual_str_size : Dict [str , float ] = {"str_widths" : 0.0 , "space_width" : 0.0 , "str_height" : 0.0 }
141+
133142 # Resources dictionary
134143 self .resources_dict : Optional [DictionaryObject ] = None
135144
@@ -231,8 +240,7 @@ def _process_operation(self, operator: bytes, operands: List[Any]) -> None:
231240 if self .visitor_operand_after is not None :
232241 self .visitor_operand_after (operator , operands , self .cm_matrix , self .tm_matrix )
233242
def compute_str_widths(self, str_widths: float) -> float:
    """Scale an accumulated string width down by a factor of 1000.

    Args:
        str_widths: Accumulated width value to scale.

    Returns:
        ``str_widths`` divided by 1000.
    """
    # NOTE(review): presumably converts from 1/1000 glyph-space units; confirm.
    return str_widths / 1000
237245
238246 def _flush_text (self ) -> None :
@@ -355,22 +363,22 @@ def _handle_operation_move_text_position(self, operands: List[Any]) -> None:
355363 tx , ty = float (operands [0 ]), float (operands [1 ])
356364 self .tm_matrix [4 ] += tx * self .tm_matrix [0 ] + ty * self .tm_matrix [2 ]
357365 self .tm_matrix [5 ] += tx * self .tm_matrix [1 ] + ty * self .tm_matrix [3 ]
358- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
366+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
359367 self ._actual_str_size ["str_widths" ] = 0.0
360368 self ._handle_position_change (str_widths )
361369
362370 def _handle_operation_set_text_matrix (self , operands : List [Any ]) -> None :
363371 """Handle Tm (Set text matrix) operation."""
364372 self .tm_matrix = [float (operand ) for operand in operands [:6 ]]
365- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
373+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
366374 self ._actual_str_size ["str_widths" ] = 0.0
367375 self ._handle_position_change (str_widths )
368376
369377 def _handle_operation_move_to_next_line (self , operands : List [Any ]) -> None :
370378 """Handle T* (Move to next line) operation."""
371379 self .tm_matrix [4 ] -= self .TL * self .tm_matrix [2 ]
372380 self .tm_matrix [5 ] -= self .TL * self .tm_matrix [3 ]
373- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
381+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
374382 self ._actual_str_size ["str_widths" ] = 0.0
375383 self ._handle_position_change (str_widths )
376384
@@ -389,7 +397,7 @@ def _handle_operation_show_text(self, operands: List[Any]) -> None:
389397 self ._space_width ,
390398 self ._actual_str_size ,
391399 )
392- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
400+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
393401 self ._handle_position_change (str_widths )
394402
395403 def _handle_operation_show_text_with_positioning (self , operands : List [Any ]) -> None :
@@ -471,7 +479,7 @@ def _handle_position_change(self, str_widths: float) -> None:
471479 self .font_size ,
472480 self .visitor_text ,
473481 str_widths ,
474- self ._compute_str_widths (self ._actual_str_size ["space_width" ]),
482+ self .compute_str_widths (self ._actual_str_size ["space_width" ]),
475483 self ._actual_str_size ["str_height" ],
476484 )
477485 if self .text == "" :
@@ -482,16 +490,15 @@ def _handle_position_change(self, str_widths: float) -> None:
482490
483491 def _get_actual_font_widths (
484492 self ,
485- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
493+ cmap : Tuple [
494+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
495+ ],
486496 text_operands : str ,
487497 font_size : float ,
488498 space_width : float ,
489499 ) -> Tuple [float , float , float ]:
490- """Get actual font widths for text operands."""
491500 font_widths : float = 0
492501 font_name : str = cmap [2 ]
493-
494- # Use the page object's font width maps
495502 if font_name not in self .page_obj ._font_width_maps :
496503 if cmap [3 ] is None :
497504 font_width_map : Dict [Any , float ] = {}
@@ -505,7 +512,6 @@ def _get_actual_font_widths(
505512 if actual_space_width == 0 :
506513 actual_space_width = space_width
507514 self .page_obj ._font_width_maps [font_name ] = (font_width_map , space_char , actual_space_width )
508-
509515 font_width_map = self .page_obj ._font_width_maps [font_name ][0 ]
510516 space_char = self .page_obj ._font_width_maps [font_name ][1 ]
511517 actual_space_width = self .page_obj ._font_width_maps [font_name ][2 ]
@@ -516,27 +522,26 @@ def _get_actual_font_widths(
516522 font_widths += actual_space_width
517523 continue
518524 font_widths += compute_font_width (font_width_map , char )
519-
520525 return (font_widths * font_size , space_width * font_size , font_size )
521526
522-
523-
524527 def _handle_tj (
525528 self ,
526529 text : str ,
527530 operands : List [Union [str , TextStringObject ]],
528531 cm_matrix : List [float ],
529532 tm_matrix : List [float ],
530- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
533+ cmap : Tuple [
534+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
535+ ],
531536 orientations : Tuple [int , ...],
532537 font_size : float ,
533538 rtl_dir : bool ,
534539 visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]],
535540 space_width : float ,
536541 actual_str_size : Dict [str , float ],
537542 ) -> Tuple [str , bool , Dict [str , float ]]:
538- """Handle text showing operations."""
539- text_operands , is_str_operands = get_text_operands ( operands , cm_matrix , tm_matrix , cmap , orientations )
543+ text_operands , is_str_operands = get_text_operands (
544+ operands , cm_matrix , tm_matrix , cmap , orientations )
540545 if is_str_operands :
541546 text += text_operands
542547 else :
@@ -550,13 +555,8 @@ def _handle_tj(
550555 rtl_dir ,
551556 visitor_text ,
552557 )
553-
554- font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = self ._get_actual_font_widths (
555- cmap ,
556- text_operands ,
557- font_size ,
558- space_width ,
559- )
558+ font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = (
559+ self ._get_actual_font_widths (cmap , text_operands , font_size , space_width ))
560560 actual_str_size ["str_widths" ] += font_widths
561561
562562 return text , rtl_dir , actual_str_size
0 commit comments