@@ -81,28 +81,32 @@ def __init__(
8181 self .page_obj = page_obj # Reference to the PageObject for font width maps
8282 self .obj = obj
8383 self .pdf = pdf
84- self . orientations = orientations
84+
8585 self .space_width = space_width
8686 self .content_key = content_key
8787 self .visitor_operand_before = visitor_operand_before
8888 self .visitor_operand_after = visitor_operand_after
89- self .visitor_text = visitor_text
90-
91- # Text state
92- self .text : str = ""
93- self .output : str = ""
94- self .rtl_dir : bool = False # right-to-left
9589
9690 # Matrix state
9791 self .cm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
9892 self .tm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
99- self .cm_stack : List [Tuple [Any , ...]] = []
100-
101- # Previous matrices for tracking changes
93+ self .cm_stack : List [
94+ Tuple [
95+ List [float ],
96+ Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
97+ float ,
98+ float ,
99+ float ,
100+ float ,
101+ float ,
102+ ]
103+ ] = []
104+
105+ # Store the last modified matrices; can be an intermediate position
102106 self .cm_prev : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
103107 self .tm_prev : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
104108
105- # Memo matrices for visitor callbacks
109+ # Store the position at the beginning of building the text
106110 self .memo_cm : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
107111 self .memo_tm : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
108112
@@ -113,23 +117,25 @@ def __init__(
113117 self .TL : float = 0.0
114118 self .font_size : float = 12.0 # init just in case
115119
120+ # Text state
121+ self .text : str = ""
122+ self .output : str = ""
123+ self .rtl_dir : bool = False # right-to-left
124+
116125 # Character map state
117126 self .cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]] = (
118127 "charmap" ,
119128 {},
120129 "NotInitialized" ,
121130 None ,
122131 ) # (encoding, CMAP, font resource name, font)
132+ self .orientations : Tuple [int , ...] = orientations
133+ self .visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]] = None
134+ self .cmaps : Dict [str , Tuple [str , float , Union [str , Dict [int , str ]], Dict [str , str ], DictionaryObject ]] = {}
123135
124136 # Actual string size tracking
125137 self ._actual_str_size : Dict [str , float ] = {"str_widths" : 0.0 , "space_width" : 0.0 , "str_height" : 0.0 }
126138
127- # Character maps for fonts
128- self .cmaps : Dict [
129- str ,
130- Tuple [str , float , Union [str , Dict [int , str ]], Dict [str , str ], DictionaryObject ],
131- ] = {}
132-
133139 # Resources dictionary
134140 self .resources_dict : Optional [DictionaryObject ] = None
135141
@@ -231,8 +237,7 @@ def _process_operation(self, operator: bytes, operands: List[Any]) -> None:
231237 if self .visitor_operand_after is not None :
232238 self .visitor_operand_after (operator , operands , self .cm_matrix , self .tm_matrix )
233239
def compute_str_widths(self, str_widths: float) -> float:
    """Scale an accumulated string width down by a factor of 1000.

    Args:
        str_widths: The raw accumulated width value.

    Returns:
        ``str_widths`` divided by 1000.
    """
    # NOTE(review): the divisor presumably converts thousandths-of-a-unit
    # glyph metrics into text-space units -- confirm against the callers.
    scale = 1000
    return str_widths / scale
237242
238243 def _flush_text (self ) -> None :
@@ -355,22 +360,22 @@ def _handle_operation_move_text_position(self, operands: List[Any]) -> None:
355360 tx , ty = float (operands [0 ]), float (operands [1 ])
356361 self .tm_matrix [4 ] += tx * self .tm_matrix [0 ] + ty * self .tm_matrix [2 ]
357362 self .tm_matrix [5 ] += tx * self .tm_matrix [1 ] + ty * self .tm_matrix [3 ]
358- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
363+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
359364 self ._actual_str_size ["str_widths" ] = 0.0
360365 self ._handle_position_change (str_widths )
361366
362367 def _handle_operation_set_text_matrix (self , operands : List [Any ]) -> None :
363368 """Handle Tm (Set text matrix) operation."""
364369 self .tm_matrix = [float (operand ) for operand in operands [:6 ]]
365- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
370+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
366371 self ._actual_str_size ["str_widths" ] = 0.0
367372 self ._handle_position_change (str_widths )
368373
369374 def _handle_operation_move_to_next_line (self , operands : List [Any ]) -> None :
370375 """Handle T* (Move to next line) operation."""
371376 self .tm_matrix [4 ] -= self .TL * self .tm_matrix [2 ]
372377 self .tm_matrix [5 ] -= self .TL * self .tm_matrix [3 ]
373- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
378+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
374379 self ._actual_str_size ["str_widths" ] = 0.0
375380 self ._handle_position_change (str_widths )
376381
@@ -389,7 +394,7 @@ def _handle_operation_show_text(self, operands: List[Any]) -> None:
389394 self ._space_width ,
390395 self ._actual_str_size ,
391396 )
392- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
397+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
393398 self ._handle_position_change (str_widths )
394399
395400 def _handle_operation_show_text_with_positioning (self , operands : List [Any ]) -> None :
@@ -471,7 +476,7 @@ def _handle_position_change(self, str_widths: float) -> None:
471476 self .font_size ,
472477 self .visitor_text ,
473478 str_widths ,
474- self ._compute_str_widths (self ._actual_str_size ["space_width" ]),
479+ self .compute_str_widths (self ._actual_str_size ["space_width" ]),
475480 self ._actual_str_size ["str_height" ],
476481 )
477482 if self .text == "" :
@@ -482,16 +487,15 @@ def _handle_position_change(self, str_widths: float) -> None:
482487
483488 def _get_actual_font_widths (
484489 self ,
485- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
490+ cmap : Tuple [
491+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
492+ ],
486493 text_operands : str ,
487494 font_size : float ,
488495 space_width : float ,
489496 ) -> Tuple [float , float , float ]:
490- """Get actual font widths for text operands."""
491497 font_widths : float = 0
492498 font_name : str = cmap [2 ]
493-
494- # Use the page object's font width maps
495499 if font_name not in self .page_obj ._font_width_maps :
496500 if cmap [3 ] is None :
497501 font_width_map : Dict [Any , float ] = {}
@@ -505,7 +509,6 @@ def _get_actual_font_widths(
505509 if actual_space_width == 0 :
506510 actual_space_width = space_width
507511 self .page_obj ._font_width_maps [font_name ] = (font_width_map , space_char , actual_space_width )
508-
509512 font_width_map = self .page_obj ._font_width_maps [font_name ][0 ]
510513 space_char = self .page_obj ._font_width_maps [font_name ][1 ]
511514 actual_space_width = self .page_obj ._font_width_maps [font_name ][2 ]
@@ -516,27 +519,26 @@ def _get_actual_font_widths(
516519 font_widths += actual_space_width
517520 continue
518521 font_widths += compute_font_width (font_width_map , char )
519-
520522 return (font_widths * font_size , space_width * font_size , font_size )
521523
522-
523-
524524 def _handle_tj (
525525 self ,
526526 text : str ,
527527 operands : List [Union [str , TextStringObject ]],
528528 cm_matrix : List [float ],
529529 tm_matrix : List [float ],
530- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
530+ cmap : Tuple [
531+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
532+ ],
531533 orientations : Tuple [int , ...],
532534 font_size : float ,
533535 rtl_dir : bool ,
534536 visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]],
535537 space_width : float ,
536538 actual_str_size : Dict [str , float ],
537539 ) -> Tuple [str , bool , Dict [str , float ]]:
538- """Handle text showing operations."""
539- text_operands , is_str_operands = get_text_operands ( operands , cm_matrix , tm_matrix , cmap , orientations )
540+ text_operands , is_str_operands = get_text_operands (
541+ operands , cm_matrix , tm_matrix , cmap , orientations )
540542 if is_str_operands :
541543 text += text_operands
542544 else :
@@ -550,13 +552,8 @@ def _handle_tj(
550552 rtl_dir ,
551553 visitor_text ,
552554 )
553-
554- font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = self ._get_actual_font_widths (
555- cmap ,
556- text_operands ,
557- font_size ,
558- space_width ,
559- )
555+ font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = (
556+ self ._get_actual_font_widths (cmap , text_operands , font_size , space_width ))
560557 actual_str_size ["str_widths" ] += font_widths
561558
562559 return text , rtl_dir , actual_str_size
0 commit comments