@@ -81,55 +81,60 @@ def __init__(
8181 self .page_obj = page_obj # Reference to the PageObject for font width maps
8282 self .obj = obj
8383 self .pdf = pdf
84- self . orientations = orientations
84+
8585 self .space_width = space_width
8686 self .content_key = content_key
8787 self .visitor_operand_before = visitor_operand_before
8888 self .visitor_operand_after = visitor_operand_after
89- self .visitor_text = visitor_text
90-
91- # Text state
92- self .text : str = ""
93- self .output : str = ""
94- self .rtl_dir : bool = False # right-to-left
9589
9690 # Matrix state
9791 self .cm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
9892 self .tm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
99- self .cm_stack : List [Tuple [Any , ...]] = []
100-
101- # Previous matrices for tracking changes
93+ self .cm_stack : List [
94+ Tuple [
95+ List [float ],
96+ Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
97+ float ,
98+ float ,
99+ float ,
100+ float ,
101+ float ,
102+ ]
103+ ] = []
104+
105+ # Store the last modified matrices; can be an intermediate position
102106 self .cm_prev : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
103107 self .tm_prev : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
104108
105- # Memo matrices for visitor callbacks
109+ # Store the position at the beginning of building the text
106110 self .memo_cm : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
107111 self .memo_tm : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
108112
109113 # Font and text scaling state
110- self .char_scale : float = 1.0
111- self .space_scale : float = 1.0
114+ self .char_scale = 1.0
115+ self .space_scale = 1.0
112116 self ._space_width : float = 500.0 # will be set correctly at first Tf
113- self .TL : float = 0.0
114- self .font_size : float = 12.0 # init just in case
117+ self .TL = 0.0
118+ self .font_size = 12.0 # init just in case
119+
120+ # Text state
121+ self .text : str = ""
122+ self .output : str = ""
123+ self .rtl_dir : bool = False # right-to-left
115124
116- # Character map state
117125 self .cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]] = (
118126 "charmap" ,
119127 {},
120128 "NotInitialized" ,
121129 None ,
122130 ) # (encoding, CMAP, font resource name, font)
131+ self .orientations : Tuple [int , ...] = orientations
132+ self .visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]] = None
133+ self .cmaps : Dict [str , Tuple [str , float , Union [str , Dict [int , str ]], Dict [str , str ], DictionaryObject ]] = {}
123134
124135 # Actual string size tracking
125136 self ._actual_str_size : Dict [str , float ] = {"str_widths" : 0.0 , "space_width" : 0.0 , "str_height" : 0.0 }
126137
127- # Character maps for fonts
128- self .cmaps : Dict [
129- str ,
130- Tuple [str , float , Union [str , Dict [int , str ]], Dict [str , str ], DictionaryObject ],
131- ] = {}
132-
133138 # Resources dictionary
134139 self .resources_dict : Optional [DictionaryObject ] = None
135140
@@ -231,8 +236,7 @@ def _process_operation(self, operator: bytes, operands: List[Any]) -> None:
231236 if self .visitor_operand_after is not None :
232237 self .visitor_operand_after (operator , operands , self .cm_matrix , self .tm_matrix )
233238
234- def _compute_str_widths (self , str_widths : float ) -> float :
235- """Compute string widths."""
239+ def compute_str_widths (self , str_widths : float ) -> float :
236240 return str_widths / 1000
237241
238242 def _flush_text (self ) -> None :
@@ -355,22 +359,22 @@ def _handle_operation_move_text_position(self, operands: List[Any]) -> None:
355359 tx , ty = float (operands [0 ]), float (operands [1 ])
356360 self .tm_matrix [4 ] += tx * self .tm_matrix [0 ] + ty * self .tm_matrix [2 ]
357361 self .tm_matrix [5 ] += tx * self .tm_matrix [1 ] + ty * self .tm_matrix [3 ]
358- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
362+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
359363 self ._actual_str_size ["str_widths" ] = 0.0
360364 self ._handle_position_change (str_widths )
361365
362366 def _handle_operation_set_text_matrix (self , operands : List [Any ]) -> None :
363367 """Handle Tm (Set text matrix) operation."""
364368 self .tm_matrix = [float (operand ) for operand in operands [:6 ]]
365- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
369+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
366370 self ._actual_str_size ["str_widths" ] = 0.0
367371 self ._handle_position_change (str_widths )
368372
369373 def _handle_operation_move_to_next_line (self , operands : List [Any ]) -> None :
370374 """Handle T* (Move to next line) operation."""
371375 self .tm_matrix [4 ] -= self .TL * self .tm_matrix [2 ]
372376 self .tm_matrix [5 ] -= self .TL * self .tm_matrix [3 ]
373- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
377+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
374378 self ._actual_str_size ["str_widths" ] = 0.0
375379 self ._handle_position_change (str_widths )
376380
@@ -389,7 +393,7 @@ def _handle_operation_show_text(self, operands: List[Any]) -> None:
389393 self ._space_width ,
390394 self ._actual_str_size ,
391395 )
392- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
396+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
393397 self ._handle_position_change (str_widths )
394398
395399 def _handle_operation_show_text_with_positioning (self , operands : List [Any ]) -> None :
@@ -471,7 +475,7 @@ def _handle_position_change(self, str_widths: float) -> None:
471475 self .font_size ,
472476 self .visitor_text ,
473477 str_widths ,
474- self ._compute_str_widths (self ._actual_str_size ["space_width" ]),
478+ self .compute_str_widths (self ._actual_str_size ["space_width" ]),
475479 self ._actual_str_size ["str_height" ],
476480 )
477481 if self .text == "" :
@@ -482,16 +486,15 @@ def _handle_position_change(self, str_widths: float) -> None:
482486
483487 def _get_actual_font_widths (
484488 self ,
485- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
489+ cmap : Tuple [
490+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
491+ ],
486492 text_operands : str ,
487493 font_size : float ,
488494 space_width : float ,
489495 ) -> Tuple [float , float , float ]:
490- """Get actual font widths for text operands."""
491496 font_widths : float = 0
492497 font_name : str = cmap [2 ]
493-
494- # Use the page object's font width maps
495498 if font_name not in self .page_obj ._font_width_maps :
496499 if cmap [3 ] is None :
497500 font_width_map : Dict [Any , float ] = {}
@@ -505,7 +508,6 @@ def _get_actual_font_widths(
505508 if actual_space_width == 0 :
506509 actual_space_width = space_width
507510 self .page_obj ._font_width_maps [font_name ] = (font_width_map , space_char , actual_space_width )
508-
509511 font_width_map = self .page_obj ._font_width_maps [font_name ][0 ]
510512 space_char = self .page_obj ._font_width_maps [font_name ][1 ]
511513 actual_space_width = self .page_obj ._font_width_maps [font_name ][2 ]
@@ -516,27 +518,26 @@ def _get_actual_font_widths(
516518 font_widths += actual_space_width
517519 continue
518520 font_widths += compute_font_width (font_width_map , char )
519-
520521 return (font_widths * font_size , space_width * font_size , font_size )
521522
522-
523-
524523 def _handle_tj (
525524 self ,
526525 text : str ,
527526 operands : List [Union [str , TextStringObject ]],
528527 cm_matrix : List [float ],
529528 tm_matrix : List [float ],
530- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
529+ cmap : Tuple [
530+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
531+ ],
531532 orientations : Tuple [int , ...],
532533 font_size : float ,
533534 rtl_dir : bool ,
534535 visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]],
535536 space_width : float ,
536537 actual_str_size : Dict [str , float ],
537538 ) -> Tuple [str , bool , Dict [str , float ]]:
538- """Handle text showing operations."""
539- text_operands , is_str_operands = get_text_operands ( operands , cm_matrix , tm_matrix , cmap , orientations )
539+ text_operands , is_str_operands = get_text_operands (
540+ operands , cm_matrix , tm_matrix , cmap , orientations )
540541 if is_str_operands :
541542 text += text_operands
542543 else :
@@ -550,13 +551,8 @@ def _handle_tj(
550551 rtl_dir ,
551552 visitor_text ,
552553 )
553-
554- font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = self ._get_actual_font_widths (
555- cmap ,
556- text_operands ,
557- font_size ,
558- space_width ,
559- )
554+ font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = (
555+ self ._get_actual_font_widths (cmap , text_operands , font_size , space_width ))
560556 actual_str_size ["str_widths" ] += font_widths
561557
562558 return text , rtl_dir , actual_str_size
0 commit comments