@@ -1044,10 +1044,11 @@ def get_contents(self) -> Optional[ContentStream]:
10441044 pdf = cast (IndirectObject , self .indirect_reference ).pdf
10451045 except AttributeError :
10461046 pdf = None
1047- obj = self [PG .CONTENTS ]. get_object ()
1048- if isinstance (obj , NullObject ):
1047+ obj = self [PG .CONTENTS ]
1048+ if is_null_or_none (obj ):
10491049 return None
1050- return ContentStream (obj , pdf )
1050+ resolved_object = obj .get_object ()
1051+ return ContentStream (resolved_object , pdf )
10511052 return None
10521053
10531054 def replace_contents (
@@ -1846,8 +1847,8 @@ def _extract_text(
18461847 # file as not damaged, no need to check for TJ or Tj
18471848 return ""
18481849
1849- if "/Font" in resources_dict :
1850- for f in cast (DictionaryObject , resources_dict [ "/Font" ] ):
1850+ if "/Font" in resources_dict and ( font := resources_dict [ "/Font" ]) :
1851+ for f in cast (DictionaryObject , font ):
18511852 cmaps [f ] = build_char_map (f , space_width , obj )
18521853 cmap : Tuple [
18531854 Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
@@ -1864,7 +1865,7 @@ def _extract_text(
18641865 )
18651866 if not isinstance (content , ContentStream ):
18661867 content = ContentStream (content , pdf , "bytes" )
1867- except KeyError : # no content can be extracted (certainly empty page)
1868+ except ( AttributeError , KeyError ) : # no content can be extracted (certainly empty page)
18681869 return ""
18691870 # We check all strings are TextStringObjects. ByteStringObjects
18701871 # are strings where the byte->string encoding was unknown, so adding
0 commit comments