1717import pandas as pd
1818import pyarrow as pa
1919import pyarrow .dataset
20- import pyarrow .orc
2120from typing_extensions import Literal
2221
2322from awswrangler import _data_types , _utils , exceptions
4039
4140if TYPE_CHECKING :
4241 from mypy_boto3_s3 import S3Client
42+ from pyarrow .orc import ORCFile
4343
4444FULL_READ_S3_BLOCK_SIZE = 20_971_520 # 20 MiB (20 * 2**20 bytes)
4545METADATA_READ_S3_BLOCK_SIZE = 131_072 # 128 KiB (128 * 2**10 bytes); passed as s3_block_size when reading ORC metadata
4646
4747_logger : logging .Logger = logging .getLogger (__name__ ) # module-level logger named after this module
4848
4949
50- def _pyarrow_orc_file_wrapper (source : Any ) -> pyarrow .orc .ORCFile :
50+ def _pyarrow_orc_file_wrapper (source : Any ) -> "ORCFile" :
51+ from pyarrow .orc import ORCFile
52+
5153 try :
52- return pyarrow . orc . ORCFile (source = source )
54+ return ORCFile (source = source )
5355 except pyarrow .ArrowInvalid as ex :
5456 if str (ex ) == "ORC file size is 0 bytes" :
5557 _logger .warning ("Ignoring empty file..." )
@@ -74,7 +76,7 @@ def _read_orc_metadata_file(
7476 s3_block_size = METADATA_READ_S3_BLOCK_SIZE ,
7577 s3_additional_kwargs = s3_additional_kwargs ,
7678 ) as f :
77- orc_file : Optional [pyarrow . orc . ORCFile ] = _pyarrow_orc_file_wrapper (source = f )
79+ orc_file : Optional [" ORCFile" ] = _pyarrow_orc_file_wrapper (source = f )
7880 if orc_file :
7981 return orc_file .schema
8082 return None
@@ -118,7 +120,7 @@ def _read_orc_file(
118120 s3_additional_kwargs = s3_additional_kwargs ,
119121 s3_client = s3_client ,
120122 ) as f :
121- orc_file : Optional [pyarrow . orc . ORCFile ] = _pyarrow_orc_file_wrapper (
123+ orc_file : Optional [" ORCFile" ] = _pyarrow_orc_file_wrapper (
122124 source = f ,
123125 )
124126 if orc_file is None :
0 commit comments