# Some of this code is adapted from
2+ # https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py
3+
4+ from typing import Any , BinaryIO , Dict , List , Optional , Tuple , Union
5+ import sys
6+ import os
7+ import logging
8+ import importlib .util
9+ from types import ModuleType
10+
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

# The package importlib_metadata is in a different place, depending on the python version.
if sys.version_info < (3, 8):
    import importlib_metadata
else:
    import importlib.metadata as importlib_metadata

# Every optional dependency below is probed in two steps:
#   1. importlib.util.find_spec() tells whether the import name can be located;
#   2. importlib_metadata.version() looks up the installed distribution. When no
#      distribution metadata is found, the dependency is marked as unavailable.
# The resulting module-level ``_<name>_available`` flags are plain booleans.

###########################################
############ Layout Model Deps ############
###########################################

_torch_available = importlib.util.find_spec("torch") is not None
try:
    _torch_version = importlib_metadata.version("torch")
    logger.debug(f"PyTorch version {_torch_version} available.")
except importlib_metadata.PackageNotFoundError:
    _torch_available = False

_detectron2_available = importlib.util.find_spec("detectron2") is not None
try:
    _detectron2_version = importlib_metadata.version("detectron2")
    logger.debug(f"Detectron2 version {_detectron2_version} available")
except importlib_metadata.PackageNotFoundError:
    _detectron2_available = False

_paddle_available = importlib.util.find_spec("paddle") is not None
try:
    # The name of the paddlepaddle library:
    # Install name: pip install paddlepaddle
    # Import name: import paddle
    _paddle_version = importlib_metadata.version("paddlepaddle")
    logger.debug(f"Paddle version {_paddle_version} available.")
except importlib_metadata.PackageNotFoundError:
    _paddle_available = False

###########################################
############## OCR Tool Deps ##############
###########################################

_pytesseract_available = importlib.util.find_spec("pytesseract") is not None
try:
    _pytesseract_version = importlib_metadata.version("pytesseract")
    logger.debug(f"Pytesseract version {_pytesseract_version} available.")
except importlib_metadata.PackageNotFoundError:
    _pytesseract_available = False

try:
    # For a dotted name, find_spec() imports the parent packages first and raises
    # ModuleNotFoundError (an ImportError) when ``google`` or ``google.cloud`` is
    # missing. Without this guard, importing this module would crash on machines
    # that have no Google package installed.
    _gcv_available = importlib.util.find_spec("google.cloud.vision") is not None
except ImportError:
    _gcv_available = False
try:
    _gcv_version = importlib_metadata.version(
        "google-cloud-vision"
    )  # This is slightly different
    logger.debug(f"Google Cloud Vision Utils version {_gcv_version} available.")
except importlib_metadata.PackageNotFoundError:
    _gcv_available = False
66+
67+
def is_torch_available() -> bool:
    """Return the module-level flag recording whether PyTorch was detected."""
    available = _torch_available
    return available
70+
71+
def is_torch_cuda_available():
    """Return ``torch.cuda.is_available()``, or ``False`` when PyTorch is not available."""
    if not is_torch_available():
        return False

    # Imported here so that loading this module never imports torch itself.
    import torch

    return torch.cuda.is_available()
79+
80+
def is_paddle_available() -> bool:
    """Return the module-level flag recording whether PaddlePaddle was detected."""
    available = _paddle_available
    return available
83+
84+
def is_detectron2_available() -> bool:
    """Return the module-level flag recording whether detectron2 was detected."""
    available = _detectron2_available
    return available
87+
88+
def is_pytesseract_available() -> bool:
    """Return the module-level flag recording whether pytesseract was detected."""
    available = _pytesseract_available
    return available
91+
92+
def is_gcv_available() -> bool:
    """Return the module-level flag recording whether Google Cloud Vision was detected."""
    available = _gcv_available
    return available
95+
96+
# Error-message templates, one per optional backend. ``{0}`` is filled in with
# the name of the object that needs the backend (see ``str.format``).

PYTORCH_IMPORT_ERROR = """
{0} requires the PyTorch library but it was not found in your environment. Checkout the instructions on the
installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
"""

# NOTE(review): the pip command below had been corrupted by e-mail obfuscation
# ("[email protected]"). The repository path is restored as ``detectron2.git``;
# the pinned tag ``v0.4`` is an assumption -- confirm it against the detectron2
# release this project targets.
DETECTRON2_IMPORT_ERROR = """
{0} requires the detectron2 library but it was not found in your environment. Checkout the instructions on the
installation page: https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md and follow the ones
that match your environment. Typically the following would work for MacOS or Linux CPU machines:
pip install 'git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2'
"""

PADDLE_IMPORT_ERROR = """
{0} requires the PaddlePaddle library but it was not found in your environment. Checkout the instructions on the
installation page: https://github.com/PaddlePaddle/Paddle and follow the ones that match your environment.
"""

PYTESSERACT_IMPORT_ERROR = """
{0} requires the PyTesseract library but it was not found in your environment. You can install it with pip:
`pip install pytesseract`
"""

GCV_IMPORT_ERROR = """
{0} requires the Google Cloud Vision Python utils but it was not found in your environment. You can install it with pip:
`pip install google-cloud-vision==1`
"""
123+
# Backend name -> (availability check, error-message template). The template's
# ``{0}`` placeholder receives the name of the object that needs the backend.
BACKENDS_MAPPING = {
    "torch": (is_torch_available, PYTORCH_IMPORT_ERROR),
    "detectron2": (is_detectron2_available, DETECTRON2_IMPORT_ERROR),
    "paddle": (is_paddle_available, PADDLE_IMPORT_ERROR),
    "pytesseract": (is_pytesseract_available, PYTESSERACT_IMPORT_ERROR),
    "google-cloud-vision": (is_gcv_available, GCV_IMPORT_ERROR),
}
133+
134+
def requires_backends(obj, backends):
    """Raise ``ImportError`` if any of the requested backends is not available.

    Args:
        obj: The object that needs the backends. Its ``__name__`` (or, when it
            has none, the name of its class) is inserted into the error message.
        backends: A backend name, or a list/tuple of backend names. Every name
            must be a key of ``BACKENDS_MAPPING``.

    Raises:
        ImportError: When at least one backend is unavailable. The message is
            the concatenation of the install instructions of the *missing*
            backends only, so backends that are already installed are not
            reported as missing.
        KeyError: When a backend name is not a key of ``BACKENDS_MAPPING``.
    """
    if not isinstance(backends, (list, tuple)):
        backends = [backends]

    name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__

    missing_messages = []
    for backend in backends:
        is_available, error_template = BACKENDS_MAPPING[backend]
        if not is_available():
            missing_messages.append(error_template.format(name))

    if missing_messages:
        raise ImportError("".join(missing_messages))
144+
145+
class _LazyModule(ModuleType):
    """
    Module class that surfaces all objects but only performs associated imports when the objects are requested.

    Args:
        name: Name of the module; relative submodule imports are resolved against it.
        module_file: Stored as ``__file__``; its directory becomes ``__path__``.
        import_structure: Mapping from a submodule name to the list of object
            names that the submodule provides.
        module_spec: Stored as ``__spec__``.
        extra_objects: Optional mapping of attribute names to objects that are
            returned directly by attribute access, without importing anything.
    """

    # Adapted from HuggingFace
    # https://github.com/huggingface/transformers/blob/c37573806ab3526dd805c49cbe2489ad4d68a9d7/src/transformers/file_utils.py#L1990

    def __init__(
        self, name, module_file, import_structure, module_spec=None, extra_objects=None
    ):
        super().__init__(name)
        self._modules = set(import_structure.keys())
        # Object name -> name of the submodule that defines it.
        self._class_to_module = {
            value: key for key, values in import_structure.items() for value in values
        }
        # Needed for autocompletion in an IDE
        self.__all__ = list(import_structure.keys()) + [
            value for values in import_structure.values() for value in values
        ]
        self.__file__ = module_file
        self.__spec__ = module_spec
        self.__path__ = [os.path.dirname(module_file)]
        self._objects = {} if extra_objects is None else extra_objects
        self._name = name
        self._import_structure = import_structure

        # Following [PEP 366](https://www.python.org/dev/peps/pep-0366/)
        # The __package__ variable should be set
        # https://docs.python.org/3/reference/import.html#__package__
        self.__package__ = self.__name__

    # Needed for autocompletion in an IDE
    def __dir__(self):
        """Return the regular module attributes plus every lazily available name, each listed once."""
        names = super().__dir__()
        # __getattr__ caches resolved names with setattr, so they already show up
        # in the regular listing; only the names not present yet are appended.
        seen = set(names)
        for attr in self.__all__:
            if attr not in seen:
                seen.add(attr)
                names.append(attr)
        return names

    def __getattr__(self, name: str) -> Any:
        """Resolve ``name`` on first access and cache the result on the module.

        Only called when normal attribute lookup fails. Names in
        ``extra_objects`` are returned as-is; submodule names trigger the
        import of that submodule; object names trigger the import of their
        defining submodule followed by an attribute lookup on it.

        Raises:
            AttributeError: If ``name`` is not known to this lazy module.
        """
        if name in self._objects:
            return self._objects[name]
        if name in self._modules:
            value = self._get_module(name)
        elif name in self._class_to_module:
            module = self._get_module(self._class_to_module[name])
            value = getattr(module, name)
        else:
            raise AttributeError(f"module {self.__name__} has no attribute {name} ")

        # Cache the value so later accesses bypass __getattr__ entirely.
        setattr(self, name, value)
        return value

    def _get_module(self, module_name: str):
        """Import and return the submodule ``module_name`` relative to this module."""
        return importlib.import_module("." + module_name, self.__name__)

    def __reduce__(self):
        """Rebuild the module from its name, file and import structure.

        ``module_spec`` and ``extra_objects`` are not part of the reduced state.
        """
        return (self.__class__, (self._name, self.__file__, self._import_structure))
0 commit comments