Skip to content

Commit 4ff55fa

Browse files
authored
[feat] AutoLayoutModel and flexible model configs (#69)
* move _reconstruct_path_with_detector_name to the baseclass
* improve the installation logic
* allow three model config formats
* update tests
* minor fixes
* Add complete tests
* Add AutoLayoutModel
* Better error message
* Move to automodel folder
* warnings for enforce_cpu
* Add AutoModel tests
* cleanup
1 parent 06fca71 commit 4ff55fa

File tree

13 files changed

+398
-178
lines changed

13 files changed

+398
-178
lines changed

setup.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,29 @@
3131
'google-cloud-vision==1',
3232
'pytesseract'
3333
],
34+
"gcv": [
35+
'google-cloud-vision==1',
36+
],
37+
"tesseract": [
38+
'pytesseract'
39+
],
40+
"layoutmodels": [
41+
"torch",
42+
"torchvision",
43+
"effdet"
44+
],
3445
"effdet": [
3546
"torch",
3647
"torchvision",
3748
"effdet"
3849
],
3950
"detectron2": [
40-
"detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2"
51+
"detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.5#egg=detectron2"
52+
# Supporting detectron0.5 for compatibility with newer torch versions
4153
],
4254
"paddledetection": [
4355
"paddlepaddle==2.1.0"
44-
]
56+
],
4557
},
4658
include_package_data=True
4759
)

src/layoutparser/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
]
4141
}
4242

43+
_import_structure["models"] = ["AutoLayoutModel"]
44+
4345
if is_detectron2_available():
4446
_import_structure["models.detectron2"] = ["Detectron2LayoutModel"]
4547

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .detectron2.layoutmodel import Detectron2LayoutModel
22
from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
3-
from .effdet.layoutmodel import EfficientDetLayoutModel
3+
from .effdet.layoutmodel import EfficientDetLayoutModel
4+
from .auto_layoutmodel import AutoLayoutModel
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from typing import Optional, Dict, Union, List
2+
from .detectron2.layoutmodel import Detectron2LayoutModel
3+
from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
4+
from .effdet.layoutmodel import EfficientDetLayoutModel
5+
from .model_config import (
6+
is_lp_layout_model_config_any_format,
7+
)
8+
9+
ALL_AVAILABLE_BACKENDS = {
10+
Detectron2LayoutModel.DETECTOR_NAME: Detectron2LayoutModel,
11+
PaddleDetectionLayoutModel.DETECTOR_NAME: PaddleDetectionLayoutModel,
12+
EfficientDetLayoutModel.DETECTOR_NAME: EfficientDetLayoutModel,
13+
}
14+
15+
16+
def AutoLayoutModel(
    config_path: str,
    model_path: Optional[str] = None,
    label_map: Optional[Dict] = None,
    device: Optional[str] = None,
    extra_config: Optional[Union[Dict, List]] = None,
) -> "BaseLayoutModel":
    """Create a layout model, picking the backend from the config path.

    The backend is the first entry of ``ALL_AVAILABLE_BACKENDS`` whose
    detector name occurs as a substring of ``config_path``; all arguments
    are forwarded to that backend's constructor.

    Args:
        config_path (:obj:`str`):
            The path to the configuration file.
        model_path (:obj:`str`, None):
            The path to the saved weights of the model.
            If set, overwrite the weights in the configuration file.
            Defaults to `None`.
        label_map (:obj:`dict`, optional):
            The map from the model prediction (ids) to real
            word labels (strings). If the config is from one of the supported
            datasets, Layout Parser will automatically initialize the label_map.
            Defaults to `None`.
        device(:obj:`str`, optional):
            Whether to use cuda or cpu devices. If not set, LayoutParser will
            automatically determine the device to initialize the models on.
        extra_config (:obj:`dict`, optional):
            Extra configuration used for initializing the layout model.

    Returns:
        BaseLayoutModel: the created layout model instance.

    Raises:
        ValueError: If ``config_path`` is rejected by
            ``is_lp_layout_model_config_any_format``, or if no known backend
            name occurs in it.
    """
    if not is_lp_layout_model_config_any_format(config_path):
        raise ValueError(f"Invalid model config_path {config_path}")

    for backend_name, backend_cls in ALL_AVAILABLE_BACKENDS.items():
        if backend_name in config_path:
            return backend_cls(
                config_path,
                model_path=model_path,
                label_map=label_map,
                extra_config=extra_config,
                device=device,
            )

    # Previously this fell through and silently returned None, which only
    # surfaced later as an AttributeError on the caller's side.
    raise ValueError(
        f"Cannot determine the model backend for config_path {config_path}: "
        f"it should contain one of {list(ALL_AVAILABLE_BACKENDS)}"
    )

src/layoutparser/models/base_layoutmodel.py

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,72 @@
1-
from typing import Union
1+
from typing import Optional, Tuple, Union, Dict
22
from abc import ABC, abstractmethod
33

4+
from .model_config import LayoutModelConfig, add_identifier_for_config, layout_model_config_parser, is_lp_layout_model_config_any_format
45
from ..file_utils import requires_backends
56

6-
77
class BaseLayoutModel(ABC):
8+
9+
# TODO: Build a metaclass for lazy module loader
10+
@property
11+
@abstractmethod
12+
def DEPENDENCIES(self):
13+
"""DEPENDENCIES lists all necessary dependencies for the class."""
14+
pass
15+
816
@property
917
@abstractmethod
1018
def DETECTOR_NAME(self):
1119
pass
1220

21+
@property
1322
@abstractmethod
14-
def detect(self, image):
23+
def MODEL_CATALOG(self) -> Dict[str, Dict[str, str]]:
1524
pass
1625

1726
@abstractmethod
18-
def image_loader(self, image: Union["ndarray", "Image"]):
19-
"""It will process the input images appropriately to the target format.
20-
"""
27+
def detect(self, image: Union["np.ndarray", "Image.Image"]):
2128
pass
2229

23-
# Add lazy loading mechanisms for layout models, refer to
24-
# layoutparser.ocr.BaseOCRAgent
25-
# TODO: Build a metaclass for lazy module loader
26-
@property
30+
2731
@abstractmethod
28-
def DEPENDENCIES(self):
29-
"""DEPENDENCIES lists all necessary dependencies for the class."""
32+
def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
33+
"""It will process the input images appropriately to the target format."""
3034
pass
35+
36+
def _parse_config(self, config_path:str, identifier:str) -> Union[LayoutModelConfig, str]:
    """Resolve a config or weight path against this backend's model catalog.

    Paths rejected by ``is_lp_layout_model_config_any_format`` are returned
    unchanged. Any other path is first passed through
    ``add_identifier_for_config`` together with ``identifier``; the first
    ``MODEL_CATALOG`` key that occurs as a substring of the result selects
    the dataset, and the first architecture listed for that dataset is
    handed to ``layout_model_config_parser`` as the default architecture.

    Args:
        config_path: The path (or lp-style config name) to resolve.
        identifier: The identifier forwarded to ``add_identifier_for_config``
            (callers in this class pass ``"config"`` or ``"weight"``).

    Returns:
        The original string for non-lp paths, otherwise whatever
        ``layout_model_config_parser`` returns.

    Raises:
        ValueError: If no dataset name from ``MODEL_CATALOG`` occurs in the
            path.
    """
    # Guard clause: anything that is not an lp-style config passes through.
    if not is_lp_layout_model_config_any_format(config_path):
        return config_path

    config_path = add_identifier_for_config(config_path, identifier)

    matched_dataset = next(
        (name for name in self.MODEL_CATALOG if name in config_path),
        None,
    )
    if matched_dataset is None:
        raise ValueError(f"The config {config_path} is not a valid config for {self.__class__}, "
                         f"possibly because there aren't models trained for the specified dataset.")

    # The first model name registered for the dataset acts as the default.
    fallback_arch = list(self.MODEL_CATALOG[matched_dataset].keys())[0]
    return layout_model_config_parser(config_path, self.DETECTOR_NAME, fallback_arch)
49+
50+
def config_parser(self, config_path: str, model_path: Optional[str], allow_empty_path: bool = False) -> Tuple[str, Optional[str]]:
    """Resolve a (config, weights) pair into concrete path strings.

    ``config_path`` is first resolved with ``self._parse_config``. What
    happens next depends on ``model_path`` and on the resolved config:

    * ``model_path`` given: it is resolved with ``self._parse_config`` too.
    * ``model_path`` is ``None`` and the config resolved to a plain string:
      the pair is returned as ``(config_path, None)`` when
      ``allow_empty_path`` is true, otherwise a ``ValueError`` is raised.
    * ``model_path`` is ``None`` and the config resolved to a
      ``LayoutModelConfig``: the weights come from ``config_path.dual()``
      (presumably the weight counterpart of the config entry; confirm in
      ``model_config``).

    Args:
        config_path: Config path or lp-style config name.
        model_path: Weights path or lp-style name, or ``None``.
        allow_empty_path: Whether a plain-string config may be returned
            without an accompanying model path.

    Returns:
        ``(config_path, model_path)``; ``model_path`` is ``None`` only in
        the ``allow_empty_path`` case described above.

    Raises:
        ValueError: If the config is a plain string, ``model_path`` is
            ``None`` and ``allow_empty_path`` is false.
    """
    config_path = self._parse_config(config_path, "config")

    if model_path is None and isinstance(config_path, str):
        if allow_empty_path:
            return config_path, model_path
        raise ValueError(
            f"Invalid config and model path pairs ({(config_path, model_path)}): "
            "When config_path is a regular URL, the model_path should not be empty"
        )

    if model_path is None and isinstance(config_path, LayoutModelConfig):
        model_path = config_path.dual()
    else:
        model_path = self._parse_config(model_path, "weight")

    # Parsed configs expose their resolved location through `.full`;
    # plain strings are already usable as-is.
    config_path = config_path if isinstance(config_path, str) else config_path.full
    model_path = model_path if isinstance(model_path, str) else model_path.full
    return config_path, model_path
3170

3271
def __new__(cls, *args, **kwargs):
3372

src/layoutparser/models/detectron2/layoutmodel.py

Lines changed: 29 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from typing import Union
22
from PIL import Image
33
import numpy as np
4+
import warnings
45

5-
from .catalog import PathManager, LABEL_MAP_CATALOG
6+
from .catalog import MODEL_CATALOG, PathManager, LABEL_MAP_CATALOG
67
from ..base_layoutmodel import BaseLayoutModel
78
from ...elements import Rectangle, TextBlock, Layout
89
from ...file_utils import is_torch_cuda_available, is_detectron2_available
@@ -30,9 +31,9 @@ class Detectron2LayoutModel(BaseLayoutModel):
3031
word labels (strings). If the config is from one of the supported
3132
datasets, Layout Parser will automatically initialize the label_map.
3233
Defaults to `None`.
33-
enforce_cpu(:obj:`bool`, optional):
34-
When set to `True`, it will enforce using cpu even if it is on a CUDA
35-
available device.
34+
device(:obj:`str`, optional):
35+
Whether to use cuda or cpu devices. If not set, LayoutParser will
36+
automatically determine the device to initialize the models on.
3637
extra_config (:obj:`list`, optional):
3738
Extra configuration passed to the Detectron2 model
3839
configuration. The argument will be used in the `merge_from_list
@@ -49,70 +50,55 @@ class Detectron2LayoutModel(BaseLayoutModel):
4950

5051
DEPENDENCIES = ["detectron2"]
5152
DETECTOR_NAME = "detectron2"
53+
MODEL_CATALOG = MODEL_CATALOG
5254

5355
def __init__(
5456
self,
5557
config_path,
5658
model_path=None,
5759
label_map=None,
5860
extra_config=None,
59-
enforce_cpu=False,
61+
enforce_cpu=None,
62+
device=None,
6063
):
6164

65+
if enforce_cpu is not None:
66+
warnings.warn(
67+
"Setting enforce_cpu is deprecated. Please set `device` instead.",
68+
DeprecationWarning,
69+
)
70+
6271
if extra_config is None:
6372
extra_config = []
6473

65-
if config_path.startswith("lp://") and label_map is None:
66-
dataset_name = config_path.lstrip("lp://").split("/")[0]
67-
label_map = LABEL_MAP_CATALOG[dataset_name]
68-
69-
if enforce_cpu:
70-
extra_config.extend(["MODEL.DEVICE", "cpu"])
74+
config_path, model_path = self.config_parser(
75+
config_path, model_path, allow_empty_path=True
76+
)
77+
config_path = PathManager.get_local_path(config_path)
7178

7279
cfg = detectron2.config.get_cfg()
73-
config_path = self._reconstruct_path_with_detector_name(config_path)
74-
config_path = PathManager.get_local_path(config_path)
7580
cfg.merge_from_file(config_path)
7681
cfg.merge_from_list(extra_config)
7782

7883
if model_path is not None:
79-
model_path = self._reconstruct_path_with_detector_name(model_path)
84+
model_path = PathManager.get_local_path(model_path)
85+
# Because it will be forwarded to the detectron2 paths
8086
cfg.MODEL.WEIGHTS = model_path
81-
82-
if not enforce_cpu:
83-
cfg.MODEL.DEVICE = "cuda" if is_torch_cuda_available() else "cpu"
87+
88+
if is_torch_cuda_available():
89+
if device is None:
90+
device = "cuda"
91+
else:
92+
device = "cpu"
93+
cfg.MODEL.DEVICE = device
8494

8595
self.cfg = cfg
8696

8797
self.label_map = label_map
8898
self._create_model()
8999

90-
def _reconstruct_path_with_detector_name(self, path: str) -> str:
91-
"""This function will add the detector name (detectron2) into the
92-
lp model config path to get the "canonical" model name.
93-
94-
For example, for a given config_path `lp://HJDataset/faster_rcnn_R_50_FPN_3x/config`,
95-
it will transform it into `lp://detectron2/HJDataset/faster_rcnn_R_50_FPN_3x/config`.
96-
However, if the config_path already contains the detector name, we won't change it.
97-
98-
This function is a general step to support multiple backends in the layout-parser
99-
library.
100-
101-
Args:
102-
path (str): The given input path that might or might not contain the detector name.
103-
104-
Returns:
105-
str: a modified path that contains the detector name.
106-
"""
107-
if path.startswith("lp://"): # TODO: Move "lp://" to a constant
108-
model_name = path[len("lp://") :]
109-
model_name_segments = model_name.split("/")
110-
if (
111-
len(model_name_segments) == 3
112-
and self.DETECTOR_NAME not in model_name_segments
113-
):
114-
return "lp://" + self.DETECTOR_NAME + "/" + path[len("lp://") :]
115-
return path
100+
def _create_model(self):
101+
self.model = detectron2.engine.DefaultPredictor(self.cfg)
116102

117103
def gather_output(self, outputs):
118104

@@ -136,9 +122,6 @@ def gather_output(self, outputs):
136122

137123
return layout
138124

139-
def _create_model(self):
140-
self.model = detectron2.engine.DefaultPredictor(self.cfg)
141-
142125
def detect(self, image):
143126
"""Detect the layout of a given image.
144127

src/layoutparser/models/effdet/catalog.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
3: "List",
2323
4: "Table",
2424
5: "Figure"
25+
},
26+
"MFD": {
27+
1: "Equation",
2528
}
2629
}
2730

0 commit comments

Comments
 (0)