|
1 | | -import os |
2 | 1 | from dataclasses import dataclass |
3 | 2 | from typing import Callable, List, Optional, Tuple, Union |
4 | 3 |
|
5 | 4 | import numpy as np |
6 | 5 | import torch |
7 | 6 | from doctr.io import Document |
8 | | -from doctr.models import ocr_predictor |
| 7 | +from doctr.models import detection_predictor, ocr_predictor, recognition_predictor |
9 | 8 | from inference_exp import Detections |
10 | 9 | from inference_exp.configuration import DEFAULT_DEVICE |
11 | 10 | from inference_exp.entities import ColorFormat, ImageDimensions |
12 | 11 | from inference_exp.errors import CorruptedModelPackageError, ModelRuntimeError |
13 | | -from inference_exp.models.base.documents_parsing import DocumentParsingModel |
| 12 | +from inference_exp.models.base.documents_parsing import StructuredOCRModel |
14 | 13 | from inference_exp.models.common.model_packages import get_model_package_contents |
15 | 14 | from inference_exp.utils.file_system import read_json |
16 | 15 |
|
# docTR text-detection architectures that a model package configuration may
# name in its `det_model` field. Anything outside this set is rejected when the
# package is loaded.
SUPPORTED_DETECTION_MODELS = {
    "fast_base",
    "fast_small",
    "fast_tiny",
    "db_resnet50",
    "db_resnet34",
    "db_mobilenet_v3_large",
    "linknet_resnet18",
    "linknet_resnet34",
    "linknet_resnet50",
}

# docTR text-recognition architectures that a model package configuration may
# name in its `rec_model` field.
SUPPORTED_RECOGNITION_MODELS = {
    "crnn_vgg16_bn",
    "crnn_mobilenet_v3_small",
    "crnn_mobilenet_v3_large",
    "master",
    "sar_resnet31",
    "vitstr_small",
    "vitstr_base",
    "parseq",
}
25 | 37 |
|
26 | 38 |
|
class DocTR(StructuredOCRModel[List[np.ndarray], ImageDimensions, Document]):
    """OCR model assembled from a docTR text detector and a docTR text recogniser."""

    @classmethod
    def from_pretrained(
        cls,
        model_name_or_path: str,
        device: torch.device = DEFAULT_DEVICE,
        assume_straight_pages: bool = True,
        preserve_aspect_ratio: bool = True,
        detection_max_batch_size: int = 2,
        recognition_max_batch_size: int = 128,
        **kwargs,
    ) -> "StructuredOCRModel":
        """Build a DocTR model from a local model package directory.

        The package must provide `detection_weights.pt`,
        `recognition_weights.pt` and `config.json`. The configuration names
        the detection and recognition architectures; both are instantiated
        without pretrained weights and then populated from the packaged
        state dicts, so the weights always come from the package itself.

        Args:
            model_name_or_path: Directory of the model package.
            device: Device the models are moved to and the weights are mapped
                onto while loading.
            assume_straight_pages: Forwarded to docTR's predictors.
            preserve_aspect_ratio: Forwarded to docTR's predictors.
            detection_max_batch_size: Batch size used by the detection stage.
            recognition_max_batch_size: Batch size used by the recognition
                stage.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            An instance of this class wrapping the assembled OCR predictor.

        Raises:
            CorruptedModelPackageError: If the configuration names a detection
                or recognition architecture that is not supported.
        """
        model_package_content = get_model_package_contents(
            model_package_dir=model_name_or_path,
            elements=["detection_weights.pt", "recognition_weights.pt", "config.json"],
        )
        config = parse_model_config(config_path=model_package_content["config.json"])
        if config.det_model not in SUPPORTED_DETECTION_MODELS:
            raise CorruptedModelPackageError(
                message=f"{config.det_model} model denoted in configuration not supported as DocTR detection model.",
                help_url="https://todo",
            )
        if config.rec_model not in SUPPORTED_RECOGNITION_MODELS:
            raise CorruptedModelPackageError(
                message=f"{config.rec_model} model denoted in configuration not supported as DocTR recognition model.",
                help_url="https://todo",
            )
        # The stage-level predictors are only used as factories for the bare
        # architectures; the packaged weights are loaded into `.model`.
        det_model = detection_predictor(
            arch=config.det_model,
            pretrained=False,
            assume_straight_pages=assume_straight_pages,
            preserve_aspect_ratio=preserve_aspect_ratio,
            batch_size=detection_max_batch_size,
        )
        cls._load_packaged_weights(
            module=det_model.model,
            weights_path=model_package_content["detection_weights.pt"],
            device=device,
        )
        rec_model = recognition_predictor(
            arch=config.rec_model,
            pretrained=False,
            batch_size=recognition_max_batch_size,
        )
        cls._load_packaged_weights(
            module=rec_model.model,
            weights_path=model_package_content["recognition_weights.pt"],
            device=device,
        )
        # Only the bare modules are handed over, so `ocr_predictor` builds its
        # own stage predictors. The runtime settings must be repeated here,
        # otherwise the caller's choices would silently be replaced by docTR's
        # defaults.
        model = ocr_predictor(
            det_arch=det_model.model,
            reco_arch=rec_model.model,
            assume_straight_pages=assume_straight_pages,
            preserve_aspect_ratio=preserve_aspect_ratio,
            det_bs=detection_max_batch_size,
            reco_bs=recognition_max_batch_size,
        ).to(device=device)
        return cls(model=model, device=device)

    @staticmethod
    def _load_packaged_weights(
        module: torch.nn.Module,
        weights_path: str,
        device: torch.device,
    ) -> None:
        """Move `module` to `device` and load the state dict stored at `weights_path`."""
        module.to(device)
        # weights_only=True restricts unpickling to tensors and plain
        # containers instead of arbitrary Python objects.
        state_dict = torch.load(
            weights_path,
            weights_only=True,
            map_location=device,
        )
        module.load_state_dict(state_dict)
88 | 98 |
|
|
0 commit comments