diff --git a/deploy/py_infer/src/core/model/model.py b/deploy/py_infer/src/core/model/model.py
index 9a809533f..fa23d8da1 100644
--- a/deploy/py_infer/src/core/model/model.py
+++ b/deploy/py_infer/src/core/model/model.py
@@ -106,8 +106,8 @@ def warmup(self):
 
         height, width = hw_list[0]
         warmup_shape = [(*other_shape, height, width)]  # Only single input
-        dummy_tensor = [np.random.randn(*shape).astype(dtype) for shape, dtype in zip(warmup_shape, self.input_dtype)]
-        self.model.infer(dummy_tensor)
+        # dummy_tensor = [np.random.randn(*shape).astype(dtype) for shape, dtype in zip(warmup_shape, self.input_dtype)]
+        # self.model.infer(dummy_tensor)
 
     def __del__(self):
         if hasattr(self, "model") and self.model:
diff --git a/deploy/py_infer/src/data_process/postprocess/builder.py b/deploy/py_infer/src/data_process/postprocess/builder.py
index 092f415af..ddb7892a0 100644
--- a/deploy/py_infer/src/data_process/postprocess/builder.py
+++ b/deploy/py_infer/src/data_process/postprocess/builder.py
@@ -44,6 +44,7 @@ def get_device_status():
     def _get_status():
         nonlocal status
         try:
+            ms.set_context(max_device_memory="0.01GB")
             status = ms.Tensor([0])[0:].asnumpy()[0]
         except RuntimeError:
             status = 1
diff --git a/deploy/py_infer/src/infer_args.py b/deploy/py_infer/src/infer_args.py
index fc7285939..fbc55db16 100644
--- a/deploy/py_infer/src/infer_args.py
+++ b/deploy/py_infer/src/infer_args.py
@@ -119,6 +119,9 @@ def get_args():
         "--show_log", type=str2bool, default=False, required=False, help="Whether show log when inferring."
    )
     parser.add_argument("--save_log_dir", type=str, required=False, help="Log saving dir.")
+    parser.add_argument(
+        "--is_concat", type=str2bool, default=False, help="Whether to concatenate crops after the detection."
+    )
 
     args = parser.parse_args()
     setup_logger(args)
diff --git a/deploy/py_infer/src/parallel/module/detection/det_post_node.py b/deploy/py_infer/src/parallel/module/detection/det_post_node.py
index fba6a5abc..18d4ce5e0 100644
--- a/deploy/py_infer/src/parallel/module/detection/det_post_node.py
+++ b/deploy/py_infer/src/parallel/module/detection/det_post_node.py
@@ -1,3 +1,4 @@
+import cv2
 import numpy as np
 
 from ....data_process.utils import cv_utils
@@ -10,12 +11,35 @@ def __init__(self, args, msg_queue):
         super(DetPostNode, self).__init__(args, msg_queue)
         self.text_detector = None
         self.task_type = self.args.task_type
+        self.is_concat = self.args.is_concat
 
     def init_self_args(self):
         self.text_detector = TextDetector(self.args)
         self.text_detector.init(preprocess=False, model=False, postprocess=True)
         super().init_self_args()
 
+    def concat_crops(self, crops: list):
+        """
+        Concatenates the list of cropped images horizontally after resizing them to have the same height.
+
+        Args:
+            crops (list): A list of cropped images represented as numpy arrays.
+
+        Returns:
+            numpy.ndarray: A horizontally concatenated image array.
+ """ + max_height = max(crop.shape[0] for crop in crops) + resized_crops = [] + for crop in crops: + h, w, c = crop.shape + new_h = max_height + new_w = int((w / h) * new_h) + + resized_img = cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_LINEAR) + resized_crops.append(resized_img) + crops_concated = np.concatenate(resized_crops, axis=1) + return crops_concated + def process(self, input_data): if input_data.skip: self.send_to_next_module(input_data) @@ -23,6 +47,8 @@ def process(self, input_data): data = input_data.data boxes = self.text_detector.postprocess(data["pred"], data["shape_list"]) + if self.is_concat: + boxes = sorted(boxes, key=lambda points: (points[0][1], points[0][0])) infer_res_list = [] for box in boxes: @@ -39,6 +65,8 @@ def process(self, input_data): for box in infer_res_list: sub_image = cv_utils.crop_box_from_image(image, np.array(box)) sub_image_list.append(sub_image) + if self.is_concat: + sub_image_list = len(sub_image_list) * [self.concat_crops(sub_image_list)] input_data.sub_image_list = sub_image_list input_data.data = None diff --git a/mindocr/losses/det_loss.py b/mindocr/losses/det_loss.py index 23ca8f4e2..cc97a3210 100644 --- a/mindocr/losses/det_loss.py +++ b/mindocr/losses/det_loss.py @@ -1,4 +1,5 @@ import logging +import os from math import pi from typing import Tuple, Union @@ -10,6 +11,8 @@ __all__ = ["DBLoss", "PSEDiceLoss", "EASTLoss", "FCELoss"] _logger = logging.getLogger(__name__) +OFFLINE_MODE = os.getenv("OFFLINE_MODE", None) + class DBLoss(nn.LossBase): """ @@ -165,7 +168,13 @@ def construct(self, pred: Tensor, gt: Tensor, mask: Tensor) -> Tensor: neg_loss = (loss * negative).view(loss.shape[0], -1) neg_vals, _ = ops.sort(neg_loss) - neg_index = ops.stack((mnp.arange(loss.shape[0]), neg_vals.shape[1] - neg_count), axis=1) + + if OFFLINE_MODE is None: + neg_index = ops.stack((mnp.arange(loss.shape[0]), neg_vals.shape[1] - neg_count), axis=1) + else: + neg_index = ops.stack( + (ops.arange(loss.shape[0], dtype=neg_count.dtype), neg_vals.shape[1] - neg_count), axis=1 + ) min_neg_score = ops.expand_dims(ops.gather_nd(neg_vals, neg_index), axis=1) neg_loss_mask = (neg_loss >= min_neg_score).astype(ms.float32) # filter values less than top k diff --git a/mindocr/losses/rec_loss.py b/mindocr/losses/rec_loss.py index 09ee8caec..88cb4e7f5 100644 --- a/mindocr/losses/rec_loss.py +++ b/mindocr/losses/rec_loss.py @@ -1,3 +1,5 @@ +import os + import numpy as np import mindspore as ms @@ -6,6 +8,8 @@ __all__ = ["CTCLoss", "AttentionLoss", "VisionLANLoss"] +OFFLINE_MODE = os.getenv("OFFLINE_MODE", None) + class CTCLoss(LossBase): """ @@ -147,14 +151,21 @@ class AttentionLoss(LossBase): def __init__(self, reduction: str = "mean", ignore_index: int = 0) -> None: super().__init__() # ignore symbol, assume it is placed at 0th index - self.criterion = nn.CrossEntropyLoss(reduction=reduction, ignore_index=ignore_index) + if OFFLINE_MODE is None: + self.criterion = nn.CrossEntropyLoss(reduction=reduction, ignore_index=ignore_index) + else: + self.reduction = reduction + self.ignore_index = ignore_index def construct(self, logits: Tensor, labels: Tensor) -> Tensor: labels = labels[:, 1:] # without symbol num_classes = logits.shape[-1] logits = ops.reshape(logits, (-1, num_classes)) labels = ops.reshape(labels, (-1,)) - return self.criterion(logits, labels) + if OFFLINE_MODE is None: + return self.criterion(logits, labels) + else: + return ops.cross_entropy(logits, labels, reduction=self.reduction, ignore_index=self.ignore_index) class SARLoss(LossBase): diff 
diff --git a/mindocr/models/necks/fpn.py b/mindocr/models/necks/fpn.py
index 650395554..32a628ce8 100644
--- a/mindocr/models/necks/fpn.py
+++ b/mindocr/models/necks/fpn.py
@@ -1,3 +1,4 @@
+import os
 from typing import List, Tuple
 
 from mindspore import Tensor, nn, ops
@@ -7,14 +8,20 @@
 from ..utils.attention_cells import SEModule
 from .asf import AdaptiveScaleFusion
 
+OFFLINE_MODE = os.getenv("OFFLINE_MODE", None)
 
-def _resize_nn(x: Tensor, scale: int = 0, shape: Tuple[int] = None):
-    if scale == 1 or shape == x.shape[2:]:
-        return x
-
-    if scale:
-        shape = (x.shape[2] * scale, x.shape[3] * scale)
-    return ops.ResizeNearestNeighbor(shape)(x)
+if OFFLINE_MODE is None:
+    def _resize_nn(x: Tensor, scale: int = 0, shape: Tuple[int] = None):
+        if scale == 1 or shape == x.shape[2:]:
+            return x
+
+        if scale:
+            shape = (x.shape[2] * scale, x.shape[3] * scale)
+        return ops.ResizeNearestNeighbor(shape)(x)
+else:
+    def _resize_nn(x: Tensor, shape: Tensor):
+        return ops.ResizeNearestNeighborV2()(x, shape)
 
 
 class FPN(nn.Cell):
@@ -64,11 +71,18 @@ def construct(self, features: List[Tensor]) -> Tensor:
         for i, uc_op in enumerate(self.unify_channels):
             features[i] = uc_op(features[i])
 
-        for i in range(2, -1, -1):
-            features[i] += _resize_nn(features[i + 1], shape=features[i].shape[2:])
+        if OFFLINE_MODE is None:
+            for i in range(2, -1, -1):
+                features[i] += _resize_nn(features[i + 1], shape=features[i].shape[2:])
+
+            for i, out in enumerate(self.out):
+                features[i] = _resize_nn(out(features[i]), shape=features[0].shape[2:])
+        else:
+            for i in range(2, -1, -1):
+                features[i] += _resize_nn(features[i + 1], shape=ops.dyn_shape(features[i])[2:])
 
-        for i, out in enumerate(self.out):
-            features[i] = _resize_nn(out(features[i]), shape=features[0].shape[2:])
+            for i, out in enumerate(self.out):
+                features[i] = _resize_nn(out(features[i]), shape=ops.dyn_shape(features[0])[2:])
 
         return self.fuse(features[::-1])  # matching the reverse order of the original work
diff --git a/mindocr/models/transforms/tps_spatial_transformer.py b/mindocr/models/transforms/tps_spatial_transformer.py
index a49736fe3..95d3f6d75 100644
--- a/mindocr/models/transforms/tps_spatial_transformer.py
+++ b/mindocr/models/transforms/tps_spatial_transformer.py
@@ -1,4 +1,5 @@
 import itertools
+import os
 from typing import Optional, Tuple
 
 import numpy as np
@@ -8,6 +9,8 @@
 import mindspore.ops as ops
 from mindspore import Tensor
 
+OFFLINE_MODE = os.getenv("OFFLINE_MODE", None)
+
 
 def grid_sample(input: Tensor, grid: Tensor, canvas: Optional[Tensor] = None) -> Tensor:
     output = ops.grid_sample(input, grid)
@@ -111,6 +114,9 @@ def __init__(
         self.target_coordinate_repr = Tensor(target_coordinate_repr, dtype=ms.float32)
         self.target_control_points = Tensor(target_control_points, dtype=ms.float32)
 
+        if OFFLINE_MODE is not None:
+            self.matmul = ops.BatchMatMul()
+
     def construct(
         self, input: Tensor, source_control_points: Tensor
     ) -> Tuple[Tensor, Tensor]:
@@ -118,8 +124,12 @@ def construct(
 
         padding_matrix = ops.tile(self.padding_matrix, (batch_size, 1, 1))
         Y = ops.concat([source_control_points, padding_matrix], axis=1)
-        mapping_matrix = ops.matmul(self.inverse_kernel, Y)
-        source_coordinate = ops.matmul(self.target_coordinate_repr, mapping_matrix)
+        if OFFLINE_MODE is None:
+            mapping_matrix = ops.matmul(self.inverse_kernel, Y)
+            source_coordinate = ops.matmul(self.target_coordinate_repr, mapping_matrix)
+        else:
+            mapping_matrix = self.matmul(self.inverse_kernel[None, ...], Y)
+            source_coordinate = self.matmul(self.target_coordinate_repr[None, ...], mapping_matrix)
         grid = ops.reshape(
             source_coordinate,
             (-1, self.target_height, self.target_width, 2),
diff --git a/mindocr/models/utils/attention_cells.py b/mindocr/models/utils/attention_cells.py
index 016001085..b3f14dc24 100644
--- a/mindocr/models/utils/attention_cells.py
+++ b/mindocr/models/utils/attention_cells.py
@@ -1,3 +1,4 @@
+import os
 from typing import Optional, Tuple
 
 import numpy as np
@@ -9,6 +10,8 @@
 
 __all__ = ["MultiHeadAttention", "PositionwiseFeedForward", "PositionalEncoding", "SEModule"]
 
+OFFLINE_MODE = os.getenv("OFFLINE_MODE", None)
+
 
 class MultiHeadAttention(nn.Cell):
     def __init__(
@@ -108,9 +111,14 @@ def __init__(
         self.pe = Tensor(pe, dtype=ms.float32)
 
     def construct(self, input_tensor: Tensor) -> Tensor:
-        input_tensor = (
-            input_tensor + self.pe[:, : input_tensor.shape[1]]
-        )  # pe 1 5000 512
+        if OFFLINE_MODE is None:
+            input_tensor = (
+                input_tensor + self.pe[:, : input_tensor.shape[1]]
+            )  # pe 1 5000 512
+        else:
+            input_tensor = (
+                input_tensor + self.pe[:, : ops.dyn_shape(input_tensor)[1]]
+            )  # pe 1 5000 512
         return self.dropout(input_tensor)