# Patch summary (free-form text ahead of the first "diff --git" header):
#   * random_crop_data.py: when keep_ratio is set, build a crop mask of shape
#     (size[1], size[0]) that is 1 over the resized image region [:h, :w] and
#     0 over the zero padding; store it under data["crop_mask"] (None when
#     keep_ratio is not set).
#   * make_shrink_map.py: if data carries a non-None "crop_mask", multiply the
#     shrink mask by it before storing data["shrink_mask"].
#   * det_basic_loss.py: when a mask is passed to forward(), multiply both
#     input and label by it before calling F.binary_cross_entropy.
# NOTE(review): line breaks, indentation depth and blank context lines were
# reconstructed from the hunk line counts and Python block structure -- confirm
# against the target tree before applying (or apply with whitespace tolerance).
diff --git a/ppocr/data/imaug/make_shrink_map.py b/ppocr/data/imaug/make_shrink_map.py
index 0e2f3d4f5df..f9899f49e33 100644
--- a/ppocr/data/imaug/make_shrink_map.py
+++ b/ppocr/data/imaug/make_shrink_map.py
@@ -91,6 +91,10 @@ def __call__(self, data):
                     cv2.fillPoly(gt, [shrink.astype(np.int32)], 1)
 
         data["shrink_map"] = gt
+        crop_mask = data["crop_mask"] if "crop_mask" in data else None
+        if crop_mask is not None:
+            # use intersection of crop_mask and src shrink_mask
+            mask = mask * crop_mask
         data["shrink_mask"] = mask
         return data
 
diff --git a/ppocr/data/imaug/random_crop_data.py b/ppocr/data/imaug/random_crop_data.py
index 6625776fa1e..274d84d476e 100644
--- a/ppocr/data/imaug/random_crop_data.py
+++ b/ppocr/data/imaug/random_crop_data.py
@@ -156,11 +156,15 @@ def __call__(self, data):
         scale = min(scale_w, scale_h)
         h = int(crop_h * scale)
         w = int(crop_w * scale)
+        mask = None
         if self.keep_ratio:
             padimg = np.zeros((self.size[1], self.size[0], img.shape[2]), img.dtype)
+            mask = np.zeros((self.size[1], self.size[0]), img.dtype)
             padimg[:h, :w] = cv2.resize(
                 img[crop_y : crop_y + crop_h, crop_x : crop_x + crop_w], (w, h)
             )
+            # mask the padding area
+            mask[:h, :w] = 1
             img = padimg
         else:
             img = cv2.resize(
@@ -181,6 +185,7 @@ def __call__(self, data):
         data["polys"] = np.array(text_polys_crop)
         data["ignore_tags"] = ignore_tags_crop
         data["texts"] = texts_crop
+        data["crop_mask"] = mask
         return data
 
 
diff --git a/ppocr/losses/det_basic_loss.py b/ppocr/losses/det_basic_loss.py
index 5a46e072e25..ea2f854466d 100644
--- a/ppocr/losses/det_basic_loss.py
+++ b/ppocr/losses/det_basic_loss.py
@@ -157,5 +157,8 @@ def __init__(self, reduction="mean"):
         self.reduction = reduction
 
     def forward(self, input, label, mask=None, weight=None, name=None):
+        if mask is not None:
+            input = input * mask
+            label = label * mask
         loss = F.binary_cross_entropy(input, label, reduction=self.reduction)
         return loss