From 6864f596c254033000b127ed1759b43852ea47f8 Mon Sep 17 00:00:00 2001 From: lcayvinliu Date: Fri, 16 May 2025 17:17:07 +0800 Subject: [PATCH 1/2] fix bug:add mask for padding area in EastRandomCropData --- ppocr/data/imaug/make_shrink_map.py | 3 +++ ppocr/data/imaug/random_crop_data.py | 4 ++++ ppocr/losses/det_basic_loss.py | 3 +++ 3 files changed, 10 insertions(+) diff --git a/ppocr/data/imaug/make_shrink_map.py b/ppocr/data/imaug/make_shrink_map.py index 0e2f3d4f5df..5364ff35cdb 100644 --- a/ppocr/data/imaug/make_shrink_map.py +++ b/ppocr/data/imaug/make_shrink_map.py @@ -91,6 +91,9 @@ def __call__(self, data): cv2.fillPoly(gt, [shrink.astype(np.int32)], 1) data["shrink_map"] = gt + crop_mask = data["crop_mask"] + if crop_mask is not None: + mask = mask * crop_mask # use intersection of crop_mask and src shrink_mask data["shrink_mask"] = mask return data diff --git a/ppocr/data/imaug/random_crop_data.py b/ppocr/data/imaug/random_crop_data.py index 6625776fa1e..b2a026ac678 100644 --- a/ppocr/data/imaug/random_crop_data.py +++ b/ppocr/data/imaug/random_crop_data.py @@ -156,11 +156,14 @@ def __call__(self, data): scale = min(scale_w, scale_h) h = int(crop_h * scale) w = int(crop_w * scale) + mask = None if self.keep_ratio: padimg = np.zeros((self.size[1], self.size[0], img.shape[2]), img.dtype) + mask = np.zeros((self.size[1], self.size[0]), img.dtype) padimg[:h, :w] = cv2.resize( img[crop_y : crop_y + crop_h, crop_x : crop_x + crop_w], (w, h) ) + mask[:h, :w] = 1 # mask the padding area img = padimg else: img = cv2.resize( @@ -181,6 +184,7 @@ def __call__(self, data): data["polys"] = np.array(text_polys_crop) data["ignore_tags"] = ignore_tags_crop data["texts"] = texts_crop + data["crop_mask"] = mask return data diff --git a/ppocr/losses/det_basic_loss.py b/ppocr/losses/det_basic_loss.py index 5a46e072e25..ea2f854466d 100644 --- a/ppocr/losses/det_basic_loss.py +++ b/ppocr/losses/det_basic_loss.py @@ -157,5 +157,8 @@ def __init__(self, reduction="mean"): self.reduction = reduction def forward(self, input, label, mask=None, weight=None, name=None): + if mask is not None: + input = input * mask + label = label * mask loss = F.binary_cross_entropy(input, label, reduction=self.reduction) return loss From c435d48778a177f2faf9fd2103a8cfd486beeece Mon Sep 17 00:00:00 2001 From: liuyulong Date: Tue, 23 Sep 2025 11:52:01 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20CodeStyle=20=E6=B5=81?= =?UTF-8?q?=E6=B0=B4=E7=BA=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ppocr/data/imaug/make_shrink_map.py | 5 +++-- ppocr/data/imaug/random_crop_data.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ppocr/data/imaug/make_shrink_map.py b/ppocr/data/imaug/make_shrink_map.py index 5364ff35cdb..f9899f49e33 100644 --- a/ppocr/data/imaug/make_shrink_map.py +++ b/ppocr/data/imaug/make_shrink_map.py @@ -91,9 +91,10 @@ def __call__(self, data): cv2.fillPoly(gt, [shrink.astype(np.int32)], 1) data["shrink_map"] = gt - crop_mask = data["crop_mask"] + crop_mask = data["crop_mask"] if "crop_mask" in data else None if crop_mask is not None: - mask = mask * crop_mask # use intersection of crop_mask and src shrink_mask + # use intersection of crop_mask and src shrink_mask + mask = mask * crop_mask data["shrink_mask"] = mask return data diff --git a/ppocr/data/imaug/random_crop_data.py b/ppocr/data/imaug/random_crop_data.py index b2a026ac678..274d84d476e 100644 --- a/ppocr/data/imaug/random_crop_data.py +++ b/ppocr/data/imaug/random_crop_data.py @@ -163,7 +163,8 @@ def __call__(self, data): padimg[:h, :w] = cv2.resize( img[crop_y : crop_y + crop_h, crop_x : crop_x + crop_w], (w, h) ) - mask[:h, :w] = 1 # mask the padding area + # mask the padding area + mask[:h, :w] = 1 img = padimg else: img = cv2.resize(