
Commit dc2e08e

Add preprocessing script
Signed-off-by: Junze Wu <[email protected]>
1 parent 069d054 commit dc2e08e

File tree

demos/handwritten_text_recognition_demo/python/utils/preprocess_gnhk.py
models/intel/handwritten-english-recognition-0001/README.md

2 files changed: 111 additions & 1 deletion
demos/handwritten_text_recognition_demo/python/utils/preprocess_gnhk.py

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
#!/usr/bin/env python3

import json
import glob
import os
from pathlib import Path

import numpy as np
import cv2
from skimage.filters.rank import entropy
from skimage.morphology import disk

TEST_FOLDER = "test/"
TEST_TXT = TEST_FOLDER + "test_img_id_gt.txt"
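
# Entropy-based binarization: handwritten strokes produce high local entropy,
# while the page background is smooth, so an entropy map can separate text
# from background before the final adaptive threshold is applied.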
def binarize(img):
    # Calculate local entropy in a disk-shaped neighborhood of radius 5
    entr = entropy(img, disk(5))
    # Normalize and negate entropy values; 8.0 is the maximum possible
    # entropy of 8-bit data (log2(256))
    MAX_ENTROPY = 8.0
    MAX_PIX_VAL = 255
    negative = 1 - (entr / MAX_ENTROPY)
    u8img = (negative * MAX_PIX_VAL).astype(np.uint8)
    # Global Otsu thresholding
    ret, mask = cv2.threshold(u8img, 0, MAX_PIX_VAL, cv2.THRESH_OTSU)
    # Mask out text
    masked = cv2.bitwise_and(img, img, mask=mask)
    # Fill in the holes to estimate the background
    kernel = np.ones((35, 35), np.uint8)
    background = cv2.dilate(masked, kernel, iterations=1)
    # Subtracting the background from the original image leaves a clean text image
    text_only = cv2.absdiff(img, background)
    # Negate and increase contrast
    neg_text_only = (MAX_PIX_VAL - text_only) * 1.15
    # Clamp the image to the uint8 range
    ret, clamped = cv2.threshold(neg_text_only, 255, MAX_PIX_VAL, cv2.THRESH_TRUNC)
    clamped_u8 = clamped.astype(np.uint8)
    # Final adaptive thresholding to binarize the image
    processed = cv2.adaptiveThreshold(clamped_u8, MAX_PIX_VAL,
                                      cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY, 31, 2)
    return processed
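
# Each GNHK image eng*.jpg is accompanied by a JSON annotation file: a list
# of word objects carrying "text", a "type" flag ("P" marks printed text), a
# "line_idx" that groups words into lines, and a four-point "polygon"
# (x0/y0 .. x3/y3) outlining the word.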
def main():
    """
    Preprocess the GNHK dataset into text line images
    """
    os.makedirs(os.path.dirname(TEST_FOLDER), exist_ok=True)
    open(TEST_TXT, 'w').close()  # clear the ground-truth file
    images = glob.glob('eng*.jpg')

    for img_idx, image in enumerate(images):
        img_id = Path(image).stem
        print(img_idx, img_id)
        # Open the corresponding JSON annotation file
        with open(img_id + ".json") as f:
            data = json.load(f)
        line_indices = set(map(lambda obj: obj["line_idx"], data))
        img = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
        img = binarize(img)
        for idx in sorted(line_indices):
            objects = list(filter(lambda obj: obj["line_idx"] == idx, data))
            # Discard math symbols, scribbles, illegible text, and printed text
            objects = list(filter(
                lambda obj: obj["text"] not in ("%math%", "%SC%", "%NA%") and obj["type"] != "P",
                objects))
            if not objects:
                continue
            # Sort words left to right and join them into the line label
            objects = sorted(objects, key=lambda x: x['polygon']['x0'])
            label = " ".join(map(lambda obj: obj["text"], objects))
            print(img_id, idx, label)
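
            # Render the line onto a white canvas: pixels inside the kept
            # word polygons are preserved, everything else (neighboring
            # lines, discarded words) is whited out before cropping.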
            # Create a mask covering the words of this line
            mask = np.zeros(img.shape[0:2], dtype=np.uint8)
            for obj in objects:
                region = [
                    [obj["polygon"]["x0"], obj["polygon"]["y0"]],
                    [obj["polygon"]["x1"], obj["polygon"]["y1"]],
                    [obj["polygon"]["x2"], obj["polygon"]["y2"]],
                    [obj["polygon"]["x3"], obj["polygon"]["y3"]]
                ]
                points = np.array([region])
                cv2.drawContours(mask, [points], -1, (255, 255, 255), -1, cv2.LINE_AA)
            masked = cv2.bitwise_and(img, img, mask=mask)
            bg = np.ones_like(img, np.uint8) * 255
            cv2.bitwise_not(bg, bg, mask=mask)
            overlay = bg + masked
            # Crop the bounding rectangle of the text region
            l = list(map(lambda obj: [
                [obj["polygon"]["x0"], obj["polygon"]["y0"]],
                [obj["polygon"]["x1"], obj["polygon"]["y1"]],
                [obj["polygon"]["x2"], obj["polygon"]["y2"]],
                [obj["polygon"]["x3"], obj["polygon"]["y3"]]
            ], objects))
            flat = [item for sublist in l for item in sublist]
            pts = np.array(flat)
            rect = cv2.boundingRect(pts)
            x, y, w, h = rect
            cropped = overlay[y:y+h, x:x+w].copy()
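
            # Rationale: the recognition model consumes fixed-size line
            # images, so lines wider than 2000 px at height 96 are dropped,
            # matching the test-subset definition in the model README.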
            # Discard the line if its width would exceed 2000 px after
            # resizing to height 96 while keeping the aspect ratio
            height, width = cropped.shape
            ratio = 96.0 / height
            new_width = int(width * ratio)
            if new_width > 2000:
                continue

            cv2.imwrite(TEST_FOLDER + img_id + '_line' + str(idx) + '.jpg', cropped)
            with open(TEST_TXT, 'a', encoding='utf-8') as test_txt:
                test_txt.write(img_id + '_line' + str(idx) + '.jpg' + ',' + label + '\n')


if __name__ == '__main__':
    main()
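
For reference, the script is meant to be run from a directory containing the GNHK test images (eng*.jpg) and their JSON annotations; it fills test/ with cropped line images and appends one "filename,label" pair per line to test_img_id_gt.txt. A minimal sketch of reading that index back (assuming the layout produced above; labels can contain commas, hence splitting only on the first one):

    import cv2

    with open("test/test_img_id_gt.txt", encoding="utf-8") as gt:
        for line in gt:
            # Split on the first comma only: filenames contain none,
            # but labels may
            filename, label = line.rstrip("\n").split(",", 1)
            line_img = cv2.imread("test/" + filename, cv2.IMREAD_GRAYSCALE)
            print(filename, line_img.shape, label)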

models/intel/handwritten-english-recognition-0001/README.md

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ The network is able to recognize English text consisting of characters in the [G
  | Accuracy on GNHK test subset (excluding images wider than 2000px after resized to height 96px with aspect ratio) | 81.5% |
  | Source framework | PyTorch\* |

- > **Note:** to achieve the accuracy, images from the GNHK test set should be preprocessed into single-line text images, using the coordinates from the accompanying JSON annotation files in the GNHK dataset, and then binarized using adaptive thresholding.
+ > **Note:** to achieve the accuracy, images from the GNHK test set should be binarized using adaptive thresholding, and preprocessed into single-line text images, using the coordinates from the accompanying JSON annotation files in the GNHK dataset. See [preprocess_gnhk.py](../../../demos/handwritten_text_recognition_demo/python/utils/preprocess_gnhk.py).

  This model adopts [label error rate](https://dl.acm.org/doi/abs/10.1145/1143844.1143891) as the metric for accuracy.
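
Label error rate, as defined in the linked paper, is the edit distance between predicted and ground-truth transcriptions, normalized by ground-truth length and averaged over the test set. A minimal sketch (hypothetical helper, not part of this commit):

    def label_error_rate(pairs):
        """Mean normalized edit distance over (predicted, ground_truth) pairs."""
        def levenshtein(a, b):
            # Classic dynamic-programming edit distance
            prev = list(range(len(b) + 1))
            for i, ca in enumerate(a, 1):
                cur = [i]
                for j, cb in enumerate(b, 1):
                    cur.append(min(prev[j] + 1,               # deletion
                                   cur[j - 1] + 1,            # insertion
                                   prev[j - 1] + (ca != cb))) # substitution
                prev = cur
            return prev[-1]
        rates = [levenshtein(p, g) / max(len(g), 1) for p, g in pairs]
        return sum(rates) / len(rates)

    print(label_error_rate([("hel1o world", "hello world")]))  # ~0.09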
