Skip to content

Commit 28f3730

Browse files
committed
update the readme
1 parent 3d27396 commit 28f3730

File tree

2 files changed

+131
-2
lines changed

2 files changed

+131
-2
lines changed

README.md

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,33 @@
1-
# aster.pytorch
2-
ASTER in Pytorch
1+
# ASTER: Attentional Scene Text Recognizer with Flexible Rectification
2+
3+
This repository implements ASTER in PyTorch. The original software can be found [here](https://github.com/bgshih/aster).
4+
5+
## Train
6+
```
7+
bash scripts/stn_att_rec.sh
8+
```
9+
10+
## Test
11+
```
12+
bash scripts/main_test_all.sh
13+
```
14+
15+
## Reproduced results
16+
17+
| | IIIT5k | SVT | IC03 | IC13 | SVTP | CUTE |
18+
|:-------------:|:------:|:----:|:-----:|:-----:|:-----:|:-----:|
19+
| ASTER (L2R) | 92.67 | - | 93.72 | 90.74 | 78.76 | 76.39 |
20+
| ASTER.Pytorch | 93.2 | 89.2 | 92.2 | 91 | 81.2 | 81.9 |
21+
|
22+
23+
At present, the bidirectional attention decoder proposed in ASTER is not included in my implementation.
24+
25+
You can use the codes to bootstrap for your next text recognition research project.
26+
27+
28+
## Data preparation
29+
30+
We provide an example of constructing your own dataset. For details, please refer to `tools/create_svtp_lmdb.py`.
31+
32+
33+
IMPORTANT NOTICE: Although this software is licensed under MIT, our intention is to make it free for academic research purposes. If you are going to use it in a product, we suggest you [contact us]([email protected]) regarding possible patent issues.

lib/tools/create_svtp_lmdb.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import os
2+
import lmdb # install lmdb by "pip install lmdb"
3+
import cv2
4+
import numpy as np
5+
from tqdm import tqdm
6+
import six
7+
from PIL import Image
8+
import scipy.io as sio
9+
from tqdm import tqdm
10+
import re
11+
12+
def checkImageIsValid(imageBin):
    """Return True if *imageBin* holds decodable image data with non-zero area.

    Args:
        imageBin: raw image file contents as bytes, or None.

    Returns:
        bool: False for None input, undecodable data, or a zero-area image.
    """
    if imageBin is None:
        return False
    # np.frombuffer replaces np.fromstring, which is deprecated for binary data.
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    img = cv2.imdecode(imageBuf, cv2.IMREAD_GRAYSCALE)
    # cv2.imdecode returns None for corrupt/unsupported data; the original
    # code would have crashed on img.shape here.
    if img is None:
        return False
    imgH, imgW = img.shape[0], img.shape[1]
    if imgH * imgW == 0:
        return False
    return True
21+
22+
23+
def writeCache(env, cache):
    """Write every (key, value) pair in *cache* to the LMDB *env* in one transaction.

    Args:
        env  : an open lmdb.Environment.
        cache: dict mapping str keys to bytes (or str) values.
    """
    with env.begin(write=True) as txn:
        for key, value in cache.items():
            # lmdb's put() only accepts bytes; createDataset stores the
            # lexicon entry as a plain str, so encode str values here.
            if isinstance(value, str):
                value = value.encode()
            txn.put(key.encode(), value)
27+
28+
29+
def _is_difficult(word):
30+
assert isinstance(word, str)
31+
return not re.match('^[\w]+$', word)
32+
33+
34+
def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkValid=True):
    """Create an LMDB dataset for CRNN/ASTER training.

    Args:
        outputPath   : LMDB output path.
        imagePathList: list of image paths.
        labelList    : list of corresponding ground-truth texts.
        lexiconList  : (optional) list of lexicon lists, parallel to
                       imagePathList.
        checkValid   : if True, verify that every image decodes before
                       adding it.

    Samples that are missing, empty-labelled, or (when checkValid) not
    decodable are skipped; keys are 'image-%09d' / 'label-%09d' starting
    at 1, and 'num-samples' records the final count.
    """
    assert len(imagePathList) == len(labelList)
    nSamples = len(imagePathList)
    # map_size is the maximum size of the memory map (1 TiB virtual), not
    # an upfront allocation.
    env = lmdb.open(outputPath, map_size=1099511627776)
    cache = {}
    cnt = 1
    try:
        for i, (imagePath, label) in enumerate(zip(imagePathList, labelList)):
            if len(label) == 0:
                continue
            if not os.path.exists(imagePath):
                print('%s does not exist' % imagePath)
                continue
            with open(imagePath, 'rb') as f:
                imageBin = f.read()
            if checkValid and not checkImageIsValid(imageBin):
                print('%s is not a valid image' % imagePath)
                continue

            imageKey = 'image-%09d' % cnt
            labelKey = 'label-%09d' % cnt
            cache[imageKey] = imageBin
            cache[labelKey] = label.encode()
            if lexiconList:
                lexiconKey = 'lexicon-%09d' % cnt
                # Encode: lmdb values must be bytes, not str (the original
                # stored a plain str here, which lmdb rejects).
                cache[lexiconKey] = ' '.join(lexiconList[i]).encode()
            # Flush in batches of 1000 to bound memory usage.
            if cnt % 1000 == 0:
                writeCache(env, cache)
                cache = {}
                print('Written %d / %d' % (cnt, nSamples))
            cnt += 1
        nSamples = cnt - 1
        cache['num-samples'] = str(nSamples).encode()
        writeCache(env, cache)
        print('Created dataset with %d samples' % nSamples)
    finally:
        # The original leaked the environment; close it so data is synced
        # even if an exception occurs mid-way.
        env.close()
81+
if __name__ == "__main__":
82+
data_dir = '/data/mkyang/datasets/English/benchmark/svtp/'
83+
lmdb_output_path = '/data/mkyang/datasets/English/benchmark_lmdbs_new/svt_p_645'
84+
gt_file = os.path.join(data_dir, 'gt.txt')
85+
image_dir = data_dir
86+
with open(gt_file, 'r') as f:
87+
lines = [line.strip('\n') for line in f.readlines()]
88+
89+
imagePathList, labelList = [], []
90+
for i, line in enumerate(lines):
91+
splits = line.split(' ')
92+
image_name = splits[0]
93+
gt_text = splits[1]
94+
print(image_name, gt_text)
95+
imagePathList.append(os.path.join(image_dir, image_name))
96+
labelList.append(gt_text)
97+
98+
createDataset(lmdb_output_path, imagePathList, labelList)

0 commit comments

Comments
 (0)