Skip to content

Commit bab4b3b

Browse files
committed
Update README.md
1 parent 1e63fbb commit bab4b3b

File tree

6 files changed

+337
-28
lines changed

6 files changed

+337
-28
lines changed

README.md

Lines changed: 317 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,317 @@
1-
# python-document-scanner-sdk
1+
# Python Document Scanner SDK
2+
This project is a Python binding to the [Dynamsoft C/C++ Document Scanner SDK](https://www.dynamsoft.com/document-normalizer/docs/introduction/?ver=latest). It helps developers quickly build desktop document scanner applications in Python on Windows and Linux.
3+
4+
## About Dynamsoft Document Scanner
5+
Get a [30-day FREE trial license](https://www.dynamsoft.com/customer/license/trialLicense?product=ddn) to activate the SDK.
6+
7+
8+
## Supported Python Versions
9+
* Python 3.x
10+
11+
## Dependencies
12+
13+
```bash
14+
pip install opencv-python
15+
```
16+
17+
## Command-line Usage
18+
```bash
19+
# Scan documents from images
20+
$ scandocument -f <file-name> -l <license-key>
21+
22+
# Scan documents from camera video stream
23+
$ scandocument -c 1 -l <license-key>
24+
```
25+
26+
## Quick Start
27+
- Scan documents from an image file:
28+
```python
29+
import argparse
30+
import docscanner
31+
import sys
32+
import numpy as np
33+
import cv2
34+
import time
35+
36+
def showNormalizedImage(name, normalized_image):
37+
mat = docscanner.convertNormalizedImage2Mat(normalized_image)
38+
cv2.imshow(name, mat)
39+
return mat
40+
41+
def process_file(filename, scanner):
42+
image = cv2.imread(filename)
43+
results = scanner.detectMat(image)
44+
for result in results:
45+
x1 = result.x1
46+
y1 = result.y1
47+
x2 = result.x2
48+
y2 = result.y2
49+
x3 = result.x3
50+
y3 = result.y3
51+
x4 = result.x4
52+
y4 = result.y4
53+
54+
normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
55+
showNormalizedImage("Normalized Image", normalized_image)
56+
cv2.drawContours(image, [np.int0([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])], 0, (0, 255, 0), 2)
57+
58+
cv2.imshow('Document Image', image)
59+
cv2.waitKey(0)
60+
61+
normalized_image.save(str(time.time()) + '.png')
62+
print('Image saved')
63+
64+
def scandocument():
65+
"""
66+
Command-line script for scanning documents from a given image
67+
"""
68+
parser = argparse.ArgumentParser(description='Scan documents from an image file')
69+
parser.add_argument('-f', '--file', help='Path to the image file')
70+
parser.add_argument('-l', '--license', default='', type=str, help='Set a valid license key')
71+
args = parser.parse_args()
72+
# print(args)
73+
try:
74+
filename = args.file
75+
license = args.license
76+
77+
if filename is None:
78+
parser.print_help()
79+
return
80+
81+
# set license
82+
if license == '':
83+
docscanner.initLicense("DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==")
84+
else:
85+
docscanner.initLicense(license)
86+
87+
# initialize document scanner
88+
scanner = docscanner.createInstance()
89+
ret = scanner.setParameters(docscanner.Templates.color)
90+
91+
if filename is not None:
92+
process_file(filename, scanner)
93+
94+
except Exception as err:
95+
print(err)
96+
sys.exit(1)
97+
98+
scandocument()
99+
```
100+
101+
![python document scanner from file](https://www.dynamsoft.com/codepool/img/2022/09/document-perspective-correction.png)
102+
103+
- Scan documents from camera video stream:
104+
```python
105+
import argparse
106+
import docscanner
107+
import sys
108+
import numpy as np
109+
import cv2
110+
import time
111+
112+
g_results = None
113+
g_normalized_images = []
114+
115+
def callback(results):
116+
global g_results
117+
g_results = results
118+
119+
def showNormalizedImage(name, normalized_image):
120+
mat = docscanner.convertNormalizedImage2Mat(normalized_image)
121+
cv2.imshow(name, mat)
122+
return mat
123+
124+
def process_video(scanner):
125+
scanner.addAsyncListener(callback)
126+
127+
cap = cv2.VideoCapture(0)
128+
while True:
129+
ret, image = cap.read()
130+
131+
ch = cv2.waitKey(1)
132+
if ch == 27:
133+
break
134+
elif ch == ord('n'): # normalize image
135+
if g_results != None:
136+
g_normalized_images = []
137+
index = 0
138+
for result in g_results:
139+
x1 = result.x1
140+
y1 = result.y1
141+
x2 = result.x2
142+
y2 = result.y2
143+
x3 = result.x3
144+
y3 = result.y3
145+
x4 = result.x4
146+
y4 = result.y4
147+
148+
normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
149+
g_normalized_images.append((str(index), normalized_image))
150+
mat = showNormalizedImage(str(index), normalized_image)
151+
index += 1
152+
elif ch == ord('s'): # save image
153+
for data in g_normalized_images:
154+
# cv2.imwrite('images/' + str(time.time()) + '.png', image)
155+
cv2.destroyWindow(data[0])
156+
data[1].save(str(time.time()) + '.png')
157+
print('Image saved')
158+
159+
g_normalized_images = []
160+
161+
if image is not None:
162+
scanner.detectMatAsync(image)
163+
164+
if g_results != None:
165+
for result in g_results:
166+
x1 = result.x1
167+
y1 = result.y1
168+
x2 = result.x2
169+
y2 = result.y2
170+
x3 = result.x3
171+
y3 = result.y3
172+
x4 = result.x4
173+
y4 = result.y4
174+
175+
cv2.drawContours(image, [np.int0([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])], 0, (0, 255, 0), 2)
176+
177+
cv2.putText(image, 'Press "n" to normalize image', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
178+
cv2.putText(image, 'Press "s" to save image', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
179+
cv2.putText(image, 'Press "ESC" to exit', (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
180+
cv2.imshow('Document Scanner', image)
181+
182+
def scandocument():
183+
"""
184+
Command-line script for scanning documents from camera video stream.
185+
"""
186+
parser = argparse.ArgumentParser(description='Scan documents from camera')
187+
parser.add_argument('-c', '--camera', default=False, type=bool, help='Whether to show the image')
188+
parser.add_argument('-l', '--license', default='', type=str, help='Set a valid license key')
189+
args = parser.parse_args()
190+
# print(args)
191+
try:
192+
license = args.license
193+
camera = args.camera
194+
195+
if camera is False:
196+
parser.print_help()
197+
return
198+
199+
# set license
200+
if license == '':
201+
docscanner.initLicense("DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==")
202+
else:
203+
docscanner.initLicense(license)
204+
205+
# initialize document scanner
206+
scanner = docscanner.createInstance()
207+
ret = scanner.setParameters(docscanner.Templates.color)
208+
209+
if camera is True:
210+
process_video(scanner)
211+
212+
except Exception as err:
213+
print(err)
214+
sys.exit(1)
215+
216+
scandocument()
217+
```
218+
219+
![python document scanner from camera](https://www.dynamsoft.com/codepool/img/2022/09/python-document-scanner.png)
220+
221+
## Methods
222+
- `docscanner.initLicense('YOUR-LICENSE-KEY')` # set the license key
223+
224+
```python
225+
docscanner.initLicense("DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==")
226+
```
227+
228+
- `docscanner.createInstance()` # create a Document Scanner instance
229+
230+
```python
231+
scanner = docscanner.createInstance()
232+
```
233+
- `detectFile(filename)` # do edge detection from an image file
234+
235+
```python
236+
results = scanner.detectFile(<filename>)
237+
```
238+
- `detectMat(Mat image)` # do edge detection on an OpenCV Mat
239+
```python
240+
image = cv2.imread(<filename>)
241+
results = scanner.detectMat(image)
242+
for result in results:
243+
x1 = result.x1
244+
y1 = result.y1
245+
x2 = result.x2
246+
y2 = result.y2
247+
x3 = result.x3
248+
y3 = result.y3
249+
x4 = result.x4
250+
y4 = result.y4
251+
```
252+
253+
- `setParameters(Template)` # Select color, binary or grayscale template
254+
255+
```python
256+
scanner.setParameters(docscanner.Templates.color)
257+
```
258+
259+
- `addAsyncListener(callback function)` # start a native thread to run document scanning tasks
260+
- `detectMatAsync(<opencv mat data>)` # put a document scanning task into the native queue
261+
```python
262+
def callback(results):
263+
for result in results:
264+
print(result.x1)
265+
print(result.y1)
266+
print(result.x2)
267+
print(result.y2)
268+
print(result.x3)
269+
print(result.y3)
270+
print(result.x4)
271+
print(result.y4)
272+
273+
import cv2
274+
image = cv2.imread(<filename>)
275+
scanner.addAsyncListener(callback)
276+
scanner.detectMatAsync(image)
277+
sleep(5)
278+
```
279+
280+
- `normalizeBuffer(mat, x1, y1, x2, y2, x3, y3, x4, y4)` # do perspective correction from Mat
281+
```python
282+
normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
283+
```
284+
- `normalizeFile(filename, x1, y1, x2, y2, x3, y3, x4, y4)` # do perspective correction from a file
285+
```python
286+
normalized_image = scanner.normalizeFile(<filename>, x1, y1, x2, y2, x3, y3, x4, y4)
287+
```
288+
- `normalized_image.save(filename)` # save the normalized image to a file
289+
```python
290+
normalized_image.save(<filename>)
291+
```
292+
293+
## C/C++ API
294+
To customize the Python API based on the C/C++ API, please refer to the
295+
[online documentation](https://www.dynamsoft.com/document-normalizer/docs/programming/c/api-reference/?ver=latest).
296+
297+
## How to Build the Python Document Scanner Extension
298+
- Create a source distribution:
299+
300+
```bash
301+
python setup.py sdist
302+
```
303+
304+
- Build with setuptools:
305+
306+
```bash
307+
python setup_setuptools.py build
308+
python setup_setuptools.py develop
309+
```
310+
311+
- Build a wheel:
312+
313+
```bash
314+
pip wheel . --verbose
315+
# Or
316+
python setup.py bdist_wheel
317+
```

docscanner/scripts.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def showNormalizedImage(name, normalized_image):
1919

2020
def process_file(filename, scanner):
2121
image = cv2.imread(filename)
22-
results = scanner.decodeMat(image)
22+
results = scanner.detectMat(image)
2323
for result in results:
2424
x1 = result.x1
2525
y1 = result.y1
@@ -78,7 +78,7 @@ def process_video(scanner):
7878
g_normalized_images = []
7979

8080
if image is not None:
81-
scanner.decodeMatAsync(image)
81+
scanner.detectMatAsync(image)
8282

8383
if g_results != None:
8484
for result in g_results:

examples/camera/test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def process_video(scanner):
5555
g_normalized_images = []
5656

5757
if image is not None:
58-
scanner.decodeMatAsync(image)
58+
scanner.detectMatAsync(image)
5959

6060
if g_results != None:
6161
for result in g_results:

examples/file/test.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,14 @@
55
import cv2
66
import time
77

8-
g_results = None
9-
g_normalized_images = []
10-
11-
def callback(results):
12-
global g_results
13-
g_results = results
14-
158
def showNormalizedImage(name, normalized_image):
169
mat = docscanner.convertNormalizedImage2Mat(normalized_image)
1710
cv2.imshow(name, mat)
1811
return mat
1912

2013
def process_file(filename, scanner):
2114
image = cv2.imread(filename)
22-
results = scanner.decodeMat(image)
15+
results = scanner.detectMat(image)
2316
for result in results:
2417
x1 = result.x1
2518
y1 = result.y1

0 commit comments

Comments
 (0)