Skip to content

Commit 9f78ebd

Browse files
authored
Merge pull request #90 from javaTheHutts/88_link_prototype
88 link prototype closes #88
2 parents 3fcea5d + bd7c45b commit 9f78ebd

File tree

5 files changed

+93
-67
lines changed

5 files changed

+93
-67
lines changed

src/main/python/extraction/controllers.py

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from flask import Blueprint, jsonify, request
1010
import cv2
1111
import numpy as np
12+
from prototype.text_extract import TextExtractor
1213

1314
extract = Blueprint('extract', __name__)
1415

@@ -44,21 +45,9 @@ def extract_text():
4445
# load the image and convert
4546
image = _grab_image(url=url)
4647
# Call open CV commands here with the extracted image
47-
print(image)
48-
data.update(
49-
{
50-
"surname": "Doe",
51-
"names": "John Jane",
52-
"sex": "M",
53-
"nationality": "RSA",
54-
"identity_number": "6944585228083",
55-
"date_of_birth": "06-05-1996",
56-
"country_of_birth": "RSA",
57-
"status": "citizen",
58-
"success": True
59-
}
60-
)
61-
return jsonify(data)
48+
extractor = TextExtractor()
49+
result = extractor.extract(image)
50+
return jsonify(result)
6251

6352

6453
@extract.route('/extractFace', methods=['POST'])
@@ -123,15 +112,15 @@ def extract_all():
123112
# otherwise, assume that a URL was passed in
124113
else:
125114
# grab the URL from the request
126-
url = request.post.get("url", None)
115+
url = request.args.get("url", None)
127116
# if the URL is None, then return an error
128117
if url is None:
129118
data["error"] = "No URL provided."
130119
return jsonify(data)
131120
# load the image and convert
132121
image = _grab_image(url=url)
122+
print(image)
133123
# Call open CV commands here with the extracted image
134-
print(image)
135124
data.update(
136125
{
137126
"surname": "Nell",
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# __author__ = 'Nicolai van Niekerk'

src/main/python/prototype/processing/barcode_manager.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,13 @@ def get_barcode_info(self, image):
9494
if detection:
9595
gray = cv2.cvtColor(detected_image, cv2.COLOR_BGR2GRAY)
9696
scanner = zbar.Scanner()
97+
print(gray)
98+
print(gray.shape)
9799
results = scanner.scan(gray)
98-
image = self.apply_barcode_blur(image, box)
99100
if not results:
100101
return False, "", image
101102
else:
103+
image = self.apply_barcode_blur(image, box)
102104
return True, results[0].data, image
103105
else:
104106
return False, "", image

src/main/python/prototype/processing/text_manager.py

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,41 +45,77 @@ def _sanitise_deplorables(self, deplorables):
4545
sanitised.append(deplorable)
4646
return sanitised
4747

48-
def dictify(self, id_string):
48+
def dictify(self, id_string, barcode_scan_data):
4949
# Given a string containing extracted ID text,
5050
# create a dictionary object from said text.
5151
id_info = {}
5252
# A list of dictionaries used to find regex matches.
5353
# The ID number regex is not the best performing pattern at this stage.
5454
find_matches = [{
5555
'find': 'surname',
56-
'regex': r'(surname\ *\n)((\w*\ *)*\n)',
56+
'regex': r'((surname|surinmame)\ *\n)((\w*\ *)*\n)',
5757
'text': True
5858
}, {
5959
'find': 'names',
6060
'regex': r'((fore\ *)?(names)\ *\n)((\w*\ *)*\n)',
6161
'text': True
6262
}, {
63-
'find': 'idNumber',
63+
'find': 'identity_number',
6464
'regex': r'((id\w*\ * )(no|number) *\s)((\w* *)*\n)',
6565
'text': False
6666
}, {
67-
'find': 'gender',
67+
'find': 'sex',
6868
'regex': r'((sex|gender)\ *\n)((\w*\ *)*\n)',
6969
'text': True
70+
}, {
71+
'find': 'date_of_birth',
72+
'regex': r'(date of birth\ *\n)((\w*\ *)*\n)',
73+
'text': True
74+
}, {
75+
'find': 'country_of_birth',
76+
'regex': r'(country of birth\ *\n)((\w*\ *)*\n)',
77+
'text': True
78+
}, {
79+
'find': 'status',
80+
'regex': r'(status\ *\n)((\w*\ *)*\n)',
81+
'text': True
82+
}, {
83+
'find': 'nationality',
84+
'regex': r'((nationality|nahally|nahonallly)\ *\n)((\w*\ *)*\n)',
85+
'text': True
7086
}]
7187
# Attempt to retrieve regex matches
7288
for find_match in find_matches:
7389
key = find_match['find']
7490
reg_exp = find_match['regex']
7591
text = find_match['text']
76-
id_info[key] = self._get_match(id_string, reg_exp, text)
77-
if (key == "idNumber"):
78-
yy = id_info[key][:2]
79-
mm = id_info[key][2:4]
80-
dd = id_info[key][4:6]
81-
date_of_birth = str(yy) + "-" + str(mm) + "-" + str(dd)
82-
id_info['dateOfBirth'] = date_of_birth
92+
if key not in id_info:
93+
id_info[key] = self._get_match(id_string, reg_exp, text)
94+
if key == "identity_number":
95+
if barcode_scan_data:
96+
id_info[key] = barcode_scan_data["identity_number"]
97+
98+
if id_info[key]:
99+
yy = id_info[key][:2]
100+
mm = id_info[key][2:4]
101+
dd = id_info[key][4:6]
102+
gender_digit = id_info[key][6:7]
103+
status_digit = id_info[key][10:11]
104+
105+
date_of_birth = str(yy) + "-" + str(mm) + "-" + str(dd)
106+
id_info['date_of_birth'] = date_of_birth
107+
108+
if gender_digit < "5":
109+
gender = "F"
110+
else:
111+
gender = "M"
112+
id_info['sex'] = gender
113+
114+
if status_digit == "0":
115+
status = "Citizen"
116+
else:
117+
status = "Non Citizen"
118+
id_info['status'] = status
83119

84120
# Return the info we tried to find.
85121
return id_info

src/main/python/prototype/text_extract.py

Lines changed: 36 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,68 +12,54 @@
1212
"""
1313

1414
from PIL import Image
15-
from preprocessing import ThresholdingManager
16-
from preprocessing import BlurManager
17-
from preprocessing import ColorManager
18-
from preprocessing import SimplificationManager
19-
from processing import FaceDetector
20-
from processing import BarCodeManager
21-
from processing import TextManager
15+
from prototype.preprocessing.thresholding_manager import ThresholdingManager
16+
from prototype.preprocessing.blur_manager import BlurManager
17+
from prototype.preprocessing.color_manager import ColorManager
18+
from prototype.preprocessing.simplification_manager import SimplificationManager
19+
from prototype.processing.face_manager import FaceDetector
20+
from prototype.processing.barcode_manager import BarCodeManager
21+
from prototype.processing.text_manager import TextManager
2222

2323
import pytesseract
2424
import cv2
2525
import os
26-
import json
2726

2827
# Constants path to trained data for Shape Predictor.
2928
SHAPE_PREDICTOR_PATH = "{base_path}/trained_data/shape_predictor_face_landmarks.dat".format(
3029
base_path=os.path.abspath(os.path.dirname(__file__)))
3130

31+
DESKTOP = os.path.join(os.path.join(os.path.expanduser('~')), 'Desktop')
32+
3233

3334
class TextExtractor:
34-
def extract(img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[7]):
35+
def extract(self, img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[9]):
3536

36-
image = cv2.imread(img)
37+
image = img
3738

3839
simplification_manager = SimplificationManager()
3940
barcode_manager = BarCodeManager()
4041
color_manager = ColorManager()
4142
face_detector = FaceDetector(SHAPE_PREDICTOR_PATH)
4243
image = simplification_manager.perspectiveTransformation(image)
43-
cv2.imwrite("output/3-warped.png", image)
44-
44+
cv2.imwrite(DESKTOP + "/output/3.png", image)
45+
data = {}
4546
barcode_data_found, barcode_scan_data, image = barcode_manager.get_barcode_info(image)
4647
if barcode_data_found:
4748
data = {
48-
'ID_number': barcode_scan_data.decode('utf-8'),
49+
'identity_number': barcode_scan_data.decode('utf-8'),
4950
}
50-
card_data = json.dumps(data)
51-
print(card_data)
5251

53-
if rm is True:
52+
if rm:
5453
image = face_detector.blur_face(image)
55-
cv2.imwrite("output/4-faceRemvoal.png", image)
54+
cv2.imwrite(DESKTOP + "/output/4.png", image)
5655

57-
if clr is not None:
58-
if clr == "blackhat":
59-
image = color_manager.blackHat(image)
60-
elif clr == "tophat":
61-
image = color_manager.topHat(image)
62-
else:
63-
image = color_manager.extractChannel(image, clr)
64-
cv2.imwrite("output/5-colour_extract.png", image)
65-
66-
cv2.imwrite("output/colour_extract.png", image)
67-
if knl is not None:
68-
blur_kernel = knl
69-
else:
56+
if knl:
7057
if blurr == "median":
71-
blur_kernel = [3]
58+
blur_kernel = [9]
7259
else:
73-
blur_kernel = [(3, 3)]
74-
75-
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
76-
cv2.imwrite("output/6-gray.png", image)
60+
blur_kernel = [(9, 9)]
61+
else:
62+
blur_kernel = knl
7763

7864
if blurr is not None:
7965
blur_manager = BlurManager()
@@ -83,8 +69,20 @@ def extract(img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[7]
8369
image = blur_manager.gaussianBlur(image, blur_kernel=blur_kernel)
8470
elif blurr == "median":
8571
image = blur_manager.medianBlur(image, blur_kernel=blur_kernel)
72+
cv2.imwrite(DESKTOP + "/output/5.png", image)
73+
74+
if clr is not None:
75+
if clr == "blackhat":
76+
image = color_manager.blackHat(image)
77+
elif clr == "tophat":
78+
image = color_manager.topHat(image)
79+
else:
80+
image = color_manager.extractChannel(image, clr)
81+
cv2.imwrite(DESKTOP + "/output/6.png", image)
82+
83+
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
84+
cv2.imwrite(DESKTOP + "/output/7.png", image)
8685

87-
cv2.imwrite("output/7-blur.png", image)
8886
if thresh is not None:
8987
thresh_manager = ThresholdingManager()
9088
if thresh == "adaptive":
@@ -95,7 +93,7 @@ def extract(img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[7]
9593
filename = "{}.png".format(os.getpid())
9694
cv2.imwrite(filename, image)
9795

98-
cv2.imwrite("output/8-Extraction.png", image)
96+
cv2.imwrite(DESKTOP+"/output/8.png", image)
9997
text = pytesseract.image_to_string(Image.open(filename))
10098
os.remove(filename)
10199

@@ -104,6 +102,6 @@ def extract(img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[7]
104102
print(text, "\n------------------------------------------------------")
105103
clean_text = text_manager.clean_up(text)
106104
print(clean_text, "\n -----------------------------------------------")
107-
id_details = text_manager.dictify(clean_text)
105+
id_details = text_manager.dictify(clean_text, data)
108106
print(id_details)
109107
return id_details

0 commit comments

Comments
 (0)