Merge pull request #90 from javaTheHutts/88_link_prototype

AndreasNel · web-flow · commit 9f78ebdc241b · 2017-08-07T15:11:34.000+02:00
88 link prototype; closes #88
diff --git a/src/main/python/extraction/controllers.py b/src/main/python/extraction/controllers.py
@@ -9,6 +9,7 @@
 from flask import Blueprint, jsonify, request
 import cv2
 import numpy as np
+from prototype.text_extract import TextExtractor
 
 extract = Blueprint('extract', __name__)
 
@@ -44,21 +45,9 @@ def extract_text():
                 # load the image and convert
             image = _grab_image(url=url)
         # Call open CV commands here with the extracted image
-        print(image)
-        data.update(
-            {
-                "surname": "Doe",
-                "names": "John Jane",
-                "sex": "M",
-                "nationality": "RSA",
-                "identity_number": "6944585228083",
-                "date_of_birth": "06-05-1996",
-                "country_of_birth": "RSA",
-                "status": "citizen",
-                "success": True
-            }
-        )
-    return jsonify(data)
+        extractor = TextExtractor()
+        result = extractor.extract(image)
+    return jsonify(result)
 
 
 @extract.route('/extractFace', methods=['POST'])
@@ -123,15 +112,15 @@ def extract_all():
         # otherwise, assume that a URL was passed in
         else:
             # grab the URL from the request
-            url = request.post.get("url", None)
+            url = request.args.get("url", None)
             # if the URL is None, then return an error
             if url is None:
                 data["error"] = "No URL provided."
                 return jsonify(data)
             # load the image and convert
             image = _grab_image(url=url)
+            print(image)
         # Call open CV commands here with the extracted image
-        print(image)
         data.update(
             {
                 "surname": "Nell",
diff --git a/src/main/python/prototype/__init__.py b/src/main/python/prototype/__init__.py
@@ -0,0 +1 @@
+# __author__ = 'Nicolai van Niekerk'
diff --git a/src/main/python/prototype/processing/barcode_manager.py b/src/main/python/prototype/processing/barcode_manager.py
@@ -94,11 +94,13 @@ def get_barcode_info(self, image):
         if detection:
             gray = cv2.cvtColor(detected_image, cv2.COLOR_BGR2GRAY)
             scanner = zbar.Scanner()
+            print(gray)
+            print(gray.shape)
             results = scanner.scan(gray)
-            image = self.apply_barcode_blur(image, box)
             if not results:
                 return False, "", image
             else:
+                image = self.apply_barcode_blur(image, box)
                 return True, results[0].data, image
         else:
             return False, "", image
diff --git a/src/main/python/prototype/processing/text_manager.py b/src/main/python/prototype/processing/text_manager.py
@@ -45,41 +45,77 @@ def _sanitise_deplorables(self, deplorables):
                 sanitised.append(deplorable)
         return sanitised
 
-    def dictify(self, id_string):
+    def dictify(self, id_string, barcode_scan_data):
         # Given a string containing extracted ID text,
         # create a dictionary object from said text.
         id_info = {}
         # A list of dictionaries used to find regex matches.
         # The ID number regex is not the best performing pattern at this stage.
         find_matches = [{
             'find': 'surname',
-            'regex': r'(surname\ *\n)((\w*\ *)*\n)',
+            'regex': r'((surname|surinmame)\ *\n)((\w*\ *)*\n)',
             'text': True
         }, {
             'find': 'names',
             'regex': r'((fore\ *)?(names)\ *\n)((\w*\ *)*\n)',
             'text': True
         }, {
-            'find': 'idNumber',
+            'find': 'identity_number',
             'regex': r'((id\w*\ * )(no|number) *\s)((\w* *)*\n)',
             'text': False
         }, {
-            'find': 'gender',
+            'find': 'sex',
             'regex': r'((sex|gender)\ *\n)((\w*\ *)*\n)',
             'text': True
+        }, {
+            'find': 'date_of_birth',
+            'regex': r'(date of birth\ *\n)((\w*\ *)*\n)',
+            'text': True
+        }, {
+            'find': 'country_of_birth',
+            'regex': r'(country of birth\ *\n)((\w*\ *)*\n)',
+            'text': True
+        }, {
+            'find': 'status',
+            'regex': r'(status\ *\n)((\w*\ *)*\n)',
+            'text': True
+        }, {
+            'find': 'nationality',
+            'regex': r'((nationality|nahally|nahonallly)\ *\n)((\w*\ *)*\n)',
+            'text': True
         }]
         # Attempt to retrieve regex matches
         for find_match in find_matches:
             key = find_match['find']
             reg_exp = find_match['regex']
             text = find_match['text']
-            id_info[key] = self._get_match(id_string, reg_exp, text)
-            if (key == "idNumber"):
-                yy = id_info[key][:2]
-                mm = id_info[key][2:4]
-                dd = id_info[key][4:6]
-                date_of_birth = str(yy) + "-" + str(mm) + "-" + str(dd)
-                id_info['dateOfBirth'] = date_of_birth
+            if key not in id_info:
+                id_info[key] = self._get_match(id_string, reg_exp, text)
+            if key == "identity_number":
+                if barcode_scan_data:
+                    id_info[key] = barcode_scan_data["identity_number"]
+
+                if id_info[key]:
+                    yy = id_info[key][:2]
+                    mm = id_info[key][2:4]
+                    dd = id_info[key][4:6]
+                    gender_digit = id_info[key][6:7]
+                    status_digit = id_info[key][10:11]
+
+                    date_of_birth = str(yy) + "-" + str(mm) + "-" + str(dd)
+                    id_info['date_of_birth'] = date_of_birth
+
+                    if gender_digit < "5":
+                        gender = "F"
+                    else:
+                        gender = "M"
+                    id_info['sex'] = gender
+
+                    if status_digit == "0":
+                        status = "Citizen"
+                    else:
+                        status = "Non Citizen"
+                    id_info['status'] = status
 
         # Return the info we tried to find.
         return id_info
diff --git a/src/main/python/prototype/text_extract.py b/src/main/python/prototype/text_extract.py
@@ -12,68 +12,54 @@
 """
 
 from PIL import Image
-from preprocessing import ThresholdingManager
-from preprocessing import BlurManager
-from preprocessing import ColorManager
-from preprocessing import SimplificationManager
-from processing import FaceDetector
-from processing import BarCodeManager
-from processing import TextManager
+from prototype.preprocessing.thresholding_manager import ThresholdingManager
+from prototype.preprocessing.blur_manager import BlurManager
+from prototype.preprocessing.color_manager import ColorManager
+from prototype.preprocessing.simplification_manager import SimplificationManager
+from prototype.processing.face_manager import FaceDetector
+from prototype.processing.barcode_manager import BarCodeManager
+from prototype.processing.text_manager import TextManager
 
 import pytesseract
 import cv2
 import os
-import json
 
 # Constants path to trained data for Shape Predictor.
 SHAPE_PREDICTOR_PATH = "{base_path}/trained_data/shape_predictor_face_landmarks.dat".format(
     base_path=os.path.abspath(os.path.dirname(__file__)))
 
+DESKTOP = os.path.join(os.path.join(os.path.expanduser('~')), 'Desktop')
+
 
 class TextExtractor:
-    def extract(img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[7]):
+    def extract(self, img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[9]):
 
-        image = cv2.imread(img)
+        image = img
 
         simplification_manager = SimplificationManager()
         barcode_manager = BarCodeManager()
         color_manager = ColorManager()
         face_detector = FaceDetector(SHAPE_PREDICTOR_PATH)
         image = simplification_manager.perspectiveTransformation(image)
-        cv2.imwrite("output/3-warped.png", image)
-
+        cv2.imwrite(DESKTOP + "/output/3.png", image)
+        data = {}
         barcode_data_found, barcode_scan_data, image = barcode_manager.get_barcode_info(image)
         if barcode_data_found:
             data = {
-                'ID_number': barcode_scan_data.decode('utf-8'),
+                'identity_number': barcode_scan_data.decode('utf-8'),
             }
-            card_data = json.dumps(data)
-            print(card_data)
 
-        if rm is True:
+        if rm:
             image = face_detector.blur_face(image)
-            cv2.imwrite("output/4-faceRemvoal.png", image)
+            cv2.imwrite(DESKTOP + "/output/4.png", image)
 
-        if clr is not None:
-            if clr == "blackhat":
-                image = color_manager.blackHat(image)
-            elif clr == "tophat":
-                image = color_manager.topHat(image)
-            else:
-                image = color_manager.extractChannel(image, clr)
-            cv2.imwrite("output/5-colour_extract.png", image)
-
-        cv2.imwrite("output/colour_extract.png", image)
-        if knl is not None:
-            blur_kernel = knl
-        else:
+        if knl:
             if blurr == "median":
-                blur_kernel = [3]
+                blur_kernel = [9]
             else:
-                blur_kernel = [(3, 3)]
-
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-        cv2.imwrite("output/6-gray.png", image)
+                blur_kernel = [(9, 9)]
+        else:
+            blur_kernel = knl
 
         if blurr is not None:
             blur_manager = BlurManager()
@@ -83,8 +69,20 @@ def extract(img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[7]
                 image = blur_manager.gaussianBlur(image, blur_kernel=blur_kernel)
             elif blurr == "median":
                 image = blur_manager.medianBlur(image, blur_kernel=blur_kernel)
+            cv2.imwrite(DESKTOP + "/output/5.png", image)
+
+        if clr is not None:
+            if clr == "blackhat":
+                image = color_manager.blackHat(image)
+            elif clr == "tophat":
+                image = color_manager.topHat(image)
+            else:
+                image = color_manager.extractChannel(image, clr)
+            cv2.imwrite(DESKTOP + "/output/6.png", image)
+
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        cv2.imwrite(DESKTOP + "/output/7.png", image)
 
-        cv2.imwrite("output/7-blur.png", image)
         if thresh is not None:
             thresh_manager = ThresholdingManager()
             if thresh == "adaptive":
@@ -95,7 +93,7 @@ def extract(img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[7]
         filename = "{}.png".format(os.getpid())
         cv2.imwrite(filename, image)
 
-        cv2.imwrite("output/8-Extraction.png", image)
+        cv2.imwrite(DESKTOP+"/output/8.png", image)
         text = pytesseract.image_to_string(Image.open(filename))
         os.remove(filename)
 
@@ -104,6 +102,6 @@ def extract(img, thresh="adaptive", blurr="median", clr="red", rm=False, knl=[7]
         print(text, "\n------------------------------------------------------")
         clean_text = text_manager.clean_up(text)
         print(clean_text, "\n -----------------------------------------------")
-        id_details = text_manager.dictify(clean_text)
+        id_details = text_manager.dictify(clean_text, data)
         print(id_details)
         return id_details