Skip to content

Commit 4bf76d2

Browse files
committed
bug fixed and code refactor
1 parent 632b099 commit 4bf76d2

File tree

11 files changed

+144
-77
lines changed

11 files changed

+144
-77
lines changed

sample/favicon.ico

1.12 KB
Binary file not shown.

sample/filters.html

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
<meta name="msapplication-tap-highlight" content="no">
77
<link rel="stylesheet" type="text/css" href="style/common-style.css">
88
<link rel="stylesheet" type="text/css" href="style/index.css">
9+
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
10+
<link rel="icon" href="favicon.ico" type="image/x-icon">
911
<title>Image filters</title>
1012

1113
</head>
@@ -35,8 +37,7 @@
3537
File.prototype.convertToBase64 = function (callback) {
3638
let reader = new FileReader();
3739
reader.onloadend = function (e) {
38-
// TODO: Check if e.target.result of e.result
39-
callback(e.result, e.error);
40+
callback(e.target.result, e.target.error);
4041
};
4142
reader.readAsDataURL(this);
4243
};

sample/index.html

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
<meta name="msapplication-tap-highlight" content="no">
77
<link rel="stylesheet" type="text/css" href="style/common-style.css">
88
<link rel="stylesheet" type="text/css" href="style/index.css">
9+
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
10+
<link rel="icon" href="favicon.ico" type="image/x-icon">
911
<title>Business Card Reader Library DEMO Application</title>
1012
</head>
1113
<body class="body-index">
@@ -85,12 +87,16 @@
8587
<div class="center">
8688
<img src="https://via.placeholder.com/400x300/ddd?text=upload+a+business+card" id="step1" class="img"
8789
alt="step1"/>
90+
<br>
8891
<img src="https://via.placeholder.com/400x300/ddd?text=upload+a+business+card" id="step2" class="img"
8992
alt="step2"/>
93+
<br>
9094
<img src="https://via.placeholder.com/400x300/ddd?text=upload+a+business+card" id="step3" class="img"
9195
alt="step3"/>
96+
<br>
9297
<img src="https://via.placeholder.com/400x300/ddd?text=upload+a+business+card" id="step4" class="img"
9398
alt="step4"/>
99+
<br>
94100
<img src="https://via.placeholder.com/400x300/ddd?text=upload+a+business+card" id="step5" class="img"
95101
alt="step5"/>
96102
</div>
@@ -129,8 +135,8 @@
129135
File.prototype.convertToBase64 = function (callback) {
130136
let reader = new FileReader();
131137
reader.onloadend = function (e) {
132-
// TODO: check if e.target.result of e.result
133-
callback(e.result, e.error);
138+
console.log(e);
139+
callback(e.target.result, e.target.error);
134140
};
135141
reader.readAsDataURL(this);
136142
};
@@ -151,32 +157,32 @@
151157
let img4 = document.getElementById("step4");
152158
let img5 = document.getElementById("step5");
153159

154-
img0.src = data.stages[0];
155-
img1.src = data.stages[1];
156-
img2.src = data.stages[2];
157-
img3.src = data.stages[3];
158-
img4.src = data.stages[4];
159-
img5.src = data.stages[5];
160+
if (data.stages.length > 0) img0.src = data.stages[0];
161+
if (data.stages.length > 1) img1.src = data.stages[1];
162+
if (data.stages.length > 2) img2.src = data.stages[2];
163+
if (data.stages.length > 3) img3.src = data.stages[3];
164+
if (data.stages.length > 4) img4.src = data.stages[4];
165+
if (data.stages.length > 5) img5.src = data.stages[5];
160166
}
161167

162168
// display result
163-
let keys = Object.keys(data.result);
164-
for (const k of keys) {
165-
if (k === "Name") {
166-
document.getElementById(k).innerHTML = data.result[k].Text;
167-
} else if (k === "Address") {
168-
document.getElementById(k).innerHTML = data.result[k].Text;
169-
} else if (k !== "Text") {
170-
document.getElementById(k).innerHTML = data.result[k];
171-
}
169+
if (typeof data.result !== "undefined") {
170+
Object.keys(data.result).forEach(k => {
171+
if (k === "Name") {
172+
document.getElementById(k).innerHTML = data.result[k].Text;
173+
} else if (k === "Address") {
174+
document.getElementById(k).innerHTML = data.result[k].Text;
175+
} else if (k !== "Text") {
176+
document.getElementById(k).innerHTML = data.result[k];
177+
}
178+
});
172179
}
173180

174181
$("#result").show();
175182
$("#steps").show();
176183
}
177184

178185
function displayProgress(data) {
179-
180186
if (typeof data.progress === "undefined")
181187
return;
182188

src/bcr.analyze.js

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,14 @@ const TEL_MIN_LENGTH = 6;
3030
const DISTANCE_TOLERANCE = 4;
3131
const MIN_SCORE = 0.05;
3232

33+
// TODO: Remove global result
3334
// define result template
3435
let result = {};
3536

36-
// *********************************************
37+
// ****************************************************************************
3738
// REGEXES
38-
// *********************************************
39-
let typos = [
39+
// ****************************************************************************
40+
const typos = [
4041
{regex: /[A-Za-z]0[A-Za-z]/g, find: "0", replace: "o"}, // 0 instead of o inside a text
4142
{regex: /[A-Za-z]\|[A-Za-z]/g, find: "|", replace: "l"}, // pipe for l
4243
{regex: /[A-Za-z]\|0[A-Za-z]/g, find: "|0", replace: "lo"}, // 0 instead of o + pipe and words
@@ -52,13 +53,13 @@ let typos = [
5253
];
5354

5455
// email
55-
let email = /(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/gi;
56+
const email = /(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/gi;
5657

5758
// web
58-
let web = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9]\.[^\s]{2,})/gi;
59+
const web = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9]\.[^\s]{2,})/gi;
5960

6061
// tel
61-
let regex_tel = [
62+
const regex_tel = [
6263
{regex: /([+][0-9]{1,4}\s*)?(\([0-9]{1,2}\)\s*)?([0-9]+[\s|\\\/.-]?){3,}/g, confidence: 0.5},
6364
{
6465
regex: /((tel|phon|dir)\w*([.|:])*\s*)([+][0-9]{1,4}\s*)?(\([0-9]{1,2}\)\s*)?([0-9]+[\s|\\\/.-]?){3,}/g,
@@ -67,16 +68,19 @@ let regex_tel = [
6768
];
6869

6970
// fax
70-
let regex_fax = [
71+
const regex_fax = [
7172
{
7273
regex: /((fax)\w*([.|:])*\s*)([+][0-9]{1,4}\s*)?(\([0-9]{1,2}\)\s*)?([0-9]+[\s|\\\/.-]?){3,}/g,
7374
confidence: 0.5
7475
}
7576
];
7677

7778
// mobile
78-
let regex_mobile = [
79-
{regex: /([+][0-9]{1,4}\s*)?(\([0-9]{1,2}\)\s*)?([0-9]+[\s|\\\/.-]?){3,}/g, confidence: 0.5},
79+
const regex_mobile = [
80+
{
81+
regex: /([+][0-9]{1,4}\s*)?(\([0-9]{1,2}\)\s*)?([0-9]+[\s|\\\/.-]?){3,}/g,
82+
confidence: 0.5
83+
},
8084
{
8185
regex: /((mobi|cell|hand)\w*([.|:])*\s*)([+][0-9]{1,4}\s*)?(\([0-9]{1,2}\)\s*)?([0-9]+[\s|\\\/.-]?){3,}/g,
8286
confidence: 0.5
@@ -151,45 +155,56 @@ function analyzePipeline(ocr) {
151155
result = initializeResult();
152156

153157
// Step 0: Break lines from tesseract
158+
console.log("Analyze pipeline", "stage", 0, "breakLines");
154159
ocr = breakLines(ocr);
155160

156161
// Step 1: Clean text from tesseract
162+
console.log("Analyze pipeline", "stage", 1, "cleanText");
157163
ocr = cleanText(ocr);
158164

159165
// Step 2: Build logical blocks
166+
console.log("Analyze pipeline", "stage", 2, "buildBlocks");
160167
ocr = bcrBuildBlocks(ocr);
161168

162169
// Step 3: Score email
170+
console.log("Analyze pipeline", "stage", 3, "scoreEmail");
163171
ocr = scoreEmail(ocr);
164172

165173
// Step 4: Score web
174+
console.log("Analyze pipeline", "stage", 4, "scoreWeb");
166175
ocr = scoreWeb(ocr);
167176

168177
// Step 5: Score numbers
178+
console.log("Analyze pipeline", "stage", 5, "scoreNumbers");
169179
ocr = scoreNumbers(ocr);
170180

171181
// Step 6: Score company
182+
console.log("Analyze pipeline", "stage", 6, "scoreCompany");
172183
ocr = scoreCompany(ocr);
173184

174185
// Step 7: Score name
186+
console.log("Analyze pipeline", "stage", 7, "scoreName");
175187
ocr = scoreName(ocr);
176188

177189
// Step 8: Score job
190+
console.log("Analyze pipeline", "stage", 8, "scoreJob");
178191
ocr = scoreJob(ocr);
179192

180193
// Step 9: Score address
194+
console.log("Analyze pipeline", "stage", 9, "scoreAddress");
181195
ocr = scoreAddress(ocr);
182196

183197
// Step 10: Assign result
198+
console.log("Analyze pipeline", "stage", 10, "assignResult");
184199
assignResults(ocr);
185200

186201
// return result
187202
return result;
188203
}
189204

190-
// *********************************************************************
191-
// PREPROCESS
192-
// *********************************************************************
205+
// ****************************************************************************
206+
// PREPROCESS OCR Object
207+
// ****************************************************************************
193208

194209
// break long line
195210
function breakLines(ocr) {
@@ -413,9 +428,9 @@ function cleanText(ocr) {
413428
return ocr;
414429
}
415430

416-
// *********************************************************************
431+
// ****************************************************************************
417432
// UTILITIES
418-
// *********************************************************************
433+
// ****************************************************************************
419434

420435
// get font distance
421436
function getFontBiggerRatio(average, real) {
@@ -442,7 +457,6 @@ function bcrGetWordsFont(words) {
442457
return fontSize;
443458
}
444459

445-
/*
446460
// get average font size of words
447461
function bcrGetWordsBold(words) {
448462

@@ -454,16 +468,15 @@ function bcrGetWordsBold(words) {
454468
}
455469
return words.length / 2 < fontBold;
456470
}
457-
*/
458471

459472
// check regexp
460473
function checkRE(re, st) {
461474
return String(st).toLowerCase().match(re) || [];
462475
}
463476

464-
// *********************************************************************
477+
// ****************************************************************************
465478
// EXTRACT VALUE FROM BLOCK
466-
// *********************************************************************
479+
// ****************************************************************************
467480

468481
// extract web from candidate
469482
function extractWeb(text) {
@@ -569,16 +582,13 @@ function extractZip(text) {
569582

570583
// extract address street
571584
function extractStreet(text) {
572-
573585
let txt = text.toLowerCase();
574586
for (let j = 0; j < streetsDS.length; j++) {
575587
let re = streetsDS[j];
576-
577588
if (checkRE(re, txt).length > 0) {
578589
return txt;
579590
}
580591
}
581-
582592
return "";
583593
}
584594

@@ -621,9 +631,9 @@ function splitName(text) {
621631
return result;
622632
}
623633

624-
// *********************************************************************
634+
// ****************************************************************************
625635
// SCORES BLOCKS
626-
// *********************************************************************
636+
// ****************************************************************************
627637

628638
// score email (strategies: regex, @)
629639
function scoreEmail(ocr) {
@@ -711,7 +721,7 @@ function scoreCompany(ocr) {
711721
website = website.substr(0, website.lastIndexOf("."));
712722
}
713723
website = website.toLowerCase();
714-
if (typeof website !== undefined && website.length > 0)
724+
if (typeof website !== "undefined" && website.length > 0)
715725
keywords[website] = website;
716726
}
717727
}
@@ -723,7 +733,7 @@ function scoreCompany(ocr) {
723733
email = email.substr(email.indexOf("@") + 1);
724734
email = email.substr(0, email.indexOf("."));
725735
email = email.toLowerCase();
726-
if (typeof email !== undefined && email.length > 0)
736+
if (typeof email !== "undefined" && email.length > 0)
727737
keywords[email] = email;
728738
}
729739
}
@@ -734,9 +744,7 @@ function scoreCompany(ocr) {
734744
for (let i = 0; i < ocr.BCR.blocks.length; i++) {
735745
if (ocr.BCR.blocks[i].fields.web === 0 && ocr.BCR.blocks[i].fields.email === 0) {
736746
let word = ocr.BCR.blocks[i].text.toLowerCase();
737-
let keys = Object.keys(keywords);
738-
for (let k in keys) {
739-
747+
Object.keys(keywords).forEach(k => {
740748
// calculate similarity
741749
let sim = sSimilarity(word, k);
742750

@@ -747,7 +755,7 @@ function scoreCompany(ocr) {
747755
}
748756
// remaining 0.2, assigned by font criteria
749757
ocr.BCR.blocks[i].fields.company += getFontBiggerRatio(ocr.BCR.averageFontSize, ocr.BCR.blocks[i].fontSize) * 0.2;
750-
}
758+
});
751759
}
752760
}
753761

@@ -768,7 +776,7 @@ function scoreName(ocr) {
768776
let nick = email.substr(0, email.indexOf("@"));
769777
nick = nick.replace(new RegExp("\\.", 'g'), " ");
770778

771-
if (typeof email !== undefined && email.length > 0)
779+
if (typeof email !== "undefined" && email.length > 0)
772780
keywords.push(nick);
773781
}
774782
}
@@ -778,8 +786,7 @@ function scoreName(ocr) {
778786
for (let i = 0; i < ocr.BCR.blocks.length; i++) {
779787
if (ocr.BCR.blocks[i].fields.email === 0) {
780788
let word = ocr.BCR.blocks[i].text.toLowerCase();
781-
for (let k in keywords) {
782-
789+
for (let k = 0; k < keywords.length; k++) {
783790
// calculate similarity
784791
let sim = sSimilarity(word, keywords[k]);
785792

@@ -792,10 +799,11 @@ function scoreName(ocr) {
792799
ocr.BCR.blocks[i].fields.name += getFontBiggerRatio(ocr.BCR.averageFontSize, ocr.BCR.blocks[i].fontSize) * 0.2;
793800

794801
}
802+
795803
}
796804
}
797805

798-
// contribute max 0.3, assigned by dataset
806+
// contribute max 0.3, assigned by dataset
799807
for (let i = 0; i < ocr.BCR.blocks.length; i++) {
800808
if (ocr.BCR.blocks[i].fields.email === 0) {
801809
let line = ocr.BCR.blocks[i].text.toLowerCase();
@@ -938,9 +946,9 @@ function scoreAddress(ocr) {
938946
return ocr;
939947
}
940948

941-
// *********************************************************************
949+
// ****************************************************************************
942950
// Assign results
943-
// *********************************************************************
951+
// ****************************************************************************
944952
function assignResults(ocr) {
945953

946954
let web = [];

src/bcr.cities.js

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12935,15 +12935,16 @@ for (let i = 0; i < cities.length; i++) {
1293512935
}
1293612936

1293712937
for (let i = 0; i < cities.length; i++) {
12938-
if (cityDS[cities[i][1]] === undefined)
12938+
if (typeof cityDS[cities[i][1]] === "undefined")
1293912939
cityDS[cities[i][1]] = [];
1294012940
cityDS[cities[i][1]].push([cities[i][0], cities[i][2]]);
1294112941
}
1294212942

12943-
const keys = Object.keys(cityDS);
12944-
for (const k in keys) {
12943+
Object.keys(cityDS).forEach(k => {
1294512944
cityDS[k].sort();
1294612945
countryDS.push(k);
12947-
}
12946+
// console.log("Loaded", cityDS[k].length, "cities from", k); Too verbose
12947+
});
1294812948

1294912949
countryDS.sort();
12950+
console.log("Loaded", countryDS.length, "countries");

0 commit comments

Comments
 (0)