Skip to content

Commit bb7bab3

Browse files
committed
ocr enhance performance
1 parent fd0163e commit bb7bab3

File tree

6 files changed

+277
-27
lines changed

6 files changed

+277
-27
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Download from [chrome web store](https://chromewebstore.google.com/detail/hmigni
1818
![Alt Text](doc/result1532.gif)
1919
![result](doc/screenshot_3.png)
2020
![result](doc/screenshot_6.png)
21-
![OCR Result](doc/ocr_result.png)
21+
![OCR Result](doc/ocr_result1.gif)
2222

2323
# Features
2424

src/ocr/ocrView.js

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,22 @@ const windowPostMessageProxy = new WindowPostMessageProxy({
1414
var ocrHistory = {};
1515
var iFrames = {};
1616
var ocrResultHistory = {}; // store ocr result to avoid duplicate request
17+
var translatorHistory = {}; // Store translation history to avoid duplicate translations
1718
var setting;
1819
var ocrFrameName = "ocrFrame";
1920
var opencvFrameName = "opencvFrame";
2021
// var ocrFrameName="ocrFrameDebug"
2122
// var opencvFrameName="opencvFrameDebug";
23+
const textSimilarityThreshold = 0.8; // Threshold for text similarity
24+
var textLengthMultiplier = 7; // Multiplier for text length gap filtering
2225

2326
//detect mouse positioned image to process ocr in ocr.html iframe
2427
//create text box from ocr result
25-
export async function checkImage(x,y, currentSetting, keyDownList) {
28+
export async function checkImage(x, y, currentSetting, keyDownList) {
2629
// if ocr is not on or no key bind, skip
2730
// if mouse target is not image, skip
2831
// if already OCR processed, skip
29-
var img=util.deepElementFromPoint(x, y);
32+
var img = util.deepElementFromPoint(x, y);
3033
if (
3134
!keyDownList[currentSetting["keyDownOCR"]] ||
3235
!checkIsImage(img) ||
@@ -36,6 +39,8 @@ export async function checkImage(x,y, currentSetting, keyDownList) {
3639
}
3740
setting = currentSetting;
3841
ocrHistory[img.src] = img;
42+
translatorHistory[img.src] = [];
43+
ocrResultHistory[img.src] = [];
3944
var lang = setting["ocrLang"];
4045
makeLoadingMouseStyle(img);
4146

@@ -64,6 +69,14 @@ export async function checkImage(x,y, currentSetting, keyDownList) {
6469
"ORANGE",
6570
"bbox_white_useOpencvImg"
6671
),
72+
processOcr(
73+
img.src,
74+
lang,
75+
base64Url,
76+
img,
77+
"PURPLE",
78+
"bbox_black_useOpencvImg"
79+
),
6780
]);
6881

6982
makeNormalMouseStyle(img);
@@ -79,6 +92,7 @@ export function removeAllOcrEnv() {
7992
ocrHistory = {};
8093
hideAll({ duration: 0 });
8194
ocrResultHistory = {};
95+
translatorHistory = {};
8296
}
8397

8498
async function processOcr(mainUrl, lang, base64Url, img, color, mode = "auto") {
@@ -91,15 +105,15 @@ async function processOcr(mainUrl, lang, base64Url, img, color, mode = "auto") {
91105
// OCR process with opencv, then display
92106
if (mode.includes("bbox")) {
93107
// console.time("OCR Process with OpenCV"+mode);
94-
var { bboxList, base64Url, ratio ,opencvImg } = await requestSegmentBox(
108+
var { bboxList, base64Url, ratio, opencvImg } = await requestSegmentBox(
95109
mainUrl,
96110
lang,
97111
base64Url,
98112
mode
99113
);
100114
// console.timeEnd("OCR Process with OpenCV"+mode);
101115
}
102-
116+
103117
await Promise.all(
104118
bboxList.map(async (bbox) => {
105119
var res = await requestOcr(mainUrl, lang, [bbox], base64Url, mode);
@@ -141,19 +155,19 @@ async function createIframe(name, htmlPath) {
141155
}
142156

143157
function loadScript(name, htmlPath) {
144-
var debugCSS={
158+
var debugCSS = {
145159
width: "700",
146160
height: "700",
147161
pointerEvents: "auto",
148162
opacity: 1.0,
149-
}
163+
};
150164
var iFrameCSS = {
151165
width: "1",
152166
height: "1",
153167
pointerEvents: "none",
154168
opacity: 0.0,
155169
};
156-
170+
157171
return new Promise(function (resolve, reject) {
158172
var iFrame = $("<iframe />", {
159173
name: name,
@@ -214,14 +228,30 @@ async function showOcrData(img, ocrData, ratio, color) {
214228

215229
async function showTooltipBoxes(img, textBoxList) {
216230
var filteredTextBoxList = filterDuplicateOcr(img, textBoxList);
231+
217232
for (var textBox of filteredTextBoxList) {
218233
var { targetText, sourceLang, targetLang } = await handleTranslate(
219234
textBox["text"]
220235
);
221-
// filter large translate text len gap
222-
if(targetText.length >textBox["text"].length*7){
236+
237+
const isAlreadyTranslated = translatorHistory[img.src].some(
238+
(prevTargetText) => {
239+
return (
240+
calculateTextSimilarity(prevTargetText, targetText) >
241+
textSimilarityThreshold
242+
);
243+
}
244+
);
245+
246+
// Filter large translate text length gap
247+
if (
248+
targetText.length > textBox["text"].length * textLengthMultiplier ||
249+
isAlreadyTranslated
250+
) {
223251
continue;
224252
}
253+
254+
translatorHistory[img.src].push(targetText);
225255
addTooltipBox(img, textBox, targetText, targetLang);
226256
}
227257
}
@@ -254,11 +284,7 @@ function calculateTextSimilarity(text1, text2) {
254284

255285
function filterDuplicateOcr(img, textBoxList) {
256286
// Ensure ocrResultHistory exists for the image
257-
if (!ocrResultHistory[img.src]) {
258-
ocrResultHistory[img.src] = [];
259-
}
260287
const bboxThreshold = 15; // Threshold for bounding box similarity (bbox is a common term in OCR)
261-
const textSimilarityThreshold = 0.8; // Threshold for text similarity (e.g., Levenshtein distance ratio)
262288

263289
// Filter out text boxes that are similar to previous history
264290
const filteredTextBoxList = textBoxList.filter((textBox) => {
@@ -293,10 +319,12 @@ function adjustTextBoxBbox(textBox, ratio) {
293319
textBox["bbox"]["y1"] = Math.ceil(textBox["bbox"]["y1"] / ratio);
294320
}
295321

296-
297322
function addTooltipBox(img, textBox, text, targetLang) {
298323
// Create a tooltip element using Tippy.js
299-
var tooltipWidth = textBox["bbox"]["x1"] - textBox["bbox"]["x0"];
324+
var tooltipWidth = Math.max(
325+
200,
326+
textBox["bbox"]["x1"] - textBox["bbox"]["x0"]
327+
);
300328
const tooltipContent = $("<span/>", {
301329
text: text,
302330
css: {
@@ -377,7 +405,10 @@ function addTooltipBox(img, textBox, text, targetLang) {
377405
});
378406

379407
$(window).on("resize", () => {
380-
const { left, top, width, height } = calculateImgSegBoxSize(img, textBox["bbox"]);
408+
const { left, top, width, height } = calculateImgSegBoxSize(
409+
img,
410+
textBox["bbox"]
411+
);
381412
tooltipTarget.css({
382413
left: `${left}px`,
383414
top: `${top + height * 0.7}px`,

src/ocr/opencvHandler.js

Lines changed: 121 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,113 @@ function detectText(canvasIn, mode) {
117117
var ksize = new cv.Size(12, 12);
118118
var element = cv.getStructuringElement(cv.MORPH_RECT, ksize);
119119
cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY, 0);
120+
} else if (mode.includes("black")) {
121+
var ksize = new cv.Size(15, 15);
122+
var element = cv.getStructuringElement(cv.MORPH_RECT, ksize);
123+
124+
cv.bitwise_not(src, src);
125+
126+
// Convert image to grayscale and ensure single-channel
127+
cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY, 0);
128+
// Threshold to get white areas (255, 255, 255)
129+
130+
//get white area as mask
131+
cv.threshold(dst, dst, 210, 255, cv.THRESH_BINARY);
132+
// showImage(dst, mode);
133+
134+
// Create floodfill masks for each edge
135+
let combinedFloodMask = new cv.Mat(
136+
dst.rows,
137+
dst.cols,
138+
cv.CV_8U,
139+
new cv.Scalar(0)
140+
);
141+
var combinedFloodVisited = new Set();
142+
combinedFloodMask = customFloodFillWithoutCv(
143+
dst,
144+
{ x: 0, y: 0 },
145+
combinedFloodMask,
146+
combinedFloodVisited
147+
);
148+
combinedFloodMask = customFloodFillWithoutCv(
149+
dst,
150+
{ x: dst.cols - 1, y: 0 },
151+
combinedFloodMask,
152+
combinedFloodVisited
153+
);
154+
combinedFloodMask = customFloodFillWithoutCv(
155+
dst,
156+
{ x: 0, y: dst.rows - 1 },
157+
combinedFloodMask,
158+
combinedFloodVisited
159+
);
160+
combinedFloodMask = customFloodFillWithoutCv(
161+
dst,
162+
{ x: dst.cols - 1, y: dst.rows - 1 },
163+
combinedFloodMask,
164+
combinedFloodVisited
165+
);
166+
// showImage(combinedFloodMask, mode);
167+
// Remove mask area that exists in combinedFloodMask
168+
cv.bitwise_not(dst, dst);
169+
cv.bitwise_or(dst, combinedFloodMask, dst);
170+
cv.bitwise_not(dst, dst);
171+
172+
// cv.bitwise_not(dst, dst); // Invert the image to get black areas
173+
// cv.bitwise_not(combinedFloodMask, combinedFloodMask); // Invert the image to get black areas
174+
// cv.bitwise_and(dst, dst, dst, combinedFloodMask); // Apply the mask to the original image grep white area as mask
175+
// cv.bitwise_not(dst, dst); // Invert the image to get black areas again
176+
177+
// showImage(dst, mode);
178+
179+
// // Apply the mask to the original image grep white area as mask
180+
// let mask = new cv.Mat();
181+
// cv.bitwise_and(src, src, mask, dst);
182+
// showImage(mask, mode);
183+
184+
// make invert white area using floodfill
185+
// dst = mask.clone(); // Update dst to the masked image
186+
187+
cv.copyMakeBorder(
188+
dst,
189+
dst,
190+
1,
191+
1,
192+
1,
193+
1,
194+
cv.BORDER_CONSTANT,
195+
new cv.Scalar(0)
196+
);
197+
// Flood fill the mask to get the white area
198+
let floodFillMask = customFloodFillWithoutCv(dst, { x: 0, y: 0 });
199+
cv.bitwise_not(floodFillMask, floodFillMask);
200+
floodFillMask = floodFillMask.roi(
201+
new cv.Rect(1, 1, floodFillMask.cols - 2, floodFillMask.rows - 2)
202+
);
203+
let slicedResultMask = new cv.Mat();
204+
cv.bitwise_and(src, src, slicedResultMask, floodFillMask);
205+
206+
// showImage(slicedResultMask, mode);
207+
// // make white background and combine with slicedResultMask
208+
cv.bitwise_not(floodFillMask, floodFillMask);
209+
cv.cvtColor(floodFillMask, floodFillMask, cv.COLOR_GRAY2RGBA, 0);
210+
cv.bitwise_or(slicedResultMask, floodFillMask, floodFillMask);
211+
// showImage(floodFillMask, mode);
212+
213+
// Enhance color saturation
214+
let enhancedImage = new cv.Mat();
215+
cv.cvtColor(floodFillMask, enhancedImage, cv.COLOR_RGBA2RGB, 0);
216+
cv.convertScaleAbs(enhancedImage, enhancedImage, 2.1, 0); // Increase intensity
217+
cv.bitwise_not(enhancedImage, enhancedImage); // Invert colors
218+
cv.convertScaleAbs(enhancedImage, enhancedImage, 1.5, 0); // Adjust intensity
219+
cv.bitwise_not(enhancedImage, enhancedImage); // Invert colors
220+
preprocessedSourceImage = enhancedImage;
221+
222+
// showImage(preprocessedSourceImage, mode);
223+
// Update src and dst with the sliced result
224+
src = floodFillMask;
225+
dst = preprocessedSourceImage.clone();
226+
cv.cvtColor(dst, dst, cv.COLOR_RGBA2GRAY, 0);
120227
} else if (mode.includes("white")) {
121228
var ksize = new cv.Size(15, 15);
122229
var element = cv.getStructuringElement(cv.MORPH_RECT, ksize);
@@ -129,7 +236,7 @@ function detectText(canvasIn, mode) {
129236
//get white area as mask
130237
cv.threshold(dst, dst, 230, 255, cv.THRESH_BINARY);
131238

132-
// showImage(dst, mode);
239+
// showImage(dst, mode);
133240
// Combine all masks into one
134241
let combinedFloodMask = new cv.Mat(
135242
dst.rows,
@@ -172,13 +279,13 @@ function detectText(canvasIn, mode) {
172279
cv.bitwise_not(dst, dst);
173280

174281
// Apply the mask to the original image to get the white area as mask
175-
let mask = new cv.Mat();
176-
cv.bitwise_and(src, src, mask, dst);
282+
// let mask = new cv.Mat();
283+
// cv.bitwise_and(src, src, mask, dst);
177284

178285
// showImage(mask, mode);
179286

180287
// make invert white area using floodfill
181-
dst = mask.clone(); // Update dst to the masked image
288+
// dst = mask.clone(); // Update dst to the masked image
182289
cv.copyMakeBorder(
183290
dst,
184291
dst,
@@ -211,7 +318,6 @@ function detectText(canvasIn, mode) {
211318
cv.bitwise_or(slicedResultMask, slicedBorderMask, slicedBorderMask);
212319
// showImage(slicedBorderMask, mode);
213320

214-
215321
// Enhance color saturation
216322
let enhancedImage = new cv.Mat();
217323
cv.cvtColor(slicedBorderMask, enhancedImage, cv.COLOR_RGBA2RGB, 0);
@@ -220,6 +326,7 @@ function detectText(canvasIn, mode) {
220326
cv.convertScaleAbs(enhancedImage, enhancedImage, 1.5, 0); // Adjust intensity
221327
cv.bitwise_not(enhancedImage, enhancedImage); // Invert colors
222328
preprocessedSourceImage = enhancedImage;
329+
// showImage(preprocessedSourceImage, mode);
223330

224331
// Update src and dst with the sliced result
225332
src = slicedBorderMask;
@@ -309,11 +416,11 @@ function detectText(canvasIn, mode) {
309416
rectCoverRatio < 0.15 ||
310417
cnt.rows < 100 ||
311418
area < 150 ||
312-
!isRightAngle ||
313-
left == 0 ||
314-
top == 0 ||
315-
left + width == w ||
316-
top + height == h
419+
!isRightAngle
420+
// left == 0 ||
421+
// top == 0
422+
// left + width == w ||
423+
// top + height == h
317424
) {
318425
continue;
319426
}
@@ -434,6 +541,8 @@ function preprocessImage(canvasIn, isResize) {
434541
}
435542

436543
function customFloodFillWithoutCv(image, startPoint, mask, visited) {
544+
// console.time("customFloodFillWithoutCv");
545+
437546
let rows = image.rows;
438547
let cols = image.cols;
439548
var mask = mask || new cv.Mat(rows, cols, cv.CV_8U, new cv.Scalar(0));
@@ -463,6 +572,8 @@ function customFloodFillWithoutCv(image, startPoint, mask, visited) {
463572
{ x, y: y - 1 }
464573
);
465574
}
575+
576+
// console.timeEnd("customFloodFillWithoutCv");
466577
return mask;
467578
}
468579

0 commit comments

Comments
 (0)