Skip to content

Commit 4162223

Browse files
committed
tesseract executor
1 parent d020554 commit 4162223

File tree

6 files changed

+132
-0
lines changed

6 files changed

+132
-0
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import time
2+
3+
from fastapi import FastAPI, Request, status, File, UploadFile
4+
from piper.envs import CurrentEnv
5+
from loguru import logger
6+
#logger = logging.getLogger(__name__)
7+
8+
#logger.add("file.log", level="INFO", backtrace=True, diagnose=True, rotation='5 MB')
9+
10+
{% for script_name in scripts.keys() %}
11+
from {{ script_name }} import *
12+
{% endfor %}
13+
14+
# Module-level FastAPI application; the shared loguru logger is attached to
# it so that it is reachable through the app object as well.
app = FastAPI(debug=True)
app.logger = logger

# Startup markers written once when the module is imported.
started_at = time.time()
logger.info(f'main here {started_at}')
logger.info('Tesseract executor')
18+
19+
@app.post('/health_check', status_code=status.HTTP_200_OK)
async def hl():
    """Health-check endpoint: log the call and answer with a fixed message."""
    logger.info('health_check request')
    payload = {"message": "health check"}
    return payload
23+
24+
# Build the service object while the CurrentEnv context is active; the class
# name and its keyword arguments are filled in when the template is rendered.
with CurrentEnv():
    logger.info('CurrentEnv')
    service = {{ service_class }}( {% for k, v in service_kwargs.items() %} {{ k }}={{ v }}, {% endfor %} )
    logger.info(f'service {service}')
28+
29+
@app.post('/{{ function_name }}')
async def {{ function_name }}(file: UploadFile = File(...)):
    """Read the uploaded file and hand its raw bytes to the service method.

    The endpoint path, this handler and the service method all share the
    name substituted at render time. The awaited service result is returned
    unchanged.
    """
    logger.info(f'recived file {file.filename}')
    payload = await file.read()
    return await service.{{ function_name }}(payload)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Image for the templated executor service; the Python base-image tag and the
# entrypoint command are substituted when the template is rendered.
FROM python:{{ python_docker_version }}

# System packages (Tesseract with Russian language data, poppler, build and
# debugging tools). The index update and the install share one layer so a
# cached, stale package index is never reused, and the index is removed
# afterwards to keep the layer small. apt-get is used because the apt
# front end does not offer a stable command-line interface for scripts.
RUN apt-get update \
    && apt-get install -y \
        software-properties-common \
        tree cmake libgl1-mesa-glx poppler-utils tesseract-ocr \
        libtesseract-dev libleptonica-dev tesseract-ocr-rus mc \
    && rm -rf /var/lib/apt/lists/*
RUN pip3 install --upgrade pip

WORKDIR /app

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt ./requirements.txt
RUN pip3 install -r requirements.txt

COPY ./ ./
RUN chmod +x ./run.sh

ENTRYPOINT ["{{ cmd }}"]

piper/services/ocr_data.jpg

82.3 KB
Loading

piper/utils/tesrct_utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import cv2
2+
import pytesseract
3+
import numpy as np
4+
from loguru import logger
5+
6+
7+
def img_bytes_handler(img_bytes):
    """Run Tesseract OCR on an encoded image and return the recognition data.

    Args:
        img_bytes: Raw bytes of an encoded image, as read from an upload.

    Returns:
        The dictionary produced by ``pytesseract.image_to_data`` with
        ``Output.DICT``, or ``None`` when the payload is empty or cannot be
        decoded into an image.
    """
    # OpenCV rejects an empty buffer with an exception rather than returning
    # None, so treat a missing/empty payload like any other undecodable input.
    if not img_bytes:
        logger.error('recive empty image or convertion failed')
        return None

    buffer = np.asarray(bytearray(img_bytes), dtype=np.uint8)
    img = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    if img is None:
        logger.error('recive empty image or convertion failed')
        return None

    logger.info(f'processing img with shape {img.shape}')
    txt_dict = pytesseract.image_to_data(
        img,
        lang='rus',
        config=r'--oem 1 --psm 11',
        output_type=pytesseract.Output.DICT,
    )

    logger.info(f'get text from image {txt_dict}')
    return txt_dict

tests/ocr_data.jpg

82.3 KB
Loading

tests/tsrct_test.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import docker
2+
import os
3+
import sys
4+
import asyncio
5+
import requests
6+
# Put the repository root (the parent of this tests directory) on sys.path so
# the `piper` package can be imported. The path is derived from this file's
# location rather than from the current working directory, so it does not
# depend on where pytest is launched from or on the checkout directory name.
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(1, root_dir)
8+
9+
from piper.utils import docker_utils as du
10+
from piper.envs import DockerEnv
11+
from piper.envs import is_docker_env
12+
from piper.configurations import get_configuration
13+
from piper.services import TesseractRecognizer, StringValue
14+
from pathlib import Path
15+
import os
16+
17+
18+
# Base URL of the running service that the tests below send requests to.
main_app_url = 'http://localhost:8788'
19+
20+
# pytest -vs tests/tsrct_test.py::TestTesseract::test_recognizer
21+
class TestTesseract():
    """HTTP-level tests against the service reachable at ``main_app_url``.

    The tests only send requests; they expect the service to be running
    already and do not start it themselves.
    """

    def test_tesseract_install(self):
        """Placeholder that always passes."""
        # TODO: build a TesseractRecognizer from get_configuration()
        # (port=cfg.docker_app_port) and run it on an event loop.
        assert True

    def test_recognizer(self):
        """POST the sample image to /recognize and expect a non-empty JSON body."""
        file_path = Path(__file__).parent.joinpath('ocr_data.jpg')
        print(file_path)

        url = f'{main_app_url}/recognize'
        print(url)

        # Open the image in a context manager so the handle is closed even
        # when the request raises.
        with open(file_path, 'rb') as image_file:
            multipart_form_data = {
                'file': image_file,
            }
            print(multipart_form_data)
            print(multipart_form_data.get('file'))

            result = requests.post(url, files=multipart_form_data, verify=False)

        # Check the status before parsing: an error response may not carry a
        # JSON body, and a decode error would hide the real failure.
        assert result.status_code == 200

        data = result.json()
        assert len(data) != 0

    def test_health_check(self):
        """POST to /health_check and expect HTTP 200."""
        url = f'{main_app_url}/health_check'
        print(url)
        result = requests.post(url)
        assert result.status_code == 200

0 commit comments

Comments
 (0)