1
- from piper .base .executors import FastAPIExecutor , FastAPITesseractExecutor , VirtualEnvExecutor
2
- from fastapi .responses import JSONResponse
3
-
4
- from pydantic import BaseModel
5
- from loguru import logger
6
1
import json
7
- import spacy
8
2
import sys
3
+
4
+ import spacy
5
+ from fastapi .responses import JSONResponse
6
+ from loguru import logger
7
+ from pydantic import BaseModel
8
+
9
+ from piper .base .executors import FastAPIExecutor , FastAPITesseractExecutor
9
10
from piper .configurations import get_configuration
10
11
from piper .utils import tesrct_utils as tu
11
12
12
-
13
13
# Register a loguru sink writing to "file.log" at INFO level and above, with
# backtrace and diagnose enabled and rotation configured as '5 MB'.
logger .add ("file.log" , level = "INFO" , backtrace = True , diagnose = True , rotation = '5 MB' )
14
14
15
15
16
16
class StringValue(BaseModel):
    '''
    Pydantic model wrapping a single string in the ``value`` field.
    '''

    value: str
18
18
19
+
19
20
class BytesObject(BaseModel):
    '''
    Pydantic model wrapping a raw bytes payload in the ``value`` field.
    '''

    value: bytes
21
22
23
+
22
24
class ListOfStringsObject(BaseModel):
    '''
    Pydantic model wrapping a list in the ``value`` field.
    '''

    # Annotated as a bare ``list``: the element type is not declared here,
    # despite the class name suggesting strings.
    value: list
24
26
27
+
25
28
class TestMessageAdder (FastAPIExecutor ):
26
29
27
30
def __init__ (self , appender = "TEST" , ** kwargs ):
@@ -30,18 +33,19 @@ def __init__(self, appender="TEST", **kwargs):
30
33
31
34
async def run(self, message: StringValue) -> StringValue:
    '''
    Return a new ``StringValue`` whose text is ``message.value`` followed
    by ``self.appender``.
    '''
    combined = message.value + self.appender
    return StringValue(value=combined)
33
-
36
+
34
37
35
38
class TesseractRecognizer (FastAPITesseractExecutor ):
36
39
'''
37
40
Tesseract OCR implementation service
38
41
'''
42
+
39
43
def __init__(self, **kwargs):
    '''
    Forward ``kwargs`` to the parent constructor, then store the
    ``ts_config`` attribute of the object returned by
    ``get_configuration()`` on the instance.
    '''
    super().__init__(**kwargs)
    self.ts_config = get_configuration().ts_config
43
47
44
- def set_config_ (self , config_ ):
48
+ def set_config_ (self , config_ ):
45
49
if 'ts_lang' not in config_ .keys ():
46
50
logger .error (f'tesseract config keys must contains ts_lang, keys { config_ .keys ()} ' )
47
51
logger .error (f'tesseract config did not set' )
@@ -59,23 +63,24 @@ async def sconfig(self, conf) -> ListOfStringsObject:
59
63
# conf = '12'
60
64
logger .info (f'request to set config to { conf } ' )
61
65
self .set_config_ (conf )
62
- return JSONResponse (content = {'text' :'OK' })
63
-
64
- async def recognize (self , file_content : BytesObject , suf : str ) -> ListOfStringsObject :
66
+ return JSONResponse (content = {'text' : 'OK' })
67
+
68
async def recognize(self, file_content: BytesObject, suf: str) -> ListOfStringsObject:
    '''
    Pass the uploaded content, its file suffix and the current tesseract
    config to ``tu.bytes_handler`` and return the result as JSON.

    :param file_content: payload handed through to ``tu.bytes_handler``
    :param suf: file suffix handed through to ``tu.bytes_handler``
    :return: ``JSONResponse`` whose content is the handler's return value
             (note: the annotation says ``ListOfStringsObject``, but a
             ``JSONResponse`` is what is actually returned)
    '''
    logger.info(f'file_content {type(file_content)} , file suffix is {suf} ')

    active_config = self.ts_config
    logger.info(f'current tesseract config is {active_config} ')

    recognized = tu.bytes_handler(file_content, suf, active_config)
    logger.info(f'img_bytes_handler return {type(recognized)} object')

    return JSONResponse(content=recognized)
71
75
72
async def ner(self, txt: str):
    '''
    Extract named entities from ``txt`` using one of the models that
    ``SpacyNER`` reports as available.

    :param txt: text to run named-entity extraction on
    :return: ``JSONResponse`` with the extraction result, or ``None`` when
             no model is available
    '''
    sn = SpacyNER()

    # Truthiness already covers both "is None" and "is empty".
    if not sn.available_models:
        return None

    # available_models is created as a set, which cannot be subscripted;
    # next(iter(...)) works for sets and still yields element 0 for lists.
    # NOTE(review): for a set the chosen model is arbitrary - confirm that
    # any available model is acceptable here.
    model_name = next(iter(sn.available_models))
    sn.set_model(model_name)

    result = sn.extract_named_ents(txt)
    # extract_named_ents may already hand back a JSONResponse; wrapping a
    # response object in another JSONResponse is not JSON-serialisable.
    if isinstance(result, JSONResponse):
        return result
    return JSONResponse(content=result)
78
82
83
+
79
84
# class ModelNameNotInList(BaseException):
80
85
# def __init__(self, msg):
81
86
# # pass
@@ -86,6 +91,7 @@ class SpacyNER():
86
91
'''
87
92
Spacy NER service
88
93
'''
94
+
89
95
def __init__ (self ):
90
96
cfg = get_configuration ()
91
97
self .available_models = set ()
@@ -102,34 +108,32 @@ def __init__(self):
102
108
logger .error (f'catch exception { e } ' )
103
109
sys .exit ()
104
110
105
-
106
111
def set_model(self, cur_model):
    '''
    Load the spaCy model ``cur_model`` and store it in ``self.nlp``.

    :param cur_model: model name; must be a member of
                      ``self.available_models``
    :raises ValueError: if ``cur_model`` is not in
                        ``self.available_models`` (``self.nlp`` is reset
                        to ``None`` first)

    If ``spacy.load`` fails, the error is logged and ``self.nlp`` is set
    to ``None`` instead of propagating the exception.
    '''
    if cur_model not in self.available_models:
        logger.error(f'there is not {cur_model} in available_models set: {self.available_models} ')
        self.nlp = None
        raise ValueError(f'there is not {cur_model} in available_models set: {self.available_models} ')

    try:
        nlp = spacy.load(cur_model)
        # f-prefix added: the model name was previously logged as the
        # literal text "{cur_model}".
        logger.info(f'spacy nlp object created with model {cur_model}')
    except Exception as e:
        # Deliberately best-effort: a failed load leaves the service
        # running with no model rather than crashing the caller.
        logger.error(f'catch exception {e} ')
        if isinstance(e, OSError):
            logger.error(f'you must download spacy model {cur_model} ')
        nlp = None
        logger.info('spacy nlp object DID NOT create')

    self.nlp = nlp
125
129
126
130
def extract_named_ents (self , txt : str ):
127
131
logger .debug (f'got data type { type (txt )} and data <<{ txt } >> for NER' )
128
132
if self .nlp :
129
133
res = []
130
134
doc = self .nlp (txt )
131
135
for ent in doc .ents :
132
- res .append ((ent .text , ent .label_ ))
136
+ res .append ((ent .text , ent .label_ ))
133
137
return JSONResponse (content = res )
134
138
else :
135
139
logger .error (f'nlp object didn`t create. you should use set_model(model_name)' )
0 commit comments