Skip to content

Commit 00570e3

Browse files
authored
Merge branch 'master' into segment-improve-sep-line-detection
2 parents 4551656 + e429da4 commit 00570e3

File tree

9 files changed

+26
-19
lines changed

9 files changed

+26
-19
lines changed

README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Content:
1111
* [ocrd-cis-postcorrect](#ocrd-cis-postcorrect)
1212
* [ocrd-cis-align](#ocrd-cis-align)
1313
* [ocrd-cis-data](#ocrd-cis-data)
14-
* [Trainining](#trainining)
14+
* [Training](#training)
1515
* [ocrd-cis-ocropy-train](#ocrd-cis-ocropy-train)
1616
* [ocrd-cis-ocropy-clip](#ocrd-cis-ocropy-clip)
1717
* [ocrd-cis-ocropy-resegment](#ocrd-cis-ocropy-resegment)
@@ -74,11 +74,11 @@ It is possible (and recommended) to install `ocrd_cis` in a custom user director
7474
```
7575

7676
## Profiler
77-
The post correction is dependent on the language
78-
[profiler](https://github.com/cisocrgroup/Profiler) and its laguage
79-
configurations to generate corrections for suspicious words. In order
80-
to use the post correction a profiler with according language
81-
configruations have to be present on the system. You can refer to our
77+
The post-correction is dependent on the language
78+
[profiler](https://github.com/cisocrgroup/Profiler) and its language
79+
configurations to generate corrections for suspicious words. In order
80+
to use the post-correction, a profiler and according language
81+
configurations have to be present on the system. You can refer to our
8282
[manuals](https://github.com/cisocrgroup/Resources/tree/master/manuals)
8383
and our [lexical
8484
resources](https://github.com/cisocrgroup/Resources/tree/master/lexica)
@@ -162,7 +162,7 @@ jar library, the pre-trained post correction model, the path to the
162162
default 3-grams language model file or the default training
163163
configuration file. This tool does not follow the OCR-D conventions.
164164

165-
### Trainining
165+
### Training
166166
There is no dedicated training script provided. Models are trained
167167
using the java implementation directly (check out the [training test
168168
script](tests/run_training_test.bash) for an example). Training a
@@ -480,7 +480,7 @@ installed in order to run any tests).
480480
# Miscellaneous
481481
## OCR-D workspace
482482
483-
* Create a new (empty) workspace: `ocrd workspace init workspace-dir`
483+
* Create a new (empty) workspace: `ocrd workspace -d workspace-dir init`
484484
* cd into `workspace-dir`
485485
* Add new file to workspace: `ocrd workspace add file -G group -i id
486486
-m mimetype -g pageId`

ocrd_cis/aio/aio.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def addtoworkspace(wsdir, gtdir):
161161

162162
_, _, files = os.walk(wsdir).__next__()
163163
if 'mets.xml' not in files:
164-
initcmd = 'ocrd workspace init {}'.format(wsdir)
164+
initcmd = 'ocrd workspace -d {} init'.format(wsdir)
165165
subprocess_cmd(initcmd)
166166

167167

ocrd_cis/ocropy/ocrolib/common.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
OcropusException)
2020
from numpy import (amax, amin, array, bitwise_and, clip, dtype, mean, minimum,
2121
nan, sin, sqrt, zeros, unique, fromstring)
22-
from pylab import (clf, cm, ginput, gray, imshow, ion, subplot,
23-
where, xticks, yticks, title, xlabel, ylabel)
2422
from scipy.ndimage import morphology, measurements
2523
import PIL
2624

@@ -802,6 +800,7 @@ def binarize_range(image,dtype='B',threshold=0.5):
802800

803801
def plotgrid(data,d=10,shape=(30,30)):
804802
"""Plot a list of images on a grid."""
803+
from matplotlib.pyplot import ion, gray, clf, subplot, imshow, ginput
805804
ion()
806805
gray()
807806
clf()
@@ -813,11 +812,13 @@ def plotgrid(data,d=10,shape=(30,30)):
813812
ginput(1,timeout=0.1)
814813

815814
def showrgb(r,g=None,b=None):
815+
from matplotlib.pyplot import imshow
816816
if g is None: g = r
817817
if b is None: b = r
818818
imshow(array([r,g,b]).transpose([1,2,0]))
819819

820820
def showgrid(l,cols=None,n=400,titles=None,xlabels=None,ylabels=None,**kw):
821+
from matplotlib.pyplot import cm, xticks, yticks, subplot, imshow, title, xlabel, ylabel
821822
if "cmap" not in kw: kw["cmap"] = cm.gray
822823
if "interpolation" not in kw: kw["interpolation"] = "nearest"
823824
n = minimum(n,len(l))

ocrd_cis/ocropy/ocrolib/ligatures.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
################################################################
66

77
import re
8-
from pylab import uint32
8+
from numpy import uint32
99

1010
### These aren't formal ligatures, they are character pairs
1111
### that are frequently touching in Latin script documents.

ocrd_cis/ocropy/ocrolib/lineest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import os
44

55
import numpy as np
6-
import matplotlib.pyplot as plt
76
from scipy.ndimage import interpolation,filters,measurements
87

98
def scale_to_h(img,target_height,order=1,dtype=np.dtype('f'),cval=0):
@@ -71,6 +70,7 @@ def measure(self,line):
7170
self.mad = np.mean(deltas[line!=0])
7271
self.r = int(1+self.range*self.mad)
7372
if self.debug:
73+
import matplotlib.pyplot as plt
7474
plt.figure("center")
7575
plt.imshow(line,cmap=plt.cm.gray)
7676
plt.plot(self.center)

ocrd_cis/ocropy/ocrolib/lstm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import unicodedata, sys
3131

3232
import numpy as np
33-
import matplotlib.pyplot as plt
3433
from scipy.ndimage import measurements,filters
3534

3635
from . import common as ocrolib
@@ -806,6 +805,7 @@ def ctc_align_targets(outputs,targets,threshold=100.0,verbose=0,debug=0,lo=1e-5)
806805
lmatch = np.log(match)
807806

808807
if debug:
808+
import matplotlib.pyplot as plt
809809
plt.figure("ctcalign"); plt.clf();
810810
plt.subplot(411); plt.imshow(outputs.T,interpolation='nearest',cmap=plt.cm.hot)
811811
plt.subplot(412); plt.imshow(lmatch.T,interpolation='nearest',cmap=plt.cm.hot)

ocrd_cis/ocropy/ocrolib/morph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66

77
from numpy import *
8-
import pylab
98
#from scipy.ndimage import morphology,measurements,filters
109
from scipy.ndimage import measurements
1110
from scipy.ndimage.interpolation import shift
@@ -154,7 +153,8 @@ def rg_closing(image,size,origin=0):
154153

155154
@checks(SEGMENTATION)
156155
def showlabels(x,n=7):
157-
pylab.imshow(where(x>0,x%n+1,0),cmap=pylab.cm.gist_stern)
156+
import matplotlib.pyplot as plt
157+
plt.imshow(where(x>0,x%n+1,0),cmap=plt.cm.gist_stern)
158158

159159
@checks(ABINARY2)
160160
def find_contours(image):

ocrd_cis/ocropy/ocrolib/psegutils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from __future__ import print_function
22

33
import numpy as np
4-
import matplotlib.pyplot as plt
5-
import matplotlib.patches as mpatches
64
from scipy.ndimage import filters,interpolation
75

86
from .toplevel import *
@@ -131,6 +129,7 @@ def separates(w,u,v):
131129
if w[1].start<u[1].stop and w[1].stop>v[1].start: return 1
132130
return 0
133131
if highlight is not None:
132+
import matplotlib.pyplot as plt
134133
plt.clf()
135134
plt.title("highlight")
136135
plt.imshow(lines)
@@ -178,6 +177,8 @@ def find(condition):
178177
def show_lines(image,lines,lsort):
179178
"""Overlays the computed lines on top of the image, for debugging
180179
purposes."""
180+
import matplotlib.pyplot as plt
181+
import matplotlib.patches as mpatches
181182
ys,xs = [],[]
182183
plt.clf()
183184
plt.cla()
@@ -197,12 +198,14 @@ def show_lines(image,lines,lsort):
197198

198199
@obsolete
199200
def read_gray(fname):
201+
import matplotlib.pyplot as plt
200202
image = plt.imread(fname)
201203
if image.ndim==3: image = np.mean(image,2)
202204
return image
203205

204206
@obsolete
205207
def read_binary(fname):
208+
import matplotlib.pyplot as plt
206209
image = plt.imread(fname)
207210
if image.ndim==3: image = np.mean(image,2)
208211
image -= np.amin(image)
@@ -214,6 +217,7 @@ def read_binary(fname):
214217
@obsolete
215218
def rgbshow(r,g,b=None,gn=1,cn=0,ab=0,**kw):
216219
"""Small function to display 2 or 3 images as RGB channels."""
220+
import matplotlib.pyplot as plt
217221
if b is None: b = np.zeros(r.shape)
218222
combo = np.transpose(np.array([r,g,b]),axes=[1,2,0])
219223
if cn:

setup.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,12 @@
4646
'shapely>=1.7.1',
4747
'scikit-image',
4848
'opencv-python-headless',
49-
'matplotlib>3.0.0',
5049
'python-Levenshtein',
5150
'calamari_ocr == 0.3.5'
5251
],
52+
extras_require={
53+
'debug': ['matplotlib>3.0.0'],
54+
},
5355
package_data={
5456
'': ['*.json', '*.yml', '*.yaml', '*.csv.gz', '*.jar', '*.zip'],
5557
},

0 commit comments

Comments
 (0)