Skip to content

Commit 706c82c

Browse files
committed
Merge branch 'dev'
2 parents 032475c + 9748b39 commit 706c82c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+13033
-4
lines changed

.gitignore

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
__pycache__
2+
*.py[cod]
3+
*.py.*
4+
*~
5+
.mypy_cache
6+
*.egg-info*
7+
env/*
8+
env-dir/*
9+
.envrc
10+
/venv*
11+
/build
12+
/dist

.pylintrc

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
[MASTER]
2+
extension-pkg-whitelist = lxml, numpy, cv2
3+
ignored-modules = Levenshtein
4+
5+
[TYPECHECK]
6+
generated-members = cv2.*, matplotlib.cm.*, np.*, ocrd_cis.ocropy.ocrolib.lstm.LSTM.*
7+
# generated-members still buggy with pylint 2.4.4, so:
8+
ignored-classes = cv2, matplotlib.cm, np.*, ocrd_cis.ocropy.ocrolib.lstm.LSTM
9+
10+
[MESSAGES CONTROL]
11+
# as we have legacy code in ocrolib which
12+
# does not adhere to most of this, and
13+
# pylint does not offer per-subdirectory
14+
# configuration yet (#618), we must be
15+
# overly tolerant here:
16+
disable =
17+
ungrouped-imports,
18+
multiple-imports,
19+
bad-continuation,
20+
missing-docstring,
21+
no-self-use,
22+
superfluous-parens,
23+
invalid-name,
24+
line-too-long,
25+
too-many-arguments,
26+
too-many-branches,
27+
too-many-statements,
28+
too-many-locals,
29+
too-few-public-methods,
30+
too-many-instance-attributes,
31+
wrong-import-order,
32+
duplicate-code,
33+
bad-whitespace,
34+
unused-variable,
35+
import-outside-toplevel,
36+
wildcard-import,
37+
unused-wildcard-import,
38+
too-many-return-statements,
39+
unidiomatic-typecheck,
40+
multiple-statements,
41+
no-else-return,
42+
unnecessary-semicolon,
43+
consider-using-dict-comprehension,
44+
redefined-builtin
45+
46+
# allow indented whitespace (as required by interpreter):
47+
no-space-check=empty-line
48+
49+
# allow non-snake-case identifiers:
50+
good-names=n,i

.travis.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
language: python
2+
python:
3+
- "3.5"
4+
- "3.6"
5+
- "3.7-dev"
6+
install:
7+
- make install
8+
# command to run tests
9+
script:
10+
- make test

Dockerfile

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
FROM ocrd/core:latest AS base
2+
ENV VERSION="Di 12. Mai 13:26:35 CEST 2020"
3+
ENV GITURL="https://github.com/cisocrgroup"
4+
ENV DOWNLOAD_URL="http://cis.lmu.de/~finkf"
5+
6+
# deps
7+
RUN apt-get update \
8+
&& apt-get -y install --no-install-recommends locales
9+
10+
# locales
11+
RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen \
12+
&& dpkg-reconfigure --frontend=noninteractive locales \
13+
&& update-locale LANG=en_US.UTF-8
14+
15+
# install the profiler
16+
FROM base AS profiler
17+
RUN apt-get update \
18+
&& apt-get -y install --no-install-recommends cmake g++ libcppunit-dev libxerces-c-dev \
19+
&& git clone ${GITURL}/Profiler --branch devel --single-branch /build \
20+
&& cd /build \
21+
&& cmake -DCMAKE_BUILD_TYPE=release . \
22+
&& make compileFBDic trainFrequencyList runDictSearch profiler \
23+
&& mkdir /apps \
24+
&& cp bin/compileFBDic bin/trainFrequencyList bin/profiler bin/runDictSearch /apps/ \
25+
&& cd / \
26+
&& rm -rf /build
27+
28+
FROM profiler AS languagemodel
29+
# install the profiler's language backend
30+
COPY --from=profiler /apps/compileFBDic /apps/
31+
COPY --from=profiler /apps/trainFrequencyList /apps/
32+
COPY --from=profiler /apps/runDictSearch /apps/
33+
RUN apt-get update \
34+
&& apt-get -y install --no-install-recommends icu-devtools \
35+
&& git clone ${GITURL}/Resources --branch master --single-branch /build \
36+
&& cd /build/lexica \
37+
&& PATH=$PATH:/apps make \
38+
&& PATH=$PATH:/apps make test \
39+
&& PATH=$PATH:/apps make install \
40+
&& cd / \
41+
&& rm -rf /build
42+
43+
FROM base AS postcorrection
44+
# install ocrd_cis (python)
45+
VOLUME ["/data"]
46+
COPY --from=languagemodel /etc/profiler/languages /etc/profiler/languages
47+
COPY --from=profiler /apps/profiler /apps/
48+
COPY --from=profiler /usr/lib/x86_64-linux-gnu/libicuuc.so /usr/lib//x86_64-linux-gnu/
49+
COPY --from=profiler /usr/lib/x86_64-linux-gnu/libicudata.so /usr/lib//x86_64-linux-gnu/
50+
COPY --from=profiler /usr/lib//x86_64-linux-gnu/libxerces-c-3.2.so /usr/lib//x86_64-linux-gnu/
51+
COPY . /build
52+
RUN apt-get update \
53+
&& apt-get -y install --no-install-recommends gcc wget default-jre-headless \
54+
&& cd /build \
55+
&& make install \
56+
&& make test \
57+
&& cd / \
58+
&& rm -rf /build

Makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
# Helper targets for the ocrd_cis Python package: install/uninstall via pip,
# build and push the Docker image, and run the bash test scripts in tests/.

# Tools; `?=` lets the command line or the environment override them.
# NOTE(review): PY is not referenced by any rule visible in this file.
PY ?= python3
PIP ?= pip3
# Appended verbatim to every test-script command.  The default discards the
# scripts' stdout and stderr; run `make test V=` to see their output.
V ?= > /dev/null 2>&1
PKG = ocrd_cis

# None of these targets creates a file named after itself, so all of them are
# declared phony.  Otherwise a stray file called e.g. `uninstall` or
# `docker-build` would make the corresponding rule a silent no-op.
.PHONY: install install-devel uninstall docker-build docker-push test

# Upgrade pip and install the package from the current directory.
install:
	${PIP} install --upgrade pip .

# Same as `install`, but passes -e (editable install) for the package.
install-devel:
	${PIP} install --upgrade pip -e .

# Remove the installed package named by PKG.
uninstall:
	${PIP} uninstall ${PKG}

# Build the image from the Dockerfile in the current directory.
docker-build: Dockerfile
	docker build -t flobar/ocrd_cis:latest .

# Push the image; always rebuilds first because docker-build is a prerequisite.
docker-push: docker-build
	docker push flobar/ocrd_cis:latest

# Sorted, de-duplicated list of test scripts.  Simple assignment (:=) so the
# wildcard is evaluated once at parse time instead of on every reference.
TEST_SCRIPTS := $(sort $(wildcard tests/run_*.bash))

# The scripts exist as files, so they must be phony for their recipe to run.
.PHONY: $(TEST_SCRIPTS)
$(TEST_SCRIPTS):
	bash $@ $(V)

# Run every test script (as prerequisites), then print the list of scripts.
test: $(TEST_SCRIPTS)
	echo $^

Manifest.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
include ocrd_cis/data/ocrd-cis.jar
2+
include ocrd_cis/data/3gs.csv.gz

0 commit comments

Comments
 (0)