File tree Expand file tree Collapse file tree 8 files changed +82
-5
lines changed
test_unstructured_inference/inference Expand file tree Collapse file tree 8 files changed +82
-5
lines changed Original file line number Diff line number Diff line change @@ -141,3 +141,5 @@ dmypy.json
141141
142142# VSCode
143143.vscode /
144+
145+ sample-docs /* _images
Original file line number Diff line number Diff line change 1+ ## 0.5.7
2+
3+ * Hotfix: store extracted images in a `<filename>_images` directory so the output directory no longer conflicts with the PDF itself when the PDF has no file extension
4+
15## 0.5.6
26
37* Update the ` annotate ` and ` _get_image_array ` methods of ` PageLayout ` to get the image from the ` image_path ` property if the ` image ` property is ` None ` .
Original file line number Diff line number Diff line change 1+ # syntax=docker/dockerfile:experimental
# Stage 1 (base): base image plus environment shared by all later stages.
FROM quay.io/unstructured-io/base-images:rocky8.7-3 AS base

# pip version pinned by the caller through a build argument.
ARG PIP_VERSION

# Set up environment
ENV HOME=/home/
WORKDIR ${HOME}
# Pre-register GitHub's RSA host key in a private ~/.ssh directory.
RUN mkdir ${HOME}/.ssh && chmod go-rwx ${HOME}/.ssh \
  && ssh-keyscan -t rsa github.com >> ${HOME}/.ssh/known_hosts
ENV PYTHONPATH="${PYTHONPATH}:${HOME}"
ENV PATH="/home/usr/.local/bin:${PATH}"

# Stage 2 (deps): Python dependencies only, so this layer is reused when just
# the package source changes.
FROM base AS deps
# Copy and install Unstructured
COPY requirements requirements

# "Development Tools" is installed and removed inside the same RUN so the
# toolchain is only present while the pip installs execute.
RUN python3.8 -m pip install pip==${PIP_VERSION} && \
  dnf -y groupinstall "Development Tools" && \
  pip install --no-cache -r requirements/base.txt && \
  pip install --no-cache -r requirements/test.txt && \
  pip install --no-cache -r requirements/dev.txt && \
  pip install "unstructured.PaddleOCR" && \
  dnf -y groupremove "Development Tools" && \
  dnf clean all

# Stage 3 (code): add the package source on top of the installed dependencies.
FROM deps AS code
ARG PACKAGE_NAME=unstructured_inference
COPY unstructured_inference unstructured_inference

# CMD ["pytest -m \"not slow\" test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing"]
CMD ["/bin/bash"]
# CMD ["bash -c pytest test_unstructured_inference"]
Original file line number Diff line number Diff line change 11PACKAGE_NAME := unstructured_inference
22PIP_VERSION := 23.1.2
# Absolute path of the directory make runs in; uses make's built-in CURDIR
# instead of forking a shell for `pwd` at parse time.
CURRENT_DIR := $(CURDIR)
34
45
56.PHONY : help
@@ -116,3 +117,23 @@ version-sync:
116117.PHONY : check-coverage
117118check-coverage :
118119 coverage report --fail-under=95
120+
##########
# Docker #
##########

# Docker targets are provided for convenience only and are not required in a standard development environment

# Tag of the development image; override with `make <target> DOCKER_IMAGE=name:tag`.
DOCKER_IMAGE ?= unstructured-inference:dev

# Build the image. The pinned pip version and the image tag are handed to the
# build script through the PIP_VERSION and DOCKER_IMAGE_NAME environment variables.
.PHONY: docker-build
docker-build:
	PIP_VERSION="$(PIP_VERSION)" DOCKER_IMAGE_NAME="$(DOCKER_IMAGE)" ./scripts/docker-build.sh

# Run the test suite inside the image, bind-mounting the tests and sample
# documents from the working tree. Set TEST_NAME to forward a `pytest -k`
# expression, e.g. `make docker-test TEST_NAME="image and not slow"`.
# Paths, the image name and the -k expression are quoted so that values
# containing spaces reach docker/pytest as single arguments.
.PHONY: docker-test
docker-test: docker-build
	docker run --rm \
		-v "${CURRENT_DIR}/test_unstructured_inference:/home/test_unstructured_inference" \
		-v "${CURRENT_DIR}/sample-docs:/home/sample-docs" \
		"$(DOCKER_IMAGE)" \
		bash -c "pytest $(if $(TEST_NAME),-k '$(TEST_NAME)',) test_unstructured_inference"
Original file line number Diff line number Diff line change 11#! /usr/bin/env bash
22
33set -euo pipefail
# pip version baked into the image; callers may override it via the environment.
PIP_VERSION="${PIP_VERSION:-23.1.2}"
# Image tag to build. Honour DOCKER_IMAGE_NAME (the variable the Makefile's
# docker-build target exports) so the tag built here matches the tag that
# docker-test later runs; fall back to the default dev tag when it is unset.
DOCKER_IMAGE="${DOCKER_IMAGE_NAME:-unstructured-inference:dev}"

# Assemble the command as an array so every argument stays a single word.
DOCKER_BUILD_CMD=(docker buildx build --load -f Dockerfile
  --build-arg PIP_VERSION="$PIP_VERSION"
  --build-arg BUILDKIT_INLINE_CACHE=1
  --progress plain
  -t "$DOCKER_IMAGE" .)

DOCKER_BUILDKIT=1 "${DOCKER_BUILD_CMD[@]}"
Original file line number Diff line number Diff line change @@ -836,7 +836,17 @@ def test_create_image_output_dir():
836836 with tempfile .TemporaryDirectory () as tmpdir :
837837 tmp_f_path = os .path .join (tmpdir , "loremipsum.pdf" )
838838 output_dir = create_image_output_dir (tmp_f_path )
839- expected_output_dir = os .path .join (os .path .abspath (tmpdir ), "loremipsum" )
839+ expected_output_dir = os .path .join (os .path .abspath (tmpdir ), "loremipsum_images" )
840+ assert os .path .isdir (output_dir )
841+ assert os .path .isabs (output_dir )
842+ assert output_dir == expected_output_dir
843+
844+
def test_create_image_output_dir_no_ext():
    """Regression test: an extension-less input file must not clash with its image directory.

    The input file is actually created on disk so that the directory has to be
    made next to an existing file whose name equals the file's stem.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_f_path = os.path.join(tmpdir, "loremipsum_no_ext")
        # Materialize the file: without it, nothing occupies the bare stem
        # name and the collision scenario is never exercised.
        with open(tmp_f_path, "wb"):
            pass
        output_dir = create_image_output_dir(tmp_f_path)
        expected_output_dir = os.path.join(os.path.abspath(tmpdir), "loremipsum_no_ext_images")
        assert os.path.isdir(output_dir)
        assert os.path.isabs(output_dir)
        assert output_dir == expected_output_dir
        # The original file must be left untouched next to the new directory.
        assert os.path.isfile(tmp_f_path)
Original file line number Diff line number Diff line change 1- __version__ = "0.5.6 " # pragma: no cover
1+ __version__ = "0.5.7 " # pragma: no cover
Original file line number Diff line number Diff line change @@ -470,6 +470,8 @@ def create_image_output_dir(
470470 directory path"""
471471 parent_dir = os .path .abspath (os .path .dirname (filename ))
472472 f_name_without_extension = os .path .splitext (os .path .basename (filename ))[0 ]
473- output_dir = os .path .join (parent_dir , f_name_without_extension )
473+
474+ # Add a suffix to avoid conflicts in case original file doesn't have an extension
475+ output_dir = os .path .join (parent_dir , f"{ f_name_without_extension } _images" )
474476 os .makedirs (output_dir , exist_ok = True )
475477 return output_dir
You can’t perform that action at this time.
0 commit comments