Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
5200cde
Simplify and fix `Table.set_span`
tomprogrammer Aug 16, 2024
898ec39
Change `cell.hspan` and `cell.vspan` to properties
tomprogrammer Aug 17, 2024
6118c3c
Remove redundant conditions on `Cell.vspan/hspan`
tomprogrammer Aug 19, 2024
76e2774
[REF] Fix B028
bosd Oct 20, 2024
81b5461
[REF] lattice -reduce_index
bosd Oct 20, 2024
153a26b
[REF]: copy_spanning_text
bosd Oct 20, 2024
671d3d8
[REF]: get_table_index
bosd Oct 20, 2024
18bc8e5
[REF]: Find_closest_tls
bosd Oct 20, 2024
bb1e2ea
[REF]: get_table_areas
bosd Oct 20, 2024
ad13f55
[REF] search_table_body
bosd Oct 19, 2024
85e42d3
Pre-commit fix
bosd Oct 20, 2024
9a60fdc
Bump cryptography from 43.0.1 to 43.0.3
dependabot[bot] Oct 21, 2024
fc992af
Bump virtualenv from 20.26.6 to 20.27.0 in /.github/workflows
dependabot[bot] Oct 21, 2024
03671ba
Bump mypy from 1.12.0 to 1.12.1
dependabot[bot] Oct 21, 2024
00c0f40
Bump virtualenv from 20.26.6 to 20.27.0
dependabot[bot] Oct 21, 2024
75bb9e1
Flake8 fixes base parser
bosd Oct 20, 2024
a63c799
Bump mypy from 1.12.1 to 1.13.0
dependabot[bot] Oct 23, 2024
c007766
Bump pydata-sphinx-theme from 0.15.4 to 0.16.0 in /docs
dependabot[bot] Oct 23, 2024
bc7940c
Bump rich from 13.9.2 to 13.9.3
dependabot[bot] Oct 23, 2024
d346e4a
Bump safety-schemas from 0.0.5 to 0.0.7
dependabot[bot] Oct 24, 2024
e1511f0
[REF]: find lines
bosd Oct 20, 2024
fe07e91
[REF]: Remove old opencv api compatibility
bosd Oct 21, 2024
1587fe1
[REF] Search header, closest_above
bosd Oct 23, 2024
4c932e0
[REF]: Network parser search header -> merge_zones and extract_zones
bosd Oct 24, 2024
ab8000e
[REF]: network parser generate_table_bbox -> split into mark_processe…
bosd Oct 22, 2024
d12b6f2
Updated pypdf dependency to include v5
snanda85 Oct 25, 2024
3e41ce6
Fixes custom backend functionality
snanda85 Oct 25, 2024
f061043
Fixed typeguard typecheck
snanda85 Oct 25, 2024
9c4c149
pre-commit Fixups
bosd Oct 26, 2024
afe7dcd
add docstring to get_backend
bosd Oct 26, 2024
2874cf6
Bump safety-schemas from 0.0.7 to 0.0.8
dependabot[bot] Oct 25, 2024
bf0baa7
Bump safety from 3.2.8 to 3.2.9
dependabot[bot] Oct 25, 2024
13b33c9
Network parser Fix B903
bosd Oct 24, 2024
27bb8af
Bump pydantic from 2.5.3 to 2.9.2
dependabot[bot] Oct 26, 2024
a68f021
[REF]: Table set edges
bosd Oct 24, 2024
fc3cf5e
[FIX] update edges index out of range
bosd Oct 26, 2024
d128cb8
Remove redundant conditions on `Cell.vspan/hspan`
tomprogrammer Aug 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ pip==24.2
nox==2024.10.9
nox-poetry==1.0.3
poetry>=1.2.0
virtualenv==20.26.6
virtualenv==20.27.0
71 changes: 63 additions & 8 deletions camelot/backends/image_conversion.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Classes and functions for the ImageConversionBackend backends."""

from typing import Any
from typing import Dict
from typing import List
from typing import Type
Expand All @@ -22,7 +23,7 @@ class ImageConversionError(ValueError): # noqa D101
class ImageConversionBackend:
"""Classes the ImageConversionBackend backend."""

def __init__(self, backend: str = "poppler", use_fallback: bool = True) -> None:
def __init__(self, backend: Any = "poppler", use_fallback: bool = True) -> None:
"""Initialize the conversion backend .

Parameters
Expand All @@ -37,15 +38,70 @@ def __init__(self, backend: str = "poppler", use_fallback: bool = True) -> None:
ValueError
Raise an error if the backend is not supported.
"""
if backend not in BACKENDS.keys():
raise ValueError(f"Image conversion backend {backend!r} not supported")

self.backend: str = backend
self.backend: ConversionBackend = self.get_backend(backend)
self.use_fallback: bool = use_fallback
self.fallbacks: List[str] = list(
filter(lambda x: x != backend, BACKENDS.keys())
filter(lambda x: isinstance(backend, str) and x != backend, BACKENDS.keys())
)

def get_backend(self, backend):
    """Resolve ``backend`` into an object that exposes a ``convert`` method.

    Parameters
    ----------
    backend : str or object
        Either the name of an entry in the ``BACKENDS`` mapping, or an
        already-constructed object that provides a callable ``convert``
        attribute.

    Returns
    -------
    object
        A new instance of the registered backend class when ``backend``
        is a string; otherwise ``backend`` itself, unchanged.

    Raises
    ------
    NotImplementedError
        If ``backend`` is a string that is not a key of ``BACKENDS``, or
        if it is an object without a callable ``convert`` attribute.

    Examples
    --------
    >> backend_instance = get_backend('poppler')
    >> backend_instance = get_backend(my_custom_backend)
    """
    if isinstance(backend, str):
        if backend not in BACKENDS:
            raise NotImplementedError(
                f"Unknown backend {backend!r} specified. Please use either 'poppler' or 'ghostscript'."
            )
        # Registered backends are stored as classes; instantiate on demand.
        return BACKENDS[backend]()

    # A custom backend is accepted by duck typing. Require ``convert`` to be
    # callable (not merely present) so that a misconfigured object is
    # rejected here rather than failing later when a conversion is attempted.
    if not callable(getattr(backend, "convert", None)):
        raise NotImplementedError(f"{backend!r} must implement a 'convert' method")

    return backend

def convert(self, pdf_path: str, png_path: str) -> None:
"""Convert PDF to png_path.

Expand All @@ -64,8 +120,7 @@ def convert(self, pdf_path: str, png_path: str) -> None:
[description]
"""
try:
converter = BACKENDS[self.backend]()
converter.convert(pdf_path, png_path)
self.backend.convert(pdf_path, png_path)
except Exception as f:
if self.use_fallback:
for fallback in self.fallbacks:
Expand Down
Loading