diff --git a/camelot/handlers.py b/camelot/handlers.py
index 14e29dd4..fc4e3ebb 100644
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@@ -64,9 +64,14 @@ def __init__(
         debug=False,
     ):
         self.debug = debug
-        if is_url(filepath):
+        # Only a URL input yields a downloaded temporary file that close()
+        # may delete; a caller-supplied path must never be removed.
+        self.is_temp_file = is_url(filepath)
+        if self.is_temp_file:
+            # Rebind the local so the ".pdf" check below inspects the
+            # downloaded file, not the URL (which need not end in ".pdf").
             filepath = download_url(str(filepath))
         self.filepath: StrByteType | Path | str = filepath
 
         if isinstance(filepath, str) and not filepath.lower().endswith(".pdf"):
             raise NotImplementedError("File format not supported")
@@ -77,6 +82,47 @@ def __init__(
             self.password = password
         self.pages = self._get_pages(pages)
 
+    def __enter__(self):
+        """Enter the context manager.
+
+        Returns
+        -------
+        PDFHandler
+            The instance itself.
+        """
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and clean up temporary files.
+
+        Delegates to ``close()``. Returns ``None`` so an exception raised
+        inside the ``with`` block is never suppressed.
+
+        Parameters
+        ----------
+        exc_type : type or None
+            Type of the exception raised in the context, if any.
+        exc_val : Exception or None
+            The exception instance raised, if any.
+        exc_tb : traceback or None
+            The traceback of the exception, if any.
+        """
+        self.close()
+
+    def close(self):
+        """Close the handler and clean up temporary files.
+
+        Deletes the temporary file if it was created from a URL. Safe to
+        call more than once: a file that is already gone is ignored.
+        """
+        if not self.is_temp_file:
+            return
+        try:
+            os.remove(self.filepath)  # type: ignore
+        except FileNotFoundError:
+            # Already removed (e.g. by an earlier close()); nothing to do.
+            pass
+
     def _get_pages(self, pages):
         """Convert pages string to list of integers.
 
diff --git a/camelot/io.py b/camelot/io.py
index 43301132..266b9dd2 100644
--- a/camelot/io.py
+++ b/camelot/io.py
@@ -129,13 +129,15 @@ def read_pdf(
             warnings.simplefilter("ignore")
 
         validate_input(kwargs, flavor=flavor)
-        p = PDFHandler(filepath, pages=pages, password=password, debug=debug)
-        kwargs = remove_extra(kwargs, flavor=flavor)
-        tables = p.parse(
-            flavor=flavor,
-            suppress_stdout=suppress_stdout,
-            parallel=parallel,
-            layout_kwargs=layout_kwargs,
-            **kwargs,
-        )
+        # Use the handler as a context manager so its __exit__ cleanup runs
+        # once parsing finishes, even when parsing raises.
+        with PDFHandler(filepath, pages=pages, password=password, debug=debug) as pdf:
+            kwargs = remove_extra(kwargs, flavor=flavor)
+            tables = pdf.parse(
+                flavor=flavor,
+                suppress_stdout=suppress_stdout,
+                parallel=parallel,
+                layout_kwargs=layout_kwargs,
+                **kwargs,
+            )
         return tables