diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19bdedd..c2b7faf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: - id: check-toml - id: debug-statements - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.12.0 + rev: v2.16.0 hooks: - id: pretty-format-yaml args: [--preserve-quotes, --autofix, --indent, '2'] @@ -55,7 +55,7 @@ repos: hooks: - id: setup-cfg-fmt - repo: https://github.com/HunterMcGushion/docstr_coverage - rev: v2.3.0 + rev: v2.3.2 hooks: - id: docstr-coverage args: ["bibtexparser"] diff --git a/bibtexparser/middlewares/middleware.py b/bibtexparser/middlewares/middleware.py index c56f706..6d390fd 100644 --- a/bibtexparser/middlewares/middleware.py +++ b/bibtexparser/middlewares/middleware.py @@ -9,6 +9,7 @@ from bibtexparser.model import Entry from bibtexparser.model import ExplicitComment from bibtexparser.model import ImplicitComment +from bibtexparser.model import ParsingFailedBlock from bibtexparser.model import Preamble from bibtexparser.model import String @@ -129,6 +130,8 @@ def transform_block( return self.transform_explicit_comment(block, library) elif isinstance(block, ImplicitComment): return self.transform_implicit_comment(block, library) + elif isinstance(block, ParsingFailedBlock): + return self.transform_failed_block(block, library) logger.warning(f"Unknown block type {type(block)}") return block @@ -188,6 +191,17 @@ def transform_implicit_comment( """ return implicit_comment + def transform_failed_block( + self, failed_block: ParsingFailedBlock, library: "Library" + ) -> Union[Block, Collection[Block], None]: + """Transform a block whose parsing failed (e.g. a ``DuplicateFieldKeyBlock``). + Called by `transform_block` if the block is a ``ParsingFailedBlock``. + + By default, failed blocks are kept in the library unchanged, + i.e., the middleware is not applied to them. + """ + return failed_block + class LibraryMiddleware(Middleware, abc.ABC): """Changes an overall library at once (not just on a per-block basis). diff --git a/bibtexparser/model.py b/bibtexparser/model.py index 4b13935..f11a1ca 100644 --- a/bibtexparser/model.py +++ b/bibtexparser/model.py @@ -456,14 +456,18 @@ def previous_block(self) -> Block: class DuplicateFieldKeyBlock(ParsingFailedBlock): - """An error-indicating block indicating a duplicate field key in an entry.""" + """An error-indicating block indicating a duplicate field key in an entry. + + The entry containing the duplicate field keys is available as + `block.ignore_error_block`, the duplicate keys as `block.duplicate_keys`.""" def __init__(self, duplicate_keys: Set[str], entry: Entry): sorted_duplicate_keys = sorted(list(duplicate_keys)) super().__init__( error=Exception( - f"Duplicate field keys on entry: '{', '.join(sorted_duplicate_keys)}'." - f"Note: The entry (containing duplicate) is available as `failed_block.entry`" + f"Duplicate field keys on entry: '{', '.join(sorted_duplicate_keys)}'. " + f"Note: The entry (containing duplicates) is available as " + f"`failed_block.ignore_error_block`" ), start_line=entry.start_line, raw=entry.raw, diff --git a/docs/source/bibtexparser.rst b/docs/source/bibtexparser.rst index 4b8db4b..a3481de 100644 --- a/docs/source/bibtexparser.rst +++ b/docs/source/bibtexparser.rst @@ -17,13 +17,14 @@ Full API ----------------------------------------------------------------------- .. autoclass:: bibtexparser.Library - :members: entries, entries_dict, comments, strings, preambles, blocks + :members: entries, entries_dict, comments, strings, preambles, blocks, failed_blocks :mod:`bibtexparser.model` --- The classes used in the library ------------------------------------------------------------- .. automodule:: bibtexparser.model - :members: Entry, String, Preamble, Block, ExplicitComment, ImplicitComment, Field + :members: Entry, String, Preamble, Block, ExplicitComment, ImplicitComment, Field, + ParsingFailedBlock, MiddlewareErrorBlock, DuplicateBlockKeyBlock, DuplicateFieldKeyBlock :mod:`bibtexparser.middlewares` --- Customizers to transform parsed library diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 692100f..ba109b5 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -123,9 +123,27 @@ and you should check for their presence to make sure mistakes are not going unde Obviously, in your code, you may want to go beyond simply printing a statement when faced with failed_blocks. -Here, the actual failed blocks provided in ``library.failed_blocks`` -will provide you some more information -(exceeding this tutorial, see the corresponding section of the docs for more detail). +All failed blocks are instances of ``bibtexparser.model.ParsingFailedBlock`` +(or one of its subtypes) and expose at least the following attributes to investigate the problem: + +.. code-block:: python + + failed_block = library.failed_blocks[0] + failed_block.error # The exception describing why parsing failed + failed_block.start_line # The line in the file where the block started + failed_block.raw # The raw, unparsed bibtex of the block + +Depending on the type of failure, a more specific subtype with additional attributes is used: + +* ``DuplicateFieldKeyBlock``: The entry contained the same field key more than once + (e.g. two ``title`` fields). The offending keys are available as ``failed_block.duplicate_keys``. +* ``DuplicateBlockKeyBlock``: The library already contained a block with the same entry key. + The previously parsed block is available as ``failed_block.previous_block``. +* ``MiddlewareErrorBlock``: A middleware raised an exception while transforming the block. + +For these types, the block as parsed before the error was detected is available +as ``failed_block.ignore_error_block``, which you may use to recover from the error +manually (e.g. by fixing and re-adding it to the library) if you choose to do so. .. _writing_quickstart: diff --git a/tests/middleware_tests/test_block_middleware.py b/tests/middleware_tests/test_block_middleware.py index 11e4f24..4c27cfc 100644 --- a/tests/middleware_tests/test_block_middleware.py +++ b/tests/middleware_tests/test_block_middleware.py @@ -1,10 +1,16 @@ +import logging + import pytest from bibtexparser import Library from bibtexparser.middlewares.middleware import BlockMiddleware +from bibtexparser.model import DuplicateFieldKeyBlock from bibtexparser.model import Entry from bibtexparser.model import ExplicitComment +from bibtexparser.model import Field from bibtexparser.model import ImplicitComment +from bibtexparser.model import MiddlewareErrorBlock +from bibtexparser.model import ParsingFailedBlock from bibtexparser.model import Preamble from bibtexparser.model import String @@ -107,3 +113,35 @@ def test_returning_invalid_raises_error(middleware): library = Library(blocks=BLOCKS) with pytest.raises(TypeError): middleware.transform(library) + + +class NoopBlockMiddleware(BlockMiddleware): + """A middleware that does not override any of the type-specific transform methods.""" + + def __init__(self): + super().__init__(allow_parallel_execution=True, allow_inplace_modification=True) + + +@pytest.mark.parametrize( + "failed_block", + [ + ParsingFailedBlock(error=Exception("some error"), raw="some raw bibtex"), + MiddlewareErrorBlock(block=Entry("article", "key", fields=[]), error=Exception("oops")), + DuplicateFieldKeyBlock( + duplicate_keys={"title"}, + entry=Entry( + "article", + "key", + fields=[Field("title", "{A}"), Field("title", "{B}")], + ), + ), + ], +) +def test_failed_blocks_are_kept_without_warning(failed_block, caplog): + """Failed blocks are known types and must pass through silently (see issue #520).""" + library = Library(blocks=[failed_block]) + with caplog.at_level(logging.WARNING): + library = NoopBlockMiddleware().transform(library) + + assert library.blocks == [failed_block] + assert "Unknown block type" not in caplog.text diff --git a/tests/splitter_tests/test_splitter_entry.py b/tests/splitter_tests/test_splitter_entry.py index 29c908e..6479afa 100644 --- a/tests/splitter_tests/test_splitter_entry.py +++ b/tests/splitter_tests/test_splitter_entry.py @@ -159,6 +159,9 @@ def test_multiple_identical_field_keys(): assert isinstance(block, DuplicateFieldKeyBlock) assert "author, title" in str(block.error) + assert "ignore_error_block" in str(block.error) + + assert block.duplicate_keys == {"author", "title"} assert block.ignore_error_block is not None