Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:
- id: check-toml
- id: debug-statements
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
rev: v2.12.0
rev: v2.16.0
hooks:
- id: pretty-format-yaml
args: [--preserve-quotes, --autofix, --indent, '2']
Expand Down Expand Up @@ -55,7 +55,7 @@ repos:
hooks:
- id: setup-cfg-fmt
- repo: https://github.com/HunterMcGushion/docstr_coverage
rev: v2.3.0
rev: v2.3.2
hooks:
- id: docstr-coverage
args: ["bibtexparser"]
14 changes: 14 additions & 0 deletions bibtexparser/middlewares/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from bibtexparser.model import Entry
from bibtexparser.model import ExplicitComment
from bibtexparser.model import ImplicitComment
from bibtexparser.model import ParsingFailedBlock
from bibtexparser.model import Preamble
from bibtexparser.model import String

Expand Down Expand Up @@ -129,6 +130,8 @@ def transform_block(
return self.transform_explicit_comment(block, library)
elif isinstance(block, ImplicitComment):
return self.transform_implicit_comment(block, library)
elif isinstance(block, ParsingFailedBlock):
return self.transform_failed_block(block, library)

logger.warning(f"Unknown block type {type(block)}")
return block
Expand Down Expand Up @@ -188,6 +191,17 @@ def transform_implicit_comment(
"""
return implicit_comment

def transform_failed_block(
    self, failed_block: ParsingFailedBlock, library: "Library"
) -> Union[Block, Collection[Block], None]:
    """Hook for blocks whose parsing failed (e.g. a ``DuplicateFieldKeyBlock``).

    `transform_block` dispatches to this method whenever the block it
    receives is a ``ParsingFailedBlock``.

    The default implementation does not apply the middleware to such
    blocks: the failed block is handed back untouched, so it is kept
    in the library unchanged.
    """
    untouched = failed_block
    return untouched


class LibraryMiddleware(Middleware, abc.ABC):
"""Changes an overall library at once (not just on a per-block basis).
Expand Down
10 changes: 7 additions & 3 deletions bibtexparser/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,14 +456,18 @@ def previous_block(self) -> Block:


class DuplicateFieldKeyBlock(ParsingFailedBlock):
"""An error-indicating block indicating a duplicate field key in an entry."""
"""An error-indicating block indicating a duplicate field key in an entry.

The entry containing the duplicate field keys is available as
`block.ignore_error_block`, the duplicate keys as `block.duplicate_keys`."""

def __init__(self, duplicate_keys: Set[str], entry: Entry):
sorted_duplicate_keys = sorted(list(duplicate_keys))
super().__init__(
error=Exception(
f"Duplicate field keys on entry: '{', '.join(sorted_duplicate_keys)}'."
f"Note: The entry (containing duplicate) is available as `failed_block.entry`"
f"Duplicate field keys on entry: '{', '.join(sorted_duplicate_keys)}'. "
f"Note: The entry (containing duplicates) is available as "
f"`failed_block.ignore_error_block`"
),
start_line=entry.start_line,
raw=entry.raw,
Expand Down
5 changes: 3 additions & 2 deletions docs/source/bibtexparser.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ Full API
-----------------------------------------------------------------------

.. autoclass:: bibtexparser.Library
:members: entries, entries_dict, comments, strings, preambles, blocks
:members: entries, entries_dict, comments, strings, preambles, blocks, failed_blocks


:mod:`bibtexparser.model` --- The classes used in the library
-------------------------------------------------------------
.. automodule:: bibtexparser.model
:members: Entry, String, Preamble, Block, ExplicitComment, ImplicitComment, Field
:members: Entry, String, Preamble, Block, ExplicitComment, ImplicitComment, Field,
ParsingFailedBlock, MiddlewareErrorBlock, DuplicateBlockKeyBlock, DuplicateFieldKeyBlock


:mod:`bibtexparser.middlewares` --- Customizers to transform parsed library
Expand Down
24 changes: 21 additions & 3 deletions docs/source/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,27 @@ and you should check for their presence to make sure mistakes are not going unde

Obviously, in your code, you may want to go beyond simply printing a statement
when faced with failed_blocks.
Here, the actual failed blocks provided in ``library.failed_blocks``
will provide you some more information
(exceeding this tutorial, see the corresponding section of the docs for more detail).
All failed blocks are instances of ``bibtexparser.model.ParsingFailedBlock``
(or one of its subtypes) and expose at least the following attributes to investigate the problem:

.. code-block:: python

failed_block = library.failed_blocks[0]
failed_block.error # The exception describing why parsing failed
failed_block.start_line # The line in the file where the block started
failed_block.raw # The raw, unparsed bibtex of the block

Depending on the type of failure, a more specific subtype with additional attributes is used:

* ``DuplicateFieldKeyBlock``: The entry contained the same field key more than once
(e.g. two ``title`` fields). The offending keys are available as ``failed_block.duplicate_keys``.
* ``DuplicateBlockKeyBlock``: The library already contained a block with the same entry key.
The previously parsed block is available as ``failed_block.previous_block``.
* ``MiddlewareErrorBlock``: A middleware raised an exception while transforming the block.

For these types, the block as parsed before the error was detected is available
as ``failed_block.ignore_error_block``, which you may use to recover from the error
manually (e.g. by fixing and re-adding it to the library) if you choose to do so.

.. _writing_quickstart:

Expand Down
38 changes: 38 additions & 0 deletions tests/middleware_tests/test_block_middleware.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import logging

import pytest

from bibtexparser import Library
from bibtexparser.middlewares.middleware import BlockMiddleware
from bibtexparser.model import DuplicateFieldKeyBlock
from bibtexparser.model import Entry
from bibtexparser.model import ExplicitComment
from bibtexparser.model import Field
from bibtexparser.model import ImplicitComment
from bibtexparser.model import MiddlewareErrorBlock
from bibtexparser.model import ParsingFailedBlock
from bibtexparser.model import Preamble
from bibtexparser.model import String

Expand Down Expand Up @@ -107,3 +113,35 @@ def test_returning_invalid_raises_error(middleware):
library = Library(blocks=BLOCKS)
with pytest.raises(TypeError):
middleware.transform(library)


class NoopBlockMiddleware(BlockMiddleware):
    """Block middleware that overrides none of the type-specific transform methods."""

    def __init__(self):
        # Same two flags as before, passed by keyword via a dict.
        options = {
            "allow_parallel_execution": True,
            "allow_inplace_modification": True,
        }
        super().__init__(**options)


@pytest.mark.parametrize(
    "failed_block",
    [
        ParsingFailedBlock(raw="some raw bibtex", error=Exception("some error")),
        MiddlewareErrorBlock(
            error=Exception("oops"),
            block=Entry("article", "key", fields=[]),
        ),
        DuplicateFieldKeyBlock(
            entry=Entry(
                "article",
                "key",
                fields=[Field("title", "{A}"), Field("title", "{B}")],
            ),
            duplicate_keys={"title"},
        ),
    ],
)
def test_failed_blocks_are_kept_without_warning(failed_block, caplog):
    """Failed blocks are known block types: they pass through silently (see issue #520)."""
    original_library = Library(blocks=[failed_block])

    # Capture everything from WARNING upwards while the middleware runs.
    with caplog.at_level(logging.WARNING):
        transformed_library = NoopBlockMiddleware().transform(original_library)

    # The failed block must come out exactly as it went in ...
    assert transformed_library.blocks == [failed_block]
    # ... and must not have been reported as an unknown block type.
    assert "Unknown block type" not in caplog.text
3 changes: 3 additions & 0 deletions tests/splitter_tests/test_splitter_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ def test_multiple_identical_field_keys():
assert isinstance(block, DuplicateFieldKeyBlock)

assert "author, title" in str(block.error)
assert "ignore_error_block" in str(block.error)

assert block.duplicate_keys == {"author", "title"}

assert block.ignore_error_block is not None

Expand Down
Loading