diff --git a/bibtex_linter/parser.py b/bibtex_linter/parser.py index f52b50d..1930ce8 100644 --- a/bibtex_linter/parser.py +++ b/bibtex_linter/parser.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Dict, Tuple import dataclasses import enum import re @@ -85,10 +85,7 @@ def from_string(cls, entry_string: str) -> "BibTeXEntry": raw_fields = cls._split_fields(entry_string) fields: Dict[str, str] = {} for raw_field in raw_fields: - raw_key, raw_value = raw_field.split("=") - # Clean up key and value - key = raw_key.strip(" ").lower() - value = cls._parse_field_value(raw_value) + key, value = cls._split_field_into_key_and_value(raw_field) fields[key] = value return BibTeXEntry( @@ -157,6 +154,19 @@ def _parse_field_value(raw_value: str) -> str: return raw_value + @staticmethod + def _split_field_into_key_and_value(raw_field: str) -> Tuple[str, str]: + """ + Splits a field, such as `author = {{John Doe}},` into the field's key and value and cleans up both. + + :param raw_field: + :return: + """ + parts = raw_field.split("=", 1) + key = parts[0].strip().lower() + value = parts[1].strip() if len(parts) > 1 else "" + return key, BibTeXEntry._parse_field_value(value) + def split_entries(raw_content: str) -> List[str]: """ diff --git a/test/test_parser.py b/test/test_parser.py index fd40738..497b787 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -110,6 +110,53 @@ def test_split_fields_missing_open_brace(self) -> None: with self.assertRaises(KeyError): BibTeXEntry._split_fields(entry) + def test_field_with_equals_in_value(self) -> None: + bibtex_string = """@misc{test_entry, + note = {This URL has equals: https://example.com/?id=123&lang=en}, + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("note") + expected = "This URL has equals: https://example.com/?id=123&lang=en" + self.assertEqual(expected, actual) + + def test_double_braces(self) -> None: + bibtex_string = """@misc{test_entry, + title = {{Title with {{extra}} braces}}, + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("title") + expected = "Title with {{extra}} braces" + self.assertEqual(expected, actual) + + def test_quoted_field(self) -> None: + bibtex_string = """@misc{test_entry, + author = "Jane Doe", + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("author") + expected = "Jane Doe" + self.assertEqual(expected, actual) + + def test_multiline_field(self) -> None: + bibtex_string = """@misc{test_entry, + note = {This is a + multi-line + note.}, + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("note") + expected = "This is a\n multi-line\n note." + self.assertEqual(expected, actual) + + def test_field_with_url_and_brackets(self) -> None: + bibtex_string = """@misc{test_entry, + howpublished = {\\url{https://example.org/query?x=1&y=2}}, + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("howpublished") + expected = "\\url{https://example.org/query?x=1&y=2}" + self.assertEqual(expected, actual) + class TestSplitEntries(unittest.TestCase): def test_single_entry(self) -> None: