Skip to content

Commit ceb0613

Browse files
committed
parser.py: Allow equal signs in field values
Previously, the parser did not account for equal signs (`=`) inside field values: it unpacked the result of a plain `string.split("=")` into exactly one key and one value, so any field containing an additional `=` crashed the script with a non-descriptive error message. However, equal signs are perfectly valid in a value, for example in a URL field: `URL = "https://example.org/query?x=1"`. This commit adapts the parsing logic to split only on the first equal sign, making it robust against equal signs in field values. Furthermore, it cleans up the `BibTeXEntry.from_string` method a little and adds additional test cases to the unit tests. Fixes #3
1 parent 6a8f3b6 commit ceb0613

File tree

2 files changed

+62
-5
lines changed

2 files changed

+62
-5
lines changed

bibtex_linter/parser.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import List, Dict
1+
from typing import List, Dict, Tuple
22
import dataclasses
33
import enum
44
import re
@@ -85,10 +85,7 @@ def from_string(cls, entry_string: str) -> "BibTeXEntry":
8585
raw_fields = cls._split_fields(entry_string)
8686
fields: Dict[str, str] = {}
8787
for raw_field in raw_fields:
88-
raw_key, raw_value = raw_field.split("=")
89-
# Clean up key and value
90-
key = raw_key.strip(" ").lower()
91-
value = cls._parse_field_value(raw_value)
88+
key, value = cls._split_field_into_key_and_value(raw_field)
9289
fields[key] = value
9390

9491
return BibTeXEntry(
@@ -157,6 +154,19 @@ def _parse_field_value(raw_value: str) -> str:
157154

158155
return raw_value
159156

157+
@staticmethod
158+
def _split_field_into_key_and_value(raw_field: str) -> Tuple[str, str]:
159+
"""
160+
Splits a field, such as `author = {{John Doe}},` into the field's key and value and cleans up both.
161+
162+
:param raw_field:
163+
:return:
164+
"""
165+
parts = raw_field.split("=", 1)
166+
key = parts[0].strip().lower()
167+
value = parts[1].strip() if len(parts) > 1 else ""
168+
return key, BibTeXEntry._parse_field_value(value)
169+
160170

161171
def split_entries(raw_content: str) -> List[str]:
162172
"""

test/test_parser.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,53 @@ def test_split_fields_missing_open_brace(self) -> None:
110110
with self.assertRaises(KeyError):
111111
BibTeXEntry._split_fields(entry)
112112

113+
def test_field_with_equals_in_value(self):
114+
bibtex_string = """@misc{test_entry,
115+
note = {This URL has equals: https://example.com/?id=123&lang=en},
116+
}"""
117+
entry = BibTeXEntry.from_string(bibtex_string)
118+
actual = entry.fields.get("note")
119+
expected = "This URL has equals: https://example.com/?id=123&lang=en"
120+
self.assertEqual(expected, actual)
121+
122+
def test_double_braces(self):
123+
bibtex_string = """@misc{test_entry,
124+
title = {{Title with {{extra}} braces}},
125+
}"""
126+
entry = BibTeXEntry.from_string(bibtex_string)
127+
actual = entry.fields.get("title")
128+
expected = "Title with {{extra}} braces"
129+
self.assertEqual(expected, actual)
130+
131+
def test_quoted_field(self):
132+
bibtex_string = """@misc{test_entry,
133+
author = "Jane Doe",
134+
}"""
135+
entry = BibTeXEntry.from_string(bibtex_string)
136+
actual = entry.fields.get("author")
137+
expected = "Jane Doe"
138+
self.assertEqual(expected, actual)
139+
140+
def test_multiline_field(self):
141+
bibtex_string = """@misc{test_entry,
142+
note = {This is a
143+
multi-line
144+
note.},
145+
}"""
146+
entry = BibTeXEntry.from_string(bibtex_string)
147+
actual = entry.fields.get("note")
148+
expected = "This is a\n multi-line\n note."
149+
self.assertEqual(expected, actual)
150+
151+
def test_field_with_url_and_brackets(self):
152+
bibtex_string = """@misc{test_entry,
153+
howpublished = {\\url{https://example.org/query?x=1&y=2}},
154+
}"""
155+
entry = BibTeXEntry.from_string(bibtex_string)
156+
actual = entry.fields.get("howpublished")
157+
expected = "\\url{https://example.org/query?x=1&y=2}"
158+
self.assertEqual(expected, actual)
159+
113160

114161
class TestSplitEntries(unittest.TestCase):
115162
def test_single_entry(self) -> None:

0 commit comments

Comments
 (0)