diff --git a/bibtex_test.go b/bibtex_test.go
index dea74a4..d907955 100644
--- a/bibtex_test.go
+++ b/bibtex_test.go
@@ -82,6 +82,40 @@ func TestParser(t *testing.T) {
 	}
 }
 
+// TestTextOutsideEntries is a regression test for Issue #24, where a file with
+// text outside of its entries parsed without error but with fields missing.
+func TestTextOutsideEntries(t *testing.T) {
+	// Re-create the exact failing scenario
+	expected := NewBibTex()
+	entry := NewBibEntry("article", "CitekeyArticle")
+	entry.AddField("author", NewBibConst("John Doe"))
+	entry.AddField("title", NewBibConst("The independence of the continuum hypothesis"))
+	entry.AddField("journal", NewBibConst("Proceedings of the National Academy of Sciences"))
+	entry.AddField("year", NewBibConst("1963"))
+	entry.AddField("volume", NewBibConst("50"))
+	entry.AddField("number", NewBibConst("6"))
+	entry.AddField("pages", NewBibConst("1143--1148"))
+	expected.AddEntry(entry)
+
+	// Parse a file holding the same entry, preceded by text outside of it.
+	// NOTE(review): example/text-outside-entries2.bib (text between two
+	// entries) is added alongside this test but is not read here.
+	ex := "example/text-outside-entries.bib"
+	b, err := os.ReadFile(ex)
+	if err != nil {
+		// Fatalf rather than Errorf: without the file there is nothing to parse.
+		t.Fatalf("Cannot read %s: %v", ex, err)
+	}
+	s, err := Parse(bytes.NewReader(b))
+	if err != nil {
+		// Fatalf rather than Errorf: do not go on to dereference s after a failed parse.
+		t.Fatalf("Cannot parse valid bibtex file %s: %v", ex, err)
+	}
+
+	// Check equality
+	AssertEntryListsEqual(t, expected.Entries, s.Entries)
+}
+
 // Tests that multiple parse returns different instances of the parsed BibTex.
 // Otherwise the number of entries will pile up. (Issue #4)
 func TestMultiParse(t *testing.T) {
diff --git a/example/text-outside-entries.bib b/example/text-outside-entries.bib
new file mode 100644
index 0000000..9ccfee6
--- /dev/null
+++ b/example/text-outside-entries.bib
@@ -0,0 +1,10 @@
+% Encoding: UTF-8
+@article{CitekeyArticle,
+  author = "John Doe",
+  title = "The independence of the continuum hypothesis",
+  journal = "Proceedings of the National Academy of Sciences",
+  year = 1963,
+  volume = "50",
+  number = "6",
+  pages = "1143--1148",
+}
\ No newline at end of file
diff --git a/example/text-outside-entries2.bib b/example/text-outside-entries2.bib
new file mode 100644
index 0000000..4fa2837
--- /dev/null
+++ b/example/text-outside-entries2.bib
@@ -0,0 +1,20 @@
+% Encoding: UTF-8
+@article{CitekeyArticle,
+  author = "John Doe",
+  title = "The independence of the continuum hypothesis",
+  journal = "Proceedings of the National Academy of Sciences",
+  year = 1963,
+  volume = "50",
+  number = "6",
+  pages = "1143--1148",
+}
+% Same entry again
+@article{CitekeyArticle2,
+  author = "John Doe",
+  title = "The independence of the continuum hypothesis",
+  journal = "Proceedings of the National Academy of Sciences",
+  year = 1963,
+  volume = "50",
+  number = "6",
+  pages = "1143--1148",
+}
diff --git a/scanner.go b/scanner.go
index 73d4f8d..2b63b7a 100644
--- a/scanner.go
+++ b/scanner.go
@@ -12,14 +12,15 @@ var parseField bool
 
 // scanner is a lexical scanner
 type scanner struct {
-	commentMode bool
-	r           *bufio.Reader
-	pos         tokenPos
+	commentMode  bool
+	outsideEntry bool
+	r            *bufio.Reader
+	pos          tokenPos
 }
 
 // newScanner returns a new instance of scanner.
 func newScanner(r io.Reader) *scanner {
-	return &scanner{r: bufio.NewReader(r), pos: tokenPos{Char: 0, Lines: []int{}}}
+	return &scanner{outsideEntry: true, r: bufio.NewReader(r), pos: tokenPos{Char: 0, Lines: []int{}}}
 }
 
 // read reads the next rune from the buffered reader.
@@ -51,6 +52,11 @@ func (s *scanner) unread() {
 
 // Scan returns the next token and literal value.
 func (s *scanner) Scan() (tok token, lit string, err error) {
+	if s.outsideEntry {
+		// Ordinary comment scanning, but without generating a token
+		s.scanCommentBody()
+		s.outsideEntry = false
+	}
 	ch := s.read()
 	if isWhitespace(ch) {
 		s.ignoreWhitespace()
@@ -91,6 +97,7 @@ func (s *scanner) Scan() (tok token, lit string, err error) {
 	case '}':
 		if parseField { // reset parseField if reached end of entry.
 			parseField = false
+			s.outsideEntry = true
 		}
 		return tRBRACE, string(ch), nil
 	case '#':