From 6f40f3e5d779df086e9924e6b057b976637febc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mislav=20Marohni=C4=87?= Date: Mon, 16 Jun 2025 00:09:32 +0200 Subject: [PATCH] Test_parser: make failures for text content mismatches more readable When Test_parser iterates over the myriad test pages in the suite, if any parsed text for an article doesn't match what's in the `expected.html` fixture, the test will generate a failure message using DiffPrettyText from diffmatchpatch. This diff formatter will output the complete text with the additions and removals marked up with ANSI escape sequences for terminal colors green and red. While this is alright for shorter text, some of the test pages are really long, and even when the mismatch is in just one word or one line, the entire text will always be printed together with the test failure, making the overall Test_parser result much harder to scroll through and comprehend. This change replaces the test failure message with a diff generated so that long passages of text are truncated with `<...>`. Furthermore, this adds diff markers `{+ +}` for additions and `[- -]` for removals, the same markers used by the word-diff mode of git-diff(1). 
--- parser_test.go | 70 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/parser_test.go b/parser_test.go index 7c7744d..29e3f06 100644 --- a/parser_test.go +++ b/parser_test.go @@ -1,6 +1,7 @@ package readability import ( + "bytes" "encoding/json" "fmt" "net/url" @@ -9,6 +10,7 @@ import ( "strings" "testing" "time" + "unicode/utf8" "github.com/go-shiori/dom" "github.com/sergi/go-diff/diffmatchpatch" @@ -236,14 +238,10 @@ func compareArticleContent(result, expected *html.Node) error { comparator := diffmatchpatch.New() diffs := comparator.DiffMain(resultText, expectedText, false) + wordDiff := truncatedPrettyDiff(diffs) if len(diffs) > 1 { - return fmt.Errorf("text content is different\n"+ - "want : %s\n"+ - "got : %s\n"+ - "diffs : %s", - expectedExcerpt, resultExcerpt, - comparator.DiffPrettyText(diffs)) + return fmt.Errorf("text content is different:\nnode: %s\ntext: %s", resultExcerpt, wordDiff) } // Move to next node @@ -255,6 +253,66 @@ func compareArticleContent(result, expected *html.Node) error { return nil } +func truncatedPrettyDiff(diffs []diffmatchpatch.Diff) string { + var buf bytes.Buffer + for i, d := range diffs { + switch d.Type { + case diffmatchpatch.DiffInsert: + buf.WriteString("\x1B[32m") + buf.WriteString("{+") + writeTruncatedText(&buf, d.Text, 40, truncateMiddle) + buf.WriteString("+}") + buf.WriteString("\x1B[m") + case diffmatchpatch.DiffDelete: + buf.WriteString("\x1B[31m") + buf.WriteString("[-") + writeTruncatedText(&buf, d.Text, 40, truncateMiddle) + buf.WriteString("-]") + buf.WriteString("\x1B[m") + case diffmatchpatch.DiffEqual: + tt := truncateMiddle + if i == 0 { + tt = truncateLeft + } else if i == len(diffs)-1 { + tt = truncateRight + } + writeTruncatedText(&buf, d.Text, 40, tt) + default: + panic("diff type not implemented: " + d.Type.String()) + } + } + return buf.String() +} + +type truncateType int + +const ( + truncateLeft truncateType = iota + truncateRight + 
truncateMiddle +) + +func writeTruncatedText(buf *bytes.Buffer, text string, limit int, where truncateType) { + n := utf8.RuneCountInString(text) + if n <= limit { + buf.WriteString(text) + return + } + textRunes := []rune(text) + switch where { + case truncateLeft: + buf.WriteString("<...>") + buf.WriteString(string(textRunes[n-limit:])) + case truncateRight: + buf.WriteString(string(textRunes[:limit])) + buf.WriteString("<...>") + case truncateMiddle: + buf.WriteString(string(textRunes[:limit/2])) + buf.WriteString("<...>") + buf.WriteString(string(textRunes[n-(limit/2):])) + } +} + func getNodeExcerpt(node *html.Node) string { outer := dom.OuterHTML(node) outer = strings.Join(strings.Fields(outer), " ")