Skip to content

Commit e05c805

Browse files
authored
Merge pull request #45 from apvar/fix/add-isSameSentence
added missing isSameSentence()
2 parents fd123c3 + f0d6a76 commit e05c805

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

text.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
package pdf
66

77
import (
8+
"math"
9+
"strings"
810
"unicode"
911
"unicode/utf16"
1012
)
@@ -156,3 +158,14 @@ var macRomanEncoding = [256]rune{
156158
0xf8ff, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc,
157159
0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7,
158160
}
161+
162+
// isSameSentence checks if the current text segment likely belongs to the same sentence
163+
// as the last text segment based on font, size, vertical position, and lack of
164+
// sentence-ending punctuation in the last segment.
165+
func isSameSentence(last, current Text) bool {
166+
return last.Font == current.Font &&
167+
math.Abs(last.FontSize-current.FontSize) < 0.1 &&
168+
math.Abs(last.Y-current.Y) < 5 &&
169+
!strings.ContainsAny(last.S, ".!?") &&
170+
last.S != ""
171+
}

0 commit comments

Comments
 (0)