11package sanitize
22
33import (
4+ "strings"
45 "sync"
6+ "unicode"
57
68 "github.com/microcosm-cc/bluemonday"
79)
@@ -10,7 +12,7 @@ var policy *bluemonday.Policy
1012var policyOnce sync.Once
1113
1214func Sanitize (input string ) string {
13- return FilterHTMLTags (FilterInvisibleCharacters (input ))
15+ return FilterHTMLTags (FilterCodeFenceMetadata ( FilterInvisibleCharacters (input ) ))
1416}
1517
1618// FilterInvisibleCharacters removes invisible or control characters that should not appear
@@ -40,6 +42,109 @@ func FilterHTMLTags(input string) string {
4042 return getPolicy ().Sanitize (input )
4143}
4244
45+ // FilterCodeFenceMetadata removes hidden or suspicious info strings from fenced code blocks.
46+ func FilterCodeFenceMetadata (input string ) string {
47+ if input == "" {
48+ return input
49+ }
50+
51+ lines := strings .Split (input , "\n " )
52+ insideFence := false
53+ currentFenceLen := 0
54+ for i , line := range lines {
55+ sanitized , toggled , fenceLen := sanitizeCodeFenceLine (line , insideFence , currentFenceLen )
56+ lines [i ] = sanitized
57+ if toggled {
58+ insideFence = ! insideFence
59+ if insideFence {
60+ currentFenceLen = fenceLen
61+ } else {
62+ currentFenceLen = 0
63+ }
64+ }
65+ }
66+ return strings .Join (lines , "\n " )
67+ }
68+
69+ const maxCodeFenceInfoLength = 48
70+
71+ func sanitizeCodeFenceLine (line string , insideFence bool , expectedFenceLen int ) (string , bool , int ) {
72+ idx := strings .Index (line , "```" )
73+ if idx == - 1 {
74+ return line , false , expectedFenceLen
75+ }
76+
77+ if hasNonWhitespace (line [:idx ]) {
78+ return line , false , expectedFenceLen
79+ }
80+
81+ fenceEnd := idx
82+ for fenceEnd < len (line ) && line [fenceEnd ] == '`' {
83+ fenceEnd ++
84+ }
85+
86+ fenceLen := fenceEnd - idx
87+ if fenceLen < 3 {
88+ return line , false , expectedFenceLen
89+ }
90+
91+ rest := line [fenceEnd :]
92+
93+ if insideFence {
94+ if expectedFenceLen != 0 && fenceLen != expectedFenceLen {
95+ return line , false , expectedFenceLen
96+ }
97+ return line [:fenceEnd ], true , fenceLen
98+ }
99+
100+ trimmed := strings .TrimSpace (rest )
101+
102+ if trimmed == "" {
103+ return line [:fenceEnd ], true , fenceLen
104+ }
105+
106+ if strings .IndexFunc (trimmed , unicode .IsSpace ) != - 1 {
107+ return line [:fenceEnd ], true , fenceLen
108+ }
109+
110+ if len (trimmed ) > maxCodeFenceInfoLength {
111+ return line [:fenceEnd ], true , fenceLen
112+ }
113+
114+ if ! isSafeCodeFenceToken (trimmed ) {
115+ return line [:fenceEnd ], true , fenceLen
116+ }
117+
118+ if len (rest ) > 0 && unicode .IsSpace (rune (rest [0 ])) {
119+ return line [:fenceEnd ] + " " + trimmed , true , fenceLen
120+ }
121+
122+ return line [:fenceEnd ] + trimmed , true , fenceLen
123+ }
124+
// hasNonWhitespace reports whether segment contains at least one rune for
// which unicode.IsSpace is false. An empty segment yields false.
func hasNonWhitespace(segment string) bool {
	notSpace := func(r rune) bool { return !unicode.IsSpace(r) }
	return strings.IndexFunc(segment, notSpace) >= 0
}
133+
// isSafeCodeFenceToken reports whether every rune in token is a Unicode
// letter, a Unicode digit, or one of the characters '+', '-', '_', '#', '.'.
// It returns false at the first rune outside that set; an empty token is
// accepted.
func isSafeCodeFenceToken(token string) bool {
	const allowedPunctuation = "+-_#."
	for _, ch := range token {
		switch {
		case unicode.IsLetter(ch), unicode.IsDigit(ch):
			// alphanumeric: allowed
		case strings.ContainsRune(allowedPunctuation, ch):
			// whitelisted punctuation: allowed
		default:
			return false
		}
	}
	return true
}
147+
43148func getPolicy () * bluemonday.Policy {
44149 policyOnce .Do (func () {
45150 p := bluemonday .StrictPolicy ()
0 commit comments