Skip to content

Commit 50278ed

Browse files
Thomas StrombergThomas Stromberg
authored andcommitted
improve security - sanitize html
1 parent ea70b19 commit 50278ed

File tree

8 files changed

+922
-266
lines changed

8 files changed

+922
-266
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Requires a service account with Cloud Storage access and a Brevo API key.
4848

4949
Storage is either local filesystem (`./data`) or GCS (`STORAGE_BUCKET`).
5050

51-
Email via Brevo API. Falls back to mock in dev.
51+
Email via Brevo API. Auto-detects mock mode when `BREVO_API_KEY` is not set.
5252

5353
Each thread is scraped once per poll cycle regardless of subscriber count. Polling interval calculated per-thread using exponential backoff: `5min × 2^(hours_since_post / 3)`, capped at 4 hours.
5454

email/provider.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Package email handles sending notification emails via multiple providers.
1+
// Package email handles sending notification emails via Brevo or mock.
22
package email
33

44
import (
@@ -10,25 +10,22 @@ import (
1010

1111
// Provider defines the interface for email sending implementations.
1212
type Provider interface {
13-
// Send sends an email with the given parameters.
1413
Send(ctx context.Context, to, subject, htmlBody string) error
1514
}
1615

17-
// Sender sends notification emails using a pluggable provider.
16+
// Sender sends notification emails.
1817
type Sender struct {
1918
provider Provider
2019
logger *slog.Logger
2120
baseURL string // For links in emails
22-
fromAddr string // From address for emails
2321
}
2422

25-
// New creates a new email sender with the given provider.
26-
func New(provider Provider, logger *slog.Logger, baseURL, fromAddr string) *Sender {
23+
// New creates a new email sender.
24+
func New(provider Provider, logger *slog.Logger, baseURL string) *Sender {
2725
return &Sender{
2826
provider: provider,
2927
logger: logger,
3028
baseURL: baseURL,
31-
fromAddr: fromAddr,
3229
}
3330
}
3431

email/templates.go

Lines changed: 203 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ func (s *Sender) formatNotificationBody(sub *notifier.Subscription, thread *noti
1818
b.WriteString("<style>\n")
1919
b.WriteString("body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; line-height: 1.6; color: #333; max-width: 800px; margin: 0 auto; padding: 20px; background: #fff; }\n")
2020
b.WriteString(".post { margin-bottom: 30px; padding-bottom: 30px; border-bottom: 2px solid #e67e22; }\n")
21-
b.WriteString(".post:last-of-type { border-bottom: none; padding-bottom: 0; }\n")
21+
b.WriteString(".post:last-of-type { border-bottom: none; padding-bottom: 0; margin-bottom: 0; }\n")
2222
b.WriteString(".post:first-of-type { padding-top: 0; }\n")
2323
b.WriteString(".meta { margin-bottom: 12px; }\n")
2424
b.WriteString(".post-number { color: #7f8c8d; font-weight: 500; font-size: 1.1em; text-decoration: none; }\n")
@@ -28,7 +28,8 @@ func (s *Sender) formatNotificationBody(sub *notifier.Subscription, thread *noti
2828
b.WriteString(".content { margin: 15px 0; }\n")
2929
b.WriteString(".content img { max-width: 100%; height: auto; margin: 10px 0; display: block; }\n")
3030
b.WriteString(".content blockquote { border-left: 3px solid #ddd; padding-left: 15px; margin: 10px 0; color: #666; font-size: 0.95em; }\n")
31-
b.WriteString(".footer { margin-top: 30px; padding-top: 15px; border-top: 1px solid #ddd; font-size: 0.9em; color: #7f8c8d; }\n")
31+
b.WriteString(".footer { margin-top: 30px; padding-top: 15px; font-size: 0.9em; color: #7f8c8d; }\n")
32+
b.WriteString(".footer.with-border { border-top: 1px solid #ddd; }\n")
3233
b.WriteString(".footer a { color: #7f8c8d; text-decoration: underline; margin: 0 8px; }\n")
3334
b.WriteString(".footer a:first-child { margin-left: 0; }\n")
3435
b.WriteString("a { color: #e67e22; text-decoration: none; }\n")
@@ -40,7 +41,8 @@ func (s *Sender) formatNotificationBody(sub *notifier.Subscription, thread *noti
4041
b.WriteString(".timestamp { color: #a0a0a0; }\n")
4142
b.WriteString(".content blockquote { border-left-color: #444; color: #b0b0b0; }\n")
4243
b.WriteString(".content img { opacity: 0.9; }\n")
43-
b.WriteString(".footer { border-top-color: #444; color: #a0a0a0; }\n")
44+
b.WriteString(".footer { color: #a0a0a0; }\n")
45+
b.WriteString(".footer.with-border { border-top-color: #444; }\n")
4446
b.WriteString(".footer a { color: #a0a0a0; }\n")
4547
b.WriteString("a { color: #ff8c42; }\n")
4648
b.WriteString("}\n")
@@ -61,9 +63,11 @@ func (s *Sender) formatNotificationBody(sub *notifier.Subscription, thread *noti
6163
b.WriteString("</div>\n")
6264

6365
b.WriteString("<div class=\"content\">\n")
64-
// Use HTML content if available (includes images), otherwise fall back to plain text
66+
// SECURITY: HTML content from forum posts is untrusted user input.
67+
// We sanitize it to allow only safe tags (img, blockquote, p, br, b, i, em, strong)
68+
// and safe attributes (src, alt for images) to prevent XSS and phishing.
6569
if post.HTMLContent != "" {
66-
b.WriteString(post.HTMLContent)
70+
b.WriteString(sanitizeHTML(post.HTMLContent))
6771
} else {
6872
b.WriteString(escapeHTML(post.Content))
6973
}
@@ -73,7 +77,12 @@ func (s *Sender) formatNotificationBody(sub *notifier.Subscription, thread *noti
7377
}
7478

7579
// Footer with thread link and manage link
76-
b.WriteString("<div class=\"footer\">\n")
80+
// Only add border if there are multiple posts (orange lines provide separation)
81+
footerClass := "footer"
82+
if len(posts) > 1 {
83+
footerClass = "footer with-border"
84+
}
85+
b.WriteString(fmt.Sprintf("<div class=\"%s\">\n", footerClass))
7786

7887
// Link to the last page with anchor to latest post (e.g., .../page-12#post-12345)
7988
// This loads the full page context but scrolls to the most recent post
@@ -153,3 +162,191 @@ func escapeHTML(s string) string {
153162
s = strings.ReplaceAll(s, "'", "&#39;")
154163
return s
155164
}
165+
166+
// sanitizeHTML sanitizes untrusted HTML content using a strict whitelist approach.
167+
// Only allows safe tags and attributes to prevent XSS, phishing, and tracking.
168+
// This is designed for email contexts where security is critical.
169+
func sanitizeHTML(html string) string {
170+
// Whitelist of allowed tags (no scripts, forms, iframes, etc.)
171+
allowedTags := map[string]bool{
172+
"p": true,
173+
"br": true,
174+
"b": true,
175+
"strong": true,
176+
"i": true,
177+
"em": true,
178+
"u": true,
179+
"blockquote": true,
180+
"img": true,
181+
"a": true,
182+
"ul": true,
183+
"ol": true,
184+
"li": true,
185+
"div": true,
186+
"span": true,
187+
}
188+
189+
var result strings.Builder
190+
inTag := false
191+
tagStart := 0
192+
193+
for i := 0; i < len(html); i++ {
194+
if html[i] == '<' {
195+
if inTag {
196+
// Malformed HTML - escape the previous <
197+
result.WriteString("&lt;")
198+
}
199+
inTag = true
200+
tagStart = i
201+
} else if html[i] == '>' && inTag {
202+
tagContent := html[tagStart+1 : i]
203+
204+
// Check if it's a closing tag
205+
isClosing := strings.HasPrefix(tagContent, "/")
206+
if isClosing {
207+
tagContent = tagContent[1:]
208+
}
209+
210+
// Extract tag name (before space or end)
211+
tagName := tagContent
212+
if idx := strings.IndexAny(tagContent, " \t\n"); idx != -1 {
213+
tagName = tagContent[:idx]
214+
}
215+
tagName = strings.ToLower(tagName)
216+
217+
if allowedTags[tagName] {
218+
// For allowed tags, sanitize attributes
219+
if isClosing {
220+
result.WriteString("</")
221+
result.WriteString(tagName)
222+
result.WriteString(">")
223+
} else {
224+
result.WriteString("<")
225+
result.WriteString(tagName)
226+
227+
// Only allow safe attributes for specific tags
228+
if tagName == "img" {
229+
// Extract and validate src and alt attributes
230+
if src := extractAttribute(tagContent, "src"); src != "" && isSafeURL(src) {
231+
result.WriteString(` src="`)
232+
result.WriteString(escapeHTML(src))
233+
result.WriteString(`"`)
234+
}
235+
if alt := extractAttribute(tagContent, "alt"); alt != "" {
236+
result.WriteString(` alt="`)
237+
result.WriteString(escapeHTML(alt))
238+
result.WriteString(`"`)
239+
}
240+
} else if tagName == "a" {
241+
// Extract and validate href attribute
242+
if href := extractAttribute(tagContent, "href"); href != "" && isSafeURL(href) {
243+
result.WriteString(` href="`)
244+
result.WriteString(escapeHTML(href))
245+
result.WriteString(`"`)
246+
}
247+
}
248+
// No attributes allowed for other tags
249+
250+
result.WriteString(">")
251+
}
252+
} else {
253+
// Disallowed tag - show placeholder for certain dangerous tags
254+
// This helps users understand that content was removed for security
255+
if !isClosing {
256+
// Only show placeholder for opening tags, not closing tags
257+
if tagName == "iframe" {
258+
// For iframes, extract the src URL and show it as a link
259+
if src := extractAttribute(tagContent, "src"); src != "" && isSafeURL(src) {
260+
result.WriteString("[iframe: <a href=\"")
261+
result.WriteString(escapeHTML(src))
262+
result.WriteString("\">")
263+
result.WriteString(escapeHTML(src))
264+
result.WriteString("</a>]")
265+
} else {
266+
result.WriteString("[replaced iframe]")
267+
}
268+
} else if tagName == "video" || tagName == "embed" || tagName == "object" {
269+
result.WriteString("[replaced ")
270+
result.WriteString(tagName)
271+
result.WriteString("]")
272+
} else {
273+
// For other disallowed tags, escape them
274+
result.WriteString("&lt;")
275+
result.WriteString(escapeHTML(tagContent))
276+
result.WriteString("&gt;")
277+
}
278+
}
279+
// Closing tags are silently removed
280+
}
281+
282+
inTag = false
283+
} else if !inTag {
284+
// Regular content - keep as-is (already HTML entities in the original)
285+
result.WriteByte(html[i])
286+
}
287+
}
288+
289+
// Handle unclosed tag at end
290+
if inTag {
291+
result.WriteString("&lt;")
292+
result.WriteString(escapeHTML(html[tagStart+1:]))
293+
}
294+
295+
return result.String()
296+
}
297+
298+
// extractAttribute extracts an attribute value from an HTML tag string.
299+
func extractAttribute(tag, attrName string) string {
300+
// Look for attrName="value" or attrName='value'
301+
patterns := []string{
302+
attrName + `="`,
303+
attrName + `='`,
304+
}
305+
306+
for _, pattern := range patterns {
307+
idx := strings.Index(strings.ToLower(tag), pattern)
308+
if idx == -1 {
309+
continue
310+
}
311+
312+
start := idx + len(pattern)
313+
quote := pattern[len(pattern)-1]
314+
end := strings.IndexByte(tag[start:], quote)
315+
if end == -1 {
316+
continue
317+
}
318+
319+
return tag[start : start+end]
320+
}
321+
322+
return ""
323+
}
324+
325+
// isSafeURL validates that a URL is safe for use in emails.
326+
// Only allows http, https, and relative URLs. Blocks javascript:, data:, etc.
327+
func isSafeURL(urlStr string) bool {
328+
urlStr = strings.TrimSpace(strings.ToLower(urlStr))
329+
330+
// Block dangerous protocols
331+
dangerousProtocols := []string{
332+
"javascript:",
333+
"data:",
334+
"vbscript:",
335+
"file:",
336+
"about:",
337+
}
338+
339+
for _, protocol := range dangerousProtocols {
340+
if strings.HasPrefix(urlStr, protocol) {
341+
return false
342+
}
343+
}
344+
345+
// Allow http, https, and relative URLs
346+
return strings.HasPrefix(urlStr, "http://") ||
347+
strings.HasPrefix(urlStr, "https://") ||
348+
strings.HasPrefix(urlStr, "/") ||
349+
strings.HasPrefix(urlStr, "./") ||
350+
strings.HasPrefix(urlStr, "../") ||
351+
(!strings.Contains(urlStr, ":") && len(urlStr) > 0) // relative path without protocol
352+
}

0 commit comments

Comments
 (0)