Skip to content

Commit 986fd28

Browse files
Merge pull request #11 from kohkimakimoto/dev
add RegexRemove preprocessor
2 parents a58bd76 + 24b5664 commit 986fd28

File tree

5 files changed

+858
-353
lines changed

5 files changed

+858
-353
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package pongo2
2+
3+
import (
4+
"io"
5+
"regexp"
6+
)
7+
8+
// RegexRemove is a preprocessor that removes every part of its input that
// matches one of its compiled regular expressions.
9+
type RegexRemove struct {
10+
// patterns holds the compiled expressions; Execute applies them in slice order.
patterns []*regexp.Regexp
11+
}
12+
13+
// NewRegexRemove creates a new instance of RegexRemove
14+
// patterns can specify multiple regular expression pattern strings to match parts to be removed
15+
func NewRegexRemove(patterns ...string) (*RegexRemove, error) {
16+
regexps := make([]*regexp.Regexp, len(patterns))
17+
for i, pattern := range patterns {
18+
re, err := regexp.Compile(pattern)
19+
if err != nil {
20+
return nil, err
21+
}
22+
regexps[i] = re
23+
}
24+
return &RegexRemove{patterns: regexps}, nil
25+
}
26+
27+
// MustNewRegexRemove creates a new instance of RegexRemove and panics if any pattern is invalid
28+
// This is useful when patterns are statically determined and error handling can be simplified
29+
func MustNewRegexRemove(patterns ...string) *RegexRemove {
30+
p, err := NewRegexRemove(patterns...)
31+
if err != nil {
32+
panic(err)
33+
}
34+
return p
35+
}
36+
37+
func (p *RegexRemove) Execute(dst io.Writer, src io.Reader) error {
38+
b, err := io.ReadAll(src)
39+
if err != nil {
40+
return err
41+
}
42+
43+
// Remove parts based on each regular expression pattern in order
44+
for _, re := range p.patterns {
45+
b = re.ReplaceAll(b, []byte(""))
46+
}
47+
48+
_, err = dst.Write(b)
49+
return err
50+
}
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
package pongo2
2+
3+
import (
4+
"bytes"
5+
"strings"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
)
10+
11+
// TestRegexRemove_Execute is a table-driven test of NewRegexRemove and
// Execute. Each case either expects NewRegexRemove to reject its patterns
// (hasError is true) or expects Execute to turn input into expected.
func TestRegexRemove_Execute(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
patterns []string
15+
input string
16+
expected string
17+
hasError bool
18+
}{
19+
{
20+
name: "remove custom tags",
21+
patterns: []string{`(?s)<!--\s*DEBUG\s*-->.*?<!--\s*/DEBUG\s*-->`},
22+
input: `
23+
<div>visible</div>
24+
<!-- DEBUG -->
25+
debug info
26+
<!-- /DEBUG -->
27+
<div>visible2</div>
28+
`,
29+
expected: `
30+
<div>visible</div>
31+
32+
<div>visible2</div>
33+
`,
34+
hasError: false,
35+
},
36+
{
37+
name: "no match keeps input as is",
38+
patterns: []string{`(?s)<remove>.*?</remove>`},
39+
input: `
40+
<div>
41+
content
42+
</div>
43+
`,
44+
expected: `
45+
<div>
46+
content
47+
</div>
48+
`,
49+
hasError: false,
50+
},
51+
{
52+
name: "invalid regex returns error",
53+
patterns: []string{`[`},
54+
input: "",
55+
expected: "",
56+
hasError: true,
57+
},
58+
}
59+
60+
for _, tt := range tests {
61+
t.Run(tt.name, func(t *testing.T) {
62+
processor, err := NewRegexRemove(tt.patterns...)
63+
64+
// Error cases only check that construction failed; Execute is not run for them.
if tt.hasError {
65+
assert.Error(t, err)
66+
return
67+
}
68+
69+
assert.NoError(t, err)
70+
71+
src := strings.NewReader(tt.input)
72+
dst := &bytes.Buffer{}
73+
74+
err = processor.Execute(dst, src)
75+
76+
assert.NoError(t, err)
77+
assert.Equal(t, tt.expected, dst.String())
78+
})
79+
}
80+
}
81+
82+
func TestMustNewRegexRemove(t *testing.T) {
83+
t.Run("valid patterns", func(t *testing.T) {
84+
processor := MustNewRegexRemove(`(?s)<style[^>]*>.*?</style>`, `(?s)<script[^>]*>.*?</script>`)
85+
assert.NotNil(t, processor)
86+
87+
input := `<style>test</style><script>test</script><div>content</div>`
88+
src := strings.NewReader(input)
89+
dst := &bytes.Buffer{}
90+
91+
err := processor.Execute(dst, src)
92+
93+
assert.NoError(t, err)
94+
assert.Equal(t, "<div>content</div>", dst.String())
95+
})
96+
97+
t.Run("invalid pattern panics", func(t *testing.T) {
98+
assert.Panics(t, func() {
99+
MustNewRegexRemove(`[`)
100+
})
101+
})
102+
}
103+
104+
// TestRegexRemove_TemplateExtractEquivalent runs one fixed pair of patterns
// over several inputs and checks that <style> and <script> elements carrying
// a data-extract attribute are removed while plain <style> and <script>
// elements pass through unchanged.
func TestRegexRemove_TemplateExtractEquivalent(t *testing.T) {
105+
tests := []struct {
106+
name string
107+
input string
108+
expected string
109+
}{
110+
{
111+
name: "remove <style data-extract> tag",
112+
input: `
113+
<style data-extract>
114+
.test { color: red; }
115+
</style>
116+
<div>content</div>
117+
`,
118+
expected: `
119+
120+
<div>content</div>
121+
`,
122+
},
123+
{
124+
name: "keep style tag without data-extract attribute",
125+
input: `
126+
<style>
127+
.normal { color: black; }
128+
</style>
129+
`,
130+
expected: `
131+
<style>
132+
.normal { color: black; }
133+
</style>
134+
`,
135+
},
136+
{
137+
name: "remove script tag with data-extract attribute",
138+
input: `<script data-extract>
139+
...
140+
</script>
141+
<div>
142+
...
143+
</div>`,
144+
expected: `
145+
<div>
146+
...
147+
</div>`,
148+
},
149+
{
150+
name: "keep normal script tag",
151+
input: `<script>
152+
console.log('normal script');
153+
</script>
154+
<div>Content</div>`,
155+
expected: `<script>
156+
console.log('normal script');
157+
</script>
158+
<div>Content</div>`,
159+
},
160+
}
161+
162+
for _, tt := range tests {
163+
t.Run(tt.name, func(t *testing.T) {
164+
// Both patterns use (?i) for case-insensitive matching and (?s) so that
// "." also matches newlines; the non-greedy ".*?" stops at the first closing tag.
processor := MustNewRegexRemove(
165+
`(?i)(?s)<style[^>]*\bdata-extract\b[^>]*>.*?</style>`,
166+
`(?i)(?s)<script[^>]*\bdata-extract\b[^>]*>.*?</script>`,
167+
)
168+
src := strings.NewReader(tt.input)
169+
dst := &bytes.Buffer{}
170+
171+
err := processor.Execute(dst, src)
172+
173+
assert.NoError(t, err)
174+
assert.Equal(t, tt.expected, dst.String())
175+
})
176+
}
177+
}

website/go.sum

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,12 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
44
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
55
github.com/kohkimakimoto/go-subprocess v0.2.0 h1:bNWgqGUL9UqLYOSXRsSSckyaUAPxp/vMFHgwQdKdALs=
66
github.com/kohkimakimoto/go-subprocess v0.2.0/go.mod h1:q3fQJ0dlq0Gt+U8mEKB7EWFkpw9ugHmz+COM0Vlq0cQ=
7-
github.com/labstack/echo/v4 v4.13.3 h1:pwhpCPrTl5qry5HRdM5FwdXnhXSLSY+WE+YQSeCaafY=
8-
github.com/labstack/echo/v4 v4.13.3/go.mod h1:o90YNEeQWjDozo584l7AwhJMHN0bOC4tAfg+Xox9q5g=
7+
github.com/labstack/echo/v4 v4.13.4 h1:oTZZW+T3s9gAu5L8vmzihV7/lkXGZuITzTQkTEhcXEA=
98
github.com/labstack/echo/v4 v4.13.4/go.mod h1:g63b33BZ5vZzcIUF8AtRH40DrTlXnx4UMC8rBdndmjQ=
109
github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0=
1110
github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU=
12-
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
13-
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
11+
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
1412
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
15-
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
1613
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
1714
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
1815
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
@@ -32,22 +29,16 @@ github.com/yuin/goldmark-emoji v1.0.4 h1:vCwMkPZSNefSUnOW2ZKRUjBSD5Ok3W78IXhGxxA
3229
github.com/yuin/goldmark-emoji v1.0.4/go.mod h1:tTkZEbwu5wkPmgTcitqddVxY9osFZiavD+r4AzQrh1U=
3330
github.com/yuin/goldmark-meta v1.1.0 h1:pWw+JLHGZe8Rk0EGsMVssiNb/AaPMHfSRszZeUeiOUc=
3431
github.com/yuin/goldmark-meta v1.1.0/go.mod h1:U4spWENafuA7Zyg+Lj5RqK/MF+ovMYtBvXi1lBb2VP0=
35-
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
36-
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
32+
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
3733
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
38-
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
39-
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
34+
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
4035
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
41-
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
4236
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
43-
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
44-
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
37+
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
4538
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
46-
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
47-
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
39+
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
4840
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
49-
golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg=
50-
golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
41+
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
5142
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
5243
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
5344
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

0 commit comments

Comments
 (0)