Skip to content

Commit fa06b05

Browse files
committed
docs: simplify web scraper example
1 parent 4532ad2 commit fa06b05

File tree

2 files changed

+22
-29
lines changed

2 files changed

+22
-29
lines changed

examples/web-scraper/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ <h1><a href="https://web.scraper.workers.dev" class="Link Link-without-underline
244244
<button data-js-dialog-open class="Button Button-is-help Button-is-bordered" type="button" aria-label="Help">?</button>
245245
</div>
246246
<main class="Panel--main" is-smooth-scrolling>
247-
<form id="form" class="Stack" method="GET" action="http://localhost">
247+
<form id="form" class="Stack" method="GET" action="/">
248248
<div class="FormField">
249249
<div class="FormField--text">
250250
<label class="FormField--label" for="url">URL</label>

examples/web-scraper/main.go

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -95,36 +95,29 @@ func handler(w http.ResponseWriter, req *http.Request) {
9595
if attr == "" {
9696
nextText := make(map[string]string)
9797

98-
// sort of confusing...
99-
// https://stackoverflow.com/questions/27646434/reference-to-another-field-with-in-a-func-in-the-same-struct-in-golang
100-
// Are there better ways?
10198
for _, s := range selectors {
102-
elementContentHandler := &lolhtml.ElementContentHandler{
103-
Selector: s,
104-
}
105-
elementContentHandler.ElementHandler = func(ech *lolhtml.ElementContentHandler) lolhtml.ElementHandlerFunc {
106-
return func(e *lolhtml.Element) lolhtml.RewriterDirective {
107-
matches[ech.Selector] = append(matches[ech.Selector], textSeparator)
108-
nextText[ech.Selector] = ""
109-
return lolhtml.Continue
110-
}
111-
}(elementContentHandler)
112-
elementContentHandler.TextChunkHandler = func(ech *lolhtml.ElementContentHandler) lolhtml.TextChunkHandlerFunc {
113-
return func(t *lolhtml.TextChunk) lolhtml.RewriterDirective {
114-
nextText[ech.Selector] += t.Content()
115-
if t.IsLastInTextNode() {
116-
if spaced {
117-
nextText[ech.Selector] += " "
118-
}
119-
matches[ech.Selector] = append(matches[ech.Selector], nextText[ech.Selector])
120-
nextText[ech.Selector] = ""
121-
}
122-
return lolhtml.Continue
123-
}
124-
}(elementContentHandler)
99+
s := s
125100
handlers.ElementContentHandler = append(
126101
handlers.ElementContentHandler,
127-
*elementContentHandler,
102+
lolhtml.ElementContentHandler{
103+
Selector: s,
104+
ElementHandler: func(e *lolhtml.Element) lolhtml.RewriterDirective {
105+
matches[s] = append(matches[s], textSeparator)
106+
nextText[s] = ""
107+
return lolhtml.Continue
108+
},
109+
TextChunkHandler: func(t *lolhtml.TextChunk) lolhtml.RewriterDirective {
110+
nextText[s] += t.Content()
111+
if t.IsLastInTextNode() {
112+
if spaced {
113+
nextText[s] += " "
114+
}
115+
matches[s] = append(matches[s], nextText[s])
116+
nextText[s] = ""
117+
}
118+
return lolhtml.Continue
119+
},
120+
},
128121
)
129122
}
130123
} else {
@@ -162,7 +155,7 @@ func handler(w http.ResponseWriter, req *http.Request) {
162155
}
163156
defer resp.Body.Close()
164157

165-
// another confusing point
158+
// io.Copy can return a "The rewriter has been stopped." error; the check below does not treat that as a failure
166159
_, err = io.Copy(lolWriter, resp.Body)
167160
if err != nil && err.Error() != "The rewriter has been stopped." {
168161
sendError(w, http.StatusInternalServerError, err.Error(), pretty)

0 commit comments

Comments
 (0)