Skip to content

Commit 75700f7

Browse files
Merge pull request #33 from lightpanda-io/chromedp-links
chromedp: add links extract example
2 parents aebe28e + 68ea953 commit 75700f7

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed

chromedp/links/main.go

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
// Copyright 2023-2025 Lightpanda (Selecy SAS)
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
package main
15+
16+
import (
17+
"context"
18+
"errors"
19+
"flag"
20+
"fmt"
21+
"io"
22+
"log"
23+
"log/slog"
24+
"os"
25+
26+
"github.com/chromedp/cdproto/cdp"
27+
"github.com/chromedp/chromedp"
28+
)
29+
30+
// Process exit codes used by main.
const (
	// exitOK (0) is the exit status used when run returns no error.
	exitOK = iota
	// exitFail (1) is the exit status used when run returns an error.
	exitFail
)
34+
35+
// main starts interruptable context and runs the program.
36+
func main() {
37+
ctx, cancel := context.WithCancel(context.Background())
38+
defer cancel()
39+
40+
err := run(ctx, os.Args, os.Stdout, os.Stderr)
41+
if err != nil {
42+
fmt.Fprintln(os.Stderr, err.Error())
43+
os.Exit(exitFail)
44+
}
45+
46+
os.Exit(exitOK)
47+
}
48+
49+
// CdpWSDefault is the CDP websocket address used when neither the -cdp flag
// nor the CDPCLI_WS environment variable provides one.
const CdpWSDefault = "ws://127.0.0.1:9222"
52+
53+
func run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
54+
// declare runtime flag parameters.
55+
flags := flag.NewFlagSet(args[0], flag.ExitOnError)
56+
flags.SetOutput(stderr)
57+
58+
var (
59+
verbose = flags.Bool("verbose", false, "enable debug log level")
60+
cdpws = flags.String("cdp", env("CDPCLI_WS", CdpWSDefault), "cdp ws to connect")
61+
)
62+
63+
// usage func declaration.
64+
exec := args[0]
65+
flags.Usage = func() {
66+
fmt.Fprintf(stderr, "usage: %s <url>]\n", exec)
67+
fmt.Fprintf(stderr, "chromedp fetch an url and extracts all links.\n")
68+
fmt.Fprintf(stderr, "\nCommand line options:\n")
69+
flags.PrintDefaults()
70+
fmt.Fprintf(stderr, "\nEnvironment vars:\n")
71+
fmt.Fprintf(stderr, "\tCDPCLI_WS\tdefault %s\n", CdpWSDefault)
72+
}
73+
if err := flags.Parse(args[1:]); err != nil {
74+
return err
75+
}
76+
77+
if *verbose {
78+
slog.SetLogLoggerLevel(slog.LevelDebug)
79+
}
80+
81+
args = flags.Args()
82+
if len(args) != 1 {
83+
return errors.New("url is required")
84+
}
85+
url := args[0]
86+
87+
ctx, cancel := chromedp.NewRemoteAllocator(ctx,
88+
*cdpws, chromedp.NoModifyURL,
89+
)
90+
defer cancel()
91+
92+
// build context options
93+
var opts []chromedp.ContextOption
94+
if *verbose {
95+
opts = append(opts, chromedp.WithDebugf(log.Printf))
96+
}
97+
98+
ctx, cancel = chromedp.NewContext(ctx, opts...)
99+
defer cancel()
100+
101+
// ensure the first tab is created
102+
if err := chromedp.Run(ctx); err != nil {
103+
return fmt.Errorf("new tab: %w", err)
104+
}
105+
106+
err := chromedp.Run(ctx, chromedp.Navigate(url))
107+
if err != nil {
108+
return fmt.Errorf("navigate %s: %w", url, err)
109+
}
110+
111+
var a []*cdp.Node
112+
if err := chromedp.Run(ctx, chromedp.Nodes(`a[href]`, &a)); err != nil {
113+
return fmt.Errorf("get links: %w", err)
114+
}
115+
116+
links := make([]string, 0, len(a))
117+
for _, aa := range a {
118+
v, ok := aa.Attribute("href")
119+
if ok {
120+
links = append(links, v)
121+
}
122+
}
123+
124+
fmt.Fprintf(os.Stdout, "%v", links)
125+
126+
return nil
127+
}
128+
129+
// env looks up key in the process environment. When the variable is set
// (even to an empty string) its value is returned; when it is not set, dflt
// is returned instead.
func env(key, dflt string) string {
	if v, found := os.LookupEnv(key); found {
		return v
	}

	return dflt
}

0 commit comments

Comments
 (0)