Skip to content

Commit 7bc3609

Browse files
committed
More robust version of estimate based on go mod graph
The technique previously used for retrieving the module graph was costly and brittle, with many errors returned from importgraph.Build for unknown reasons. As Go modules now provide a simple API to fetch the import graph of a package, we can use this more reliable interface instead. It is faster and delivers more accurate results. For example: Before: $ go run . estimate github.com/charmbracelet/vhs 2> /dev/null github.com/charmbracelet/vhs github.com/go-rod/rod github.com/charmbracelet/ssh $ go run . estimate github.com/go-rod/rod 2> /dev/null github.com/go-rod/rod github.com/ysmood/goob github.com/ysmood/gson github.com/ysmood/got github.com/ysmood/gotrace github.com/ysmood/fetchup github.com/ysmood/leakless github.com/gobwas/ws After: $ go run . estimate github.com/charmbracelet/vhs 2> /dev/null github.com/charmbracelet/vhs github.com/charmbracelet/ssh github.com/erikgeiser/coninput github.com/go-rod/rod github.com/ysmood/fetchup github.com/ysmood/got github.com/ysmood/gop github.com/ysmood/goob github.com/ysmood/gotrace github.com/ysmood/gson github.com/ysmood/leakless github.com/mattn/go-localereader Note that previously github.com/go-rod/rod was reported as a single dependency of github.com/charmbracelet/vhs, when in fact it itself had multiple missing dependencies. The new implementation reports all of them from the beginning. It is still quite imprecise, probably due to module graph pruning [1], but also because go mod graph sometimes returns a few too many direct dependencies for an unknown reason. [1] https://go.dev/ref/mod#graph-pruning
1 parent 56171aa commit 7bc3609

File tree

1 file changed

+76
-73
lines changed

1 file changed

+76
-73
lines changed

estimate.go

Lines changed: 76 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,16 @@ package main
33
import (
44
"flag"
55
"fmt"
6-
"go/build"
76
"log"
87
"os"
98
"os/exec"
109
"path/filepath"
11-
"sort"
1210
"strings"
1311

1412
"golang.org/x/tools/go/vcs"
15-
"golang.org/x/tools/refactor/importgraph"
1613
)
1714

18-
func clone(srcdir, repo string) (string, error) {
15+
func clone(srcdir, repo string) error {
1916
done := make(chan struct{})
2017
defer close(done)
2118
go progressSize("vcs clone", srcdir, done)
@@ -25,21 +22,20 @@ func clone(srcdir, repo string) (string, error) {
2522
// version of Go.
2623
rr, err := vcs.RepoRootForImportPath(repo, false)
2724
if err != nil {
28-
return "", fmt.Errorf("get repo root: %w", err)
25+
return fmt.Errorf("get repo root: %w", err)
2926
}
30-
dir := filepath.Join(srcdir, rr.Root)
3127
// Run "git clone {repo} {dir}" (or the equivalent command for hg, svn, bzr)
32-
return dir, rr.VCS.Create(dir, rr.Repo)
28+
return rr.VCS.Create(srcdir, rr.Repo)
3329
}
3430

3531
func get(gopath, repodir, repo string) error {
3632
done := make(chan struct{})
3733
defer close(done)
3834
go progressSize("go get", repodir, done)
3935

40-
// Run go get without arguments directly in the module directory to
41-
// download all its dependencies (with -t to include the test dependencies).
42-
cmd := exec.Command("go", "get", "-t")
36+
// Run go mod tidy directly in the module directory to sync go.(mod|sum) and
37+
// download all its dependencies.
38+
cmd := exec.Command("go", "mod", "tidy")
4339
cmd.Dir = repodir
4440
cmd.Stderr = os.Stderr
4541
cmd.Env = append([]string{
@@ -69,22 +65,28 @@ func removeVendor(gopath string) (found bool, _ error) {
6965
}
7066

7167
func estimate(importpath string) error {
68+
removeTemp := func(path string) {
69+
if err := forceRemoveAll(path); err != nil {
70+
log.Printf("could not remove all %s: %v", path, err)
71+
}
72+
}
73+
7274
// construct a separate GOPATH in a temporary directory
7375
gopath, err := os.MkdirTemp("", "dh-make-golang")
7476
if err != nil {
7577
return fmt.Errorf("create temp dir: %w", err)
7678
}
77-
defer func() {
78-
if err := forceRemoveAll(gopath); err != nil {
79-
log.Printf("could not remove all %s: %v", gopath, err)
80-
}
81-
}()
79+
defer removeTemp(gopath)
80+
// second temporary directory for the repo sources
81+
repodir, err := os.MkdirTemp("", "dh-make-golang")
82+
if err != nil {
83+
return fmt.Errorf("create temp dir: %w", err)
84+
}
85+
defer removeTemp(repodir)
8286

8387
// clone the repo inside the src directory of the GOPATH
8488
// and init a Go module if it is not yet one.
85-
srcdir := filepath.Join(gopath, "src")
86-
repodir, err := clone(srcdir, importpath)
87-
if err != nil {
89+
if err := clone(repodir, importpath); err != nil {
8890
return fmt.Errorf("vcs clone: %w", err)
8991
}
9092
if !isFile(filepath.Join(repodir, "go.mod")) {
@@ -111,55 +113,79 @@ func estimate(importpath string) error {
111113
}
112114
}
113115

114-
// Remove standard lib packages
115-
cmd := exec.Command("go", "list", "std")
116+
// Get dependency graph from go mod graph
117+
cmd := exec.Command("go", "mod", "graph")
118+
cmd.Dir = repodir
116119
cmd.Stderr = os.Stderr
117120
cmd.Env = append([]string{
118121
"GOPATH=" + gopath,
119122
}, passthroughEnv()...)
120-
121123
out, err := cmd.Output()
122124
if err != nil {
123-
return fmt.Errorf("go list std: args: %v; error: %w", cmd.Args, err)
124-
}
125-
stdlib := make(map[string]bool)
126-
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
127-
stdlib[line] = true
125+
return fmt.Errorf("go mod graph: args: %v; error: %w", cmd.Args, err)
128126
}
129127

130-
stdlib["C"] = true // would fail resolving anyway
131-
132-
// Filter out all already-packaged ones:
128+
// Retrieve already-packaged ones
133129
golangBinaries, err := getGolangBinaries()
134130
if err != nil {
135131
return nil
136132
}
137133

138-
build.Default.GOPATH = gopath
139-
build.Default.Dir = repodir
140-
forward, _, errors := importgraph.Build(&build.Default)
141-
errLines := make([]string, 0, len(errors))
142-
for importPath, err := range errors {
143-
// For an unknown reason, parent directories and subpackages
144-
// of the current module report an error about not being able
145-
// to import them. We can safely ignore them.
146-
isSubpackage := strings.HasPrefix(importPath, importpath+"/")
147-
isParentDir := strings.HasPrefix(importpath, importPath+"/")
148-
if !isSubpackage && !isParentDir && importPath != importPath {
149-
errLines = append(errLines, fmt.Sprintf("%s: %v", importPath, err))
150-
}
134+
// Build a graph in memory from the output of go mod graph
135+
type Node struct {
136+
name string
137+
children []*Node
151138
}
152-
if len(errLines) > 0 {
153-
return fmt.Errorf("could not load packages: %v", strings.Join(errLines, "\n"))
139+
root := &Node{name: importpath}
140+
nodes := make(map[string]*Node)
141+
nodes[importpath] = root
142+
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
143+
// go mod graph outputs one line for each dependency. Each line
144+
// consists of the dependency preceded by the module that
145+
// imported it, separated by a single space. The module names
146+
// can have a version information delimited by the @ character
147+
src, dep, _ := strings.Cut(line, " ")
148+
depNode := &Node{name: dep}
149+
// Sometimes, the given import path is not the one output by
150+
// go mod graph, for instance when there are multiple major
151+
// versions.
152+
// The root module is the only one that does not have a version
153+
// indication with @ in the output of go mod graph, so if there
154+
// is no @ we always use the given importpath instead.
155+
if !strings.Contains(src, "@") {
156+
src = importpath
157+
}
158+
srcNode, ok := nodes[src]
159+
if !ok {
160+
log.Printf("source not found in graph: %s", src)
161+
continue
162+
}
163+
srcNode.children = append(srcNode.children, depNode)
164+
nodes[dep] = depNode
154165
}
155166

167+
// Analyse the dependency graph
156168
var lines []string
157169
seen := make(map[string]bool)
158170
rrseen := make(map[string]bool)
159-
node := func(importPath string, indent int) {
160-
rr, err := vcs.RepoRootForImportPath(importPath, false)
171+
var visit func(n *Node, indent int)
172+
visit = func(n *Node, indent int) {
173+
// Get the module name without its version, as go mod graph
174+
// can return multiple times the same module with different
175+
// versions.
176+
mod, _, _ := strings.Cut(n.name, "@")
177+
if seen[mod] {
178+
return
179+
}
180+
seen[mod] = true
181+
// Go version dependency is indicated as a dependency to "go" and
182+
// "toolchain", we do not use this information for now.
183+
if mod == "go" || mod == "toolchain" {
184+
return
185+
}
186+
rr, err := vcs.RepoRootForImportPath(mod, false)
161187
if err != nil {
162-
log.Printf("Could not determine repo path for import path %q: %v\n", importPath, err)
188+
log.Printf("Could not determine repo path for import path %q: %v\n", mod, err)
163189
return
164190
}
165191
if rrseen[rr.Root] {
@@ -170,35 +196,12 @@ func estimate(importpath string) error {
170196
return // already packaged in Debian
171197
}
172198
lines = append(lines, fmt.Sprintf("%s%s", strings.Repeat(" ", indent), rr.Root))
173-
}
174-
var visit func(x string, indent int)
175-
visit = func(x string, indent int) {
176-
if seen[x] {
177-
return
178-
}
179-
seen[x] = true
180-
if !stdlib[x] {
181-
node(x, indent)
182-
}
183-
for y := range forward[x] {
184-
visit(y, indent+1)
199+
for _, n := range n.children {
200+
visit(n, indent+1)
185201
}
186202
}
187203

188-
keys := make([]string, 0, len(forward))
189-
for key := range forward {
190-
keys = append(keys, key)
191-
}
192-
sort.Strings(keys)
193-
for _, key := range keys {
194-
if !strings.HasPrefix(key, importpath) {
195-
continue
196-
}
197-
if seen[key] {
198-
continue // already covered in a previous visit call
199-
}
200-
visit(key, 0)
201-
}
204+
visit(root, 0)
202205

203206
if len(lines) == 0 {
204207
log.Printf("%s is already fully packaged in Debian", importpath)

0 commit comments

Comments
 (0)