Skip to content

Commit ffcf0ec

Browse files
authored
feat: concurrent parsing for Gazelle (#630)
* feat: concurrent parsing for Gazelle
  Signed-off-by: Thulio Ferraz Assis <[email protected]>
* rm: unused code
  Signed-off-by: Thulio Ferraz Assis <[email protected]>
* doc: why 2 workers
1 parent 6e0cb65 commit ffcf0ec

File tree

10 files changed

+86
-77
lines changed

10 files changed

+86
-77
lines changed

gazelle/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ go_test(
5555
":gazelle_python_binary",
5656
":parse",
5757
":std_modules",
58-
#"@python_interpreter//:bazel_install/bin/python3",
5958
] + glob(["testdata/**"]),
6059
deps = [
6160
"@bazel_gazelle//testtools:go_default_library",

gazelle/fix.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ import (
1010
// that delete or rename rules should not be performed.
1111
func (py *Python) Fix(c *config.Config, f *rule.File) {
1212
// TODO(f0rmiga): implement.
13-
}
13+
}

gazelle/generate.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ func (py *Python) GenerateRules(args language.GenerateArgs) language.GenerateRes
191191

192192
var pyLibrary *rule.Rule
193193
if !pyLibraryFilenames.Empty() {
194-
deps, err := parser.parseAll(pyLibraryFilenames)
194+
deps, err := parser.parse(pyLibraryFilenames)
195195
if err != nil {
196196
log.Fatalf("ERROR: %v\n", err)
197197
}
@@ -228,7 +228,7 @@ func (py *Python) GenerateRules(args language.GenerateArgs) language.GenerateRes
228228
}
229229

230230
if hasPyBinary {
231-
deps, err := parser.parse(pyBinaryEntrypointFilename)
231+
deps, err := parser.parseSingle(pyBinaryEntrypointFilename)
232232
if err != nil {
233233
log.Fatalf("ERROR: %v\n", err)
234234
}
@@ -275,7 +275,7 @@ func (py *Python) GenerateRules(args language.GenerateArgs) language.GenerateRes
275275
// the file exists on disk.
276276
pyTestFilenames.Add(pyTestEntrypointFilename)
277277
}
278-
deps, err := parser.parseAll(pyTestFilenames)
278+
deps, err := parser.parse(pyTestFilenames)
279279
if err != nil {
280280
log.Fatalf("ERROR: %v\n", err)
281281
}

gazelle/kinds.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,4 @@ var pyLoads = []rule.LoadInfo{
8585
pyTestKind,
8686
},
8787
},
88-
}
88+
}

gazelle/language.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@ type Python struct {
1515
// interface. This is the entrypoint for the extension initialization.
1616
func NewLanguage() language.Language {
1717
return &Python{}
18-
}
18+
}

gazelle/parse.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
# parse.py is a long-living program that communicates over STDIN and STDOUT.
2-
# STDIN receives filepaths, one per line. For each parsed file, it outputs to
3-
# STDOUT the modules parsed out of the import statements.
2+
# STDIN receives parse requests, one per line. It outputs the parsed modules and
3+
# comments from all the files from each request.
44

55
import ast
6+
import concurrent.futures
67
import json
8+
import os
79
import sys
810
from io import BytesIO
911
from tokenize import COMMENT, tokenize
1012

1113

12-
def parse_import_statements(content):
14+
def parse_import_statements(content, filepath):
1315
modules = list()
1416
tree = ast.parse(content)
1517
for node in ast.walk(tree):
@@ -18,12 +20,14 @@ def parse_import_statements(content):
1820
module = {
1921
"name": subnode.name,
2022
"lineno": node.lineno,
23+
"filepath": filepath,
2124
}
2225
modules.append(module)
2326
elif isinstance(node, ast.ImportFrom) and node.level == 0:
2427
module = {
2528
"name": node.module,
2629
"lineno": node.lineno,
30+
"filepath": filepath,
2731
}
2832
modules.append(module)
2933
return modules
@@ -38,25 +42,45 @@ def parse_comments(content):
3842
return comments
3943

4044

41-
def parse(stdout, filepath):
42-
with open(filepath, "r") as file:
45+
def parse(repo_root, rel_package_path, filename):
46+
rel_filepath = os.path.join(rel_package_path, filename)
47+
abs_filepath = os.path.join(repo_root, rel_filepath)
48+
with open(abs_filepath, "r") as file:
4349
content = file.read()
44-
modules = parse_import_statements(content)
45-
comments = parse_comments(content)
50+
# From simple benchmarks, 2 workers gave the best performance here.
51+
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
52+
modules_future = executor.submit(parse_import_statements, content, rel_filepath)
53+
comments_future = executor.submit(parse_comments, content)
54+
modules = modules_future.result()
55+
comments = comments_future.result()
4656
output = {
4757
"modules": modules,
4858
"comments": comments,
4959
}
50-
print(json.dumps(output), end="", file=stdout)
51-
stdout.flush()
52-
stdout.buffer.write(bytes([0]))
53-
stdout.flush()
60+
return output
5461

5562

5663
def main(stdin, stdout):
57-
for filepath in stdin:
58-
filepath = filepath.rstrip()
59-
parse(stdout, filepath)
64+
with concurrent.futures.ProcessPoolExecutor() as executor:
65+
for parse_request in stdin:
66+
parse_request = json.loads(parse_request)
67+
repo_root = parse_request["repo_root"]
68+
rel_package_path = parse_request["rel_package_path"]
69+
filenames = parse_request["filenames"]
70+
outputs = list()
71+
if len(filenames) == 1:
72+
outputs.append(parse(repo_root, rel_package_path, filenames[0]))
73+
else:
74+
futures = [
75+
executor.submit(parse, repo_root, rel_package_path, filename)
76+
for filename in filenames
77+
if filename != ""
78+
]
79+
for future in concurrent.futures.as_completed(futures):
80+
outputs.append(future.result())
81+
print(json.dumps(outputs), end="", file=stdout, flush=True)
82+
stdout.buffer.write(bytes([0]))
83+
stdout.flush()
6084

6185

6286
if __name__ == "__main__":

gazelle/parser.go

Lines changed: 39 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"log"
1010
"os"
1111
"os/exec"
12-
"path/filepath"
1312
"strings"
1413
"sync"
1514
"time"
@@ -91,64 +90,61 @@ func newPython3Parser(
9190
}
9291
}
9392

94-
// parseAll parses all provided Python files by consecutively calling p.parse.
95-
func (p *python3Parser) parseAll(pyFilepaths *treeset.Set) (*treeset.Set, error) {
96-
allModules := treeset.NewWith(moduleComparator)
97-
it := pyFilepaths.Iterator()
98-
for it.Next() {
99-
modules, err := p.parse(it.Value().(string))
100-
if err != nil {
101-
return nil, err
102-
}
103-
modulesIt := modules.Iterator()
104-
for modulesIt.Next() {
105-
allModules.Add(modulesIt.Value())
106-
}
107-
}
108-
return allModules, nil
93+
// parseSingle parses a single Python file and returns the extracted modules
94+
// from the import statements; the parsed comments are only used to skip ignored modules.
95+
func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, error) {
96+
pyFilenames := treeset.NewWith(godsutils.StringComparator)
97+
pyFilenames.Add(pyFilename)
98+
return p.parse(pyFilenames)
10999
}
110100

111-
// parse parses a Python file and returns the extracted modules from the import
112-
// statements. An error is raised if communicating with the long-lived Python
113-
// parser over stdin and stdout fails.
114-
func (p *python3Parser) parse(pyFilepath string) (*treeset.Set, error) {
101+
// parse parses multiple Python files and returns the extracted modules from
102+
// the import statements; the parsed comments are only used to skip ignored modules.
103+
func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, error) {
115104
parserMutex.Lock()
116105
defer parserMutex.Unlock()
117106

118107
modules := treeset.NewWith(moduleComparator)
119108

120-
relFilepath := filepath.Join(p.relPackagePath, pyFilepath)
121-
absFilepath := filepath.Join(p.repoRoot, relFilepath)
122-
fmt.Fprintln(parserStdin, absFilepath)
109+
req := map[string]interface{}{
110+
"repo_root": p.repoRoot,
111+
"rel_package_path": p.relPackagePath,
112+
"filenames": pyFilenames.Values(),
113+
}
114+
encoder := json.NewEncoder(parserStdin)
115+
if err := encoder.Encode(&req); err != nil {
116+
return nil, fmt.Errorf("failed to parse: %w", err)
117+
}
118+
123119
reader := bufio.NewReader(parserStdout)
124120
data, err := reader.ReadBytes(0)
125121
if err != nil {
126-
return nil, fmt.Errorf("failed to parse %s: %w", pyFilepath, err)
122+
return nil, fmt.Errorf("failed to parse: %w", err)
127123
}
128124
data = data[:len(data)-1]
129-
var res parserResponse
130-
if err := json.Unmarshal(data, &res); err != nil {
131-
return nil, fmt.Errorf("failed to parse %s: %w", pyFilepath, err)
125+
var allRes []parserResponse
126+
if err := json.Unmarshal(data, &allRes); err != nil {
127+
return nil, fmt.Errorf("failed to parse: %w", err)
132128
}
133129

134-
annotations := annotationsFromComments(res.Comments)
135-
136-
for _, m := range res.Modules {
137-
// Check for ignored dependencies set via an annotation to the Python
138-
// module.
139-
if annotations.ignores(m.Name) {
140-
continue
141-
}
130+
for _, res := range allRes {
131+
annotations := annotationsFromComments(res.Comments)
142132

143-
// Check for ignored dependencies set via a Gazelle directive in a BUILD
144-
// file.
145-
if p.ignoresDependency(m.Name) {
146-
continue
147-
}
133+
for _, m := range res.Modules {
134+
// Check for ignored dependencies set via an annotation to the Python
135+
// module.
136+
if annotations.ignores(m.Name) {
137+
continue
138+
}
148139

149-
m.Filepath = relFilepath
140+
// Check for ignored dependencies set via a Gazelle directive in a BUILD
141+
// file.
142+
if p.ignoresDependency(m.Name) {
143+
continue
144+
}
150145

151-
modules.Add(m)
146+
modules.Add(m)
147+
}
152148
}
153149

154150
return modules, nil
@@ -173,17 +169,7 @@ type module struct {
173169
// The line number where the import happened.
174170
LineNumber uint32 `json:"lineno"`
175171
// The path to the module file relative to the Bazel workspace root.
176-
Filepath string
177-
}
178-
179-
// path returns the replaced dots with the os-specific path separator.
180-
func (m *module) path() string {
181-
return filepath.Join(strings.Split(m.Name, ".")...)
182-
}
183-
184-
// bazelPath returns the replaced dots with forward slashes.
185-
func (m *module) bazelPath() string {
186-
return strings.ReplaceAll(m.Name, ".", "/")
172+
Filepath string `json:"filepath"`
187173
}
188174

189175
// moduleComparator compares modules by name.

gazelle/python_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,4 @@ type testYAML struct {
208208
Stdout string `json:"stdout"`
209209
Stderr string `json:"stderr"`
210210
} `json:"expect"`
211-
}
211+
}

gazelle/std_modules.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,4 @@ func isStdModule(m module) (bool, error) {
9595
return true, nil
9696
}
9797
return false, nil
98-
}
98+
}

gazelle/target.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,4 @@ func (t *targetBuilder) build() *rule.Rule {
133133
}
134134
r.SetPrivateAttr(resolvedDepsKey, t.resolvedDeps)
135135
return r
136-
}
136+
}

0 commit comments

Comments
 (0)