Skip to content

Commit 51d99f7

Browse files
authored
fix(collect,golang/parse): All line numbers now start from zero. (#9)
* fix: go/parser uses 0-based linenos. add checker. The default golang parser Position has Line and Column starting at 1 (while the offset starts at 0), which contradicts common sense and the LSP model. Add an independent checker to verify. Property to verify: forall sym, let contents = sym.file.read() and lines = contents.splitlines() in contents[sym.StartOffset:sym.EndOffset] = sym.Content /\ lines[sym.Line] = sym.Content.splitlines()[0] The general rules are (Pos and Position are from go/token) Safe constructs: * Pos (=int) * Position.{Filename,Offset} Unsafe constructs: * Position.{Line,Column}. - Especially `fset.Position(decl.Pos()).Line` * fix: collect/export outputs 0-based linenos. fix checker. This fixes the invalid offset warnings and invalid StartOffsets. The checker has a --implheads option that allows relaxed comparisons, because for rust sym.Content may contain extra implheads. * fix: /script, testdata and CI support
1 parent 790612e commit 51d99f7

File tree

18 files changed

+508
-22
lines changed

18 files changed

+508
-22
lines changed

.github/workflows/simple_checks.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
name: Simple Checks
2+
3+
on:
4+
push:
5+
branches: [ "main" ]
6+
pull_request:
7+
branches: [ "main" ]
8+
9+
jobs:
10+
build:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- uses: actions/checkout@v3
14+
- name: prepare rls
15+
run: rustup component add rust-analyzer
16+
- name: check linenos
17+
run: ./script/check_all_linenos.sh
18+

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,9 @@ __pycache__
6868
rust-analyzer-x86_64-unknown-linux-gnu
6969

7070
testdata/test
71+
testdata/jsons
7172

7273
src/lang/testdata
7374
*.json
7475

75-
tools
76+
tools

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ COZE_BOT_ID={YOUR_COZE_BOT_ID}
7272

7373
2. compile the parsers
7474
```
75-
sh ./script/make_parser.sh
75+
./script/make_parser.sh
7676
```
7777

7878
3. compile and run ABCoder

script/check_all_linenos.sh

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/bash
2+
# Copyright 2025 CloudWeGo Authors
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# https://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
root=$(dirname $(realpath $(dirname $0)))
17+
cd $root
18+
echo "[Making parser]"
19+
./script/make_parser.sh
20+
echo "[Done making parser]"
21+
22+
parser=tools/parser/lang
23+
mkdir -p testdata/jsons
24+
25+
do_test() {
26+
lang=$1
27+
srcpath=$2
28+
name=$3
29+
flags=$4
30+
31+
echo $name...
32+
$parser -d -v --no-need-comment collect $lang $srcpath > testdata/jsons/$name.json 2>testdata/jsons/$name.log
33+
cat testdata/jsons/$name.log
34+
python script/check_lineno.py --json testdata/jsons/$name.json --base $srcpath $flags > testdata/jsons/$name.check
35+
36+
if grep -q "All functions verified successfully!" testdata/jsons/$name.check; then
37+
echo " [PASS]"
38+
else
39+
echo " [FAIL]"
40+
exit 1
41+
fi
42+
}
43+
do_test go src/lang go "--zero_linebase"
44+
do_test rust testdata/rust2-wobyted rust2 "--zero_linebase --implheads"

script/check_lineno.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
"""检查 json 中诸符号的 StartOffset, EndOffset, Line, Content 的一致性
2+
(假设本文件在 src/lang 中)
3+
4+
例如检查本项目:
5+
6+
$ ./lang -d -v --no-need-comment collect go . > lang.json
7+
# 应当成功,尤其应当是 --zero_linebase(行号从 0 开始)
8+
$ python3 check_lineno.py --json lang.json --base . --zero_linebase
9+
10+
检查 rust 项目
11+
12+
$ ./lang -d -v --no-need-comment collect rust ../../testdata/rust2 > rust2.json
13+
$ python3 check_lineno.py --json rust2.json --base ../../testdata/rust2 --zero_linebase --implheads
14+
"""
15+
import json
16+
import os
17+
import argparse
18+
import sys
19+
from collections import defaultdict
20+
21+
22+
def trim_multiline(s, max_lines=5):
23+
lines = s.splitlines()
24+
if len(lines) > max_lines:
25+
return "\n".join(lines[:max_lines]) + "\n..."
26+
return s
27+
28+
29+
def safe_decode(b):
30+
try:
31+
return b.decode("utf-8")
32+
except UnicodeDecodeError:
33+
return b.decode("utf-8", errors="replace")
34+
35+
36+
def verify_function_content(
37+
json_path,
38+
base_dir=".",
39+
bail_on_error=False,
40+
filter_files=None,
41+
filter_funcs=None,
42+
zero_linebase=False,
43+
implheads=False,
44+
):
45+
with open(json_path, "r", encoding="utf-8") as f:
46+
data = json.load(f)
47+
48+
modules = data.get("Modules", {})
49+
errors = defaultdict(list)
50+
51+
for module_name, module in modules.items():
52+
packages = module.get("Packages", {})
53+
for package_name, package in packages.items():
54+
functions = package.get("Functions", {})
55+
for func_name, func in functions.items():
56+
file_name = func.get("File")
57+
if not file_name:
58+
continue
59+
if filter_files and file_name not in filter_files:
60+
continue
61+
if filter_funcs and func_name not in filter_funcs:
62+
continue
63+
64+
file_path = os.path.join(base_dir, file_name)
65+
try:
66+
with open(file_path, "rb") as src:
67+
content_bytes = src.read()
68+
except FileNotFoundError:
69+
print(f"[ERROR] File not found: {file_path}")
70+
errors[file_name].append(func_name)
71+
if bail_on_error:
72+
sys.exit(1)
73+
continue
74+
75+
start = func["StartOffset"]
76+
end = func["EndOffset"]
77+
expected_content = func["Content"]
78+
actual_bytes = content_bytes[start:end]
79+
actual_content = safe_decode(actual_bytes)
80+
81+
line_number = func["Line"]
82+
content_str = safe_decode(content_bytes)
83+
file_lines = content_str.splitlines()
84+
85+
try:
86+
if zero_linebase:
87+
actual_line_content = file_lines[line_number].strip()
88+
else:
89+
actual_line_content = file_lines[line_number - 1].strip()
90+
except IndexError:
91+
actual_line_content = "<out of range>"
92+
93+
if implheads:
94+
offset_match = actual_content in expected_content
95+
line_match = any(
96+
line.strip() == actual_line_content.strip()
97+
for line in expected_content.splitlines()
98+
)
99+
else:
100+
offset_match = actual_content == expected_content
101+
expected_line_start = (
102+
expected_content.splitlines()[0].strip()
103+
if expected_content
104+
else ""
105+
)
106+
line_match = actual_line_content == expected_line_start
107+
108+
print(f"[{module_name}/{package_name}] Checking function: {func_name}")
109+
if not offset_match:
110+
print(" [Mismatch] Offset content does not match.")
111+
print(" Expected:\n" + trim_multiline(expected_content))
112+
print(" Actual:\n" + trim_multiline(actual_content))
113+
if not line_match:
114+
display_line_number = line_number if zero_linebase else line_number
115+
print(f" [Mismatch] Line {display_line_number} mismatch:")
116+
print(f" Expected line (from JSON content):")
117+
if implheads:
118+
print(f" Any line in expected content matching actual line:")
119+
else:
120+
print(f" {expected_line_start}")
121+
print(f" Actual line:")
122+
print(f" {actual_line_content}")
123+
if not offset_match or not line_match:
124+
errors[file_name].append(func_name)
125+
if bail_on_error:
126+
sys.exit(1)
127+
if offset_match and line_match:
128+
print(" [OK] Function content and line verified.")
129+
print()
130+
131+
if errors:
132+
print("===== MISMATCH SUMMARY =====")
133+
for file, funcs in errors.items():
134+
print(f"File: {file}")
135+
for func in funcs:
136+
print(f" - {func}")
137+
print("============================")
138+
else:
139+
print("✅ All functions verified successfully!")
140+
141+
142+
if __name__ == "__main__":
143+
parser = argparse.ArgumentParser(
144+
description="Verify function content from JSON and source files."
145+
)
146+
parser.add_argument(
147+
"--json", type=str, default="input.json", help="Path to the JSON file"
148+
)
149+
parser.add_argument(
150+
"--base", type=str, default=".", help="Base directory for source files"
151+
)
152+
parser.add_argument(
153+
"--bail_on_error", action="store_true", help="Stop at first error"
154+
)
155+
parser.add_argument(
156+
"--filter_file",
157+
type=str,
158+
help="Comma-separated list of files to check (e.g. 'main.go,util.go')",
159+
)
160+
parser.add_argument(
161+
"--filter_func",
162+
type=str,
163+
help="Comma-separated list of function names to check",
164+
)
165+
parser.add_argument(
166+
"--zero_linebase",
167+
action="store_true",
168+
help="Line numbers in JSON are 0-based instead of 1-based",
169+
)
170+
parser.add_argument(
171+
"--implheads",
172+
action="store_true",
173+
help="Allow actual content to be a substring of expected content and lines to match any line",
174+
)
175+
176+
args = parser.parse_args()
177+
filter_files = set(args.filter_file.split(",")) if args.filter_file else None
178+
filter_funcs = set(args.filter_func.split(",")) if args.filter_func else None
179+
180+
verify_function_content(
181+
json_path=args.json,
182+
base_dir=args.base,
183+
bail_on_error=args.bail_on_error,
184+
filter_files=filter_files,
185+
filter_funcs=filter_funcs,
186+
zero_linebase=args.zero_linebase,
187+
implheads=args.implheads,
188+
)

src/lang/collect/export.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ func (c *Collector) fileLine(loc Location) uniast.FileLine {
4242
text := c.cli.GetFile(loc.URI).Text
4343
return uniast.FileLine{
4444
File: rel,
45-
Line: loc.Range.Start.Line + 1,
45+
Line: loc.Range.Start.Line,
4646
StartOffset: lsp.PositionOffset(text, loc.Range.Start),
4747
EndOffset: lsp.PositionOffset(text, loc.Range.End),
4848
}

src/lang/golang/parser/ctx.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ func (ctx *fileContext) FileLine(node ast.Node) FileLine {
169169
pos := ctx.fset.Position((node).Pos())
170170
rel, _ := filepath.Rel(ctx.repoDir, pos.Filename)
171171
end := ctx.fset.Position((node).End())
172-
ret := FileLine{File: rel, Line: pos.Line, StartOffset: pos.Offset, EndOffset: end.Offset}
172+
ret := FileLine{File: rel, Line: pos.Line - 1, StartOffset: pos.Offset, EndOffset: end.Offset}
173173
if _, ok := node.(*ast.TypeSpec); ok {
174174
// NOTICE: type spec is not the start of the type definition
175175
// so we need to adjust the offset = len("type ")
@@ -305,7 +305,7 @@ func (p *GoParser) mockTypes(typ ast.Expr, m map[string]Identity, file []byte, f
305305
st := p.newType(id.ModPath, id.PkgPath, id.Name)
306306
st.Exported = isUpperCase(id.Name[0])
307307
st.File = fpath
308-
st.Line = fset.Position(typ.Pos()).Line // not real
308+
st.Line = fset.Position(typ.Pos()).Line - 1 // not real
309309
// FIXME: cannot get specific entity's definition unless load the whole package
310310
st.Content = "type " + id.Name + " struct{}"
311311
}

src/lang/golang/parser/parser.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ func (p *GoParser) searchOnFile(file *ast.File, fset *token.FileSet, fcontent []
309309
fn := p.newFunc(mod, pkg, name)
310310
fn.Content = string(GetRawContent(fset, fcontent, decl, p.opts.CollectComment))
311311
fn.File = getRelativeOrBasePath(p.homePageDir, fset, decl.Pos())
312-
fn.Line = fset.Position(decl.Pos()).Line
312+
fn.Line = fset.Position(decl.Pos()).Line - 1
313313
fn.IsMethod = decl.Recv != nil
314314
fn.Receiver = receiver
315315
// if decl.Type.Params != nil {
@@ -338,7 +338,7 @@ func (p *GoParser) searchOnFile(file *ast.File, fset *token.FileSet, fcontent []
338338
st = p.newType(mod, pkg, spec.Name.Name)
339339
st.Content = string(GetRawContent(fset, fcontent, spec, p.opts.CollectComment))
340340
st.File = getRelativeOrBasePath(p.homePageDir, fset, decl.Pos())
341-
st.Line = fset.Position(decl.Pos()).Line
341+
st.Line = fset.Position(decl.Pos()).Line - 1
342342
st.TypeKind = getTypeKind(spec.Type)
343343
ids = append(ids, newIdentity(mod, pkg, name))
344344
}
@@ -355,7 +355,7 @@ func (p *GoParser) searchOnFile(file *ast.File, fset *token.FileSet, fcontent []
355355
fn := p.newFunc(mod, pkg, name)
356356
fn.Content = string(GetRawContent(fset, fcontent, m, p.opts.CollectComment))
357357
fn.File = getRelativeOrBasePath(p.homePageDir, fset, decl.Pos())
358-
fn.Line = fset.Position(decl.Pos()).Line
358+
fn.Line = fset.Position(decl.Pos()).Line - 1
359359
fn.IsMethod = true
360360
fn.Receiver = &Receiver{
361361
Type: st.Identity,
@@ -386,7 +386,7 @@ func (p *GoParser) searchOnFile(file *ast.File, fset *token.FileSet, fcontent []
386386
v := p.newVar(mod, pkg, name, decl.Tok == token.CONST)
387387
v.Content = string(GetRawContent(fset, fcontent, spec, p.opts.CollectComment))
388388
v.File = getRelativeOrBasePath(p.homePageDir, fset, decl.Pos())
389-
v.Line = fset.Position(decl.Pos()).Line
389+
v.Line = fset.Position(decl.Pos()).Line - 1
390390
if spec.Type != nil {
391391
var m = map[string]Identity{}
392392
// NOTICE: collect all types

src/lang/lsp/lsp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ type Location struct {
9999
}
100100

101101
func (l Location) String() string {
102-
return fmt.Sprintf("%s:%d:%d-%d:%d", l.URI, l.Range.Start.Line+1, l.Range.Start.Character+1, l.Range.End.Line+1, l.Range.End.Character+1)
102+
return fmt.Sprintf("%s:%d:%d-%d:%d", l.URI, l.Range.Start.Line, l.Range.Start.Character, l.Range.End.Line, l.Range.End.Character)
103103
}
104104

105105
var locationMarshalJSONInline = true

src/lang/lsp/lsp_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// Copyright 2025 CloudWeGo Authors
2-
//
2+
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
55
// You may obtain a copy of the License at
6-
//
6+
//
77
// https://www.apache.org/licenses/LICENSE-2.0
8-
//
8+
//
99
// Unless required by applicable law or agreed to in writing, software
1010
// distributed under the License is distributed on an "AS IS" BASIS,
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

0 commit comments

Comments
 (0)