Skip to content

Commit 9e354ce

Browse files
authored
feat:(uniast) add Function.Signature (#48)
* feat: support `Repository.GetFileNodes()` * feat:(go) support get function signature * opt: only collect codes files * opt: Get File only return codes file * feat(lsp): support collect func signatures * update ast version * still keep files
1 parent 05a5cbc commit 9e354ce

File tree

9 files changed

+238
-165
lines changed

9 files changed

+238
-165
lines changed

docs/uniast-zh.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Universal Abstract-Syntax-Tree Specification (v0.1.1)
1+
# Universal Abstract-Syntax-Tree Specification (v0.1.2)
22

33
Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言无关的代码上下文数据结构,表示某个仓库代码的统一抽象语法树。收集了语言实体(函数、类型、常(变)量)的定义及其相互依赖关系,用于后续的 AI 理解、coding-workflow 开发。
44

@@ -237,6 +237,7 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言
237237
"StartOffset": 3290,
238238
"EndOffset": 3573,
239239
"Content": "// BindSession binds the session with current goroutine\nfunc (self *SessionManager) BindSession(Identity SessionIdentity, s Session) {\n\tshard : = self.shards[uint64(Identity)%uint64(self.opts.ShardNumber)]\n\n\tshard.Store(Identity, s)\n\n\tif self.opts.EnableImplicitlyTransmitAsync {\n\t\ttransmitSessionIdentity(Identity)\n\t}\n}",
240+
"Signature": "func (self *SessionManager) BindSession(Identity SessionIdentity, s Session)",
240241
"Receiver": {
241242
"IsPointer": true,
242243
"Type": {
@@ -320,6 +321,7 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言
320321

321322
- IsMethod: 是否是一个方法
322323

324+
- Signature: 函数签名,包括函数名、参数、返回值等
323325

324326
- IsInterfaceMethod: 是否是接口的方法--这里 abcoder parse 收集 InterfaceMethod 为了方便 LLM 理解,但是实际上 write 中并不会认为其是一个语言实体
325327

lang/collect/collect.go

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ type functionInfo struct {
7777
InputsSorted []dependency `json:"-"`
7878
Outputs map[int]dependency `json:"outputs,omitempty"`
7979
OutputsSorted []dependency `json:"-"`
80+
Signature string `json:"signature,omitempty"`
8081
}
8182

8283
func switchSpec(l uniast.Language) LanguageSpec {
@@ -596,11 +597,29 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
596597
log.Error("get receiver symbol for token %v failed: %v\n", rec, err)
597598
}
598599
}
599-
600600
tsyms, ts := c.getDepsWithLimit(ctx, sym, tps, depth-1)
601601
ipsyms, is := c.getDepsWithLimit(ctx, sym, ips, depth-1)
602602
opsyms, os := c.getDepsWithLimit(ctx, sym, ops, depth-1)
603-
c.updateFunctionInfo(sym, tsyms, ipsyms, opsyms, ts, is, os, rsym)
603+
604+
// find the last token among the receiver, type params, inputs and outputs; it marks where the signature ends
605+
lastToken := rec
606+
for _, t := range tps {
607+
if t > lastToken {
608+
lastToken = t
609+
}
610+
}
611+
for _, t := range ips {
612+
if t > lastToken {
613+
lastToken = t
614+
}
615+
}
616+
for _, t := range ops {
617+
if t > lastToken {
618+
lastToken = t
619+
}
620+
}
621+
622+
c.updateFunctionInfo(sym, tsyms, ipsyms, opsyms, ts, is, os, rsym, lastToken)
604623
}
605624

606625
// variable info: type
@@ -628,7 +647,7 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
628647
}
629648
}
630649

631-
func (c *Collector) updateFunctionInfo(sym *DocumentSymbol, tsyms, ipsyms, opsyms map[int]dependency, ts, is, os []dependency, rsym *dependency) {
650+
func (c *Collector) updateFunctionInfo(sym *DocumentSymbol, tsyms, ipsyms, opsyms map[int]dependency, ts, is, os []dependency, rsym *dependency, lastToken int) {
632651
if _, ok := c.funcs[sym]; !ok {
633652
c.funcs[sym] = functionInfo{}
634653
}
@@ -645,5 +664,12 @@ func (c *Collector) updateFunctionInfo(sym *DocumentSymbol, tsyms, ipsyms, opsym
645664
}
646665
f.Method.Receiver = *rsym
647666
}
667+
668+
// truncate the function text to get the signature
669+
if lastToken >= 0 && lastToken < len(sym.Tokens)-1 {
670+
lastPos := sym.Tokens[lastToken+1].Location.Range.Start
671+
f.Signature = ChunkHead(sym.Text, sym.Location.Range.Start, lastPos)
672+
}
673+
648674
c.funcs[sym] = f
649675
}

lang/collect/export.go

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"strings"
2323

2424
"github.com/cloudwego/abcoder/lang/log"
25-
"github.com/cloudwego/abcoder/lang/lsp"
2625
. "github.com/cloudwego/abcoder/lang/lsp"
2726
"github.com/cloudwego/abcoder/lang/uniast"
2827
)
@@ -44,8 +43,8 @@ func (c *Collector) fileLine(loc Location) uniast.FileLine {
4443
return uniast.FileLine{
4544
File: rel,
4645
Line: loc.Range.Start.Line + 1,
47-
StartOffset: lsp.PositionOffset(file_uri, text, loc.Range.Start),
48-
EndOffset: lsp.PositionOffset(file_uri, text, loc.Range.End),
46+
StartOffset: PositionOffset(file_uri, text, loc.Range.Start),
47+
EndOffset: PositionOffset(file_uri, text, loc.Range.End),
4948
}
5049
}
5150

@@ -75,7 +74,7 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) {
7574
c.filterLocalSymbols()
7675

7776
// export symbols
78-
visited := make(map[*lsp.DocumentSymbol]*uniast.Identity)
77+
visited := make(map[*DocumentSymbol]*uniast.Identity)
7978
for _, symbol := range c.syms {
8079
_, _ = c.exportSymbol(&repo, symbol, "", visited)
8180
}
@@ -191,15 +190,15 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
191190
// map receiver to methods
192191
receivers := make(map[*DocumentSymbol][]*DocumentSymbol, len(c.funcs)/4)
193192
for method, rec := range c.funcs {
194-
if method.Kind == lsp.SKMethod && rec.Method != nil && rec.Method.Receiver.Symbol != nil {
193+
if method.Kind == SKMethod && rec.Method != nil && rec.Method.Receiver.Symbol != nil {
195194
receivers[rec.Method.Receiver.Symbol] = append(receivers[rec.Method.Receiver.Symbol], method)
196195
}
197196
}
198197

199198
switch k := symbol.Kind; k {
200199
// Function
201-
case lsp.SKFunction, lsp.SKMethod:
202-
if p := c.cli.GetParent(symbol); p != nil && p.Kind == lsp.SKInterface {
200+
case SKFunction, SKMethod:
201+
if p := c.cli.GetParent(symbol); p != nil && p.Kind == SKInterface {
203202
// NOTICE: no need collect interface method
204203
break
205204
}
@@ -209,6 +208,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
209208
Exported: public,
210209
}
211210
info := c.funcs[symbol]
211+
obj.Signature = info.Signature
212212
// NOTICE: type params are collected into types
213213
if info.TypeParams != nil {
214214
for _, input := range info.TypeParamsSorted {
@@ -262,7 +262,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
262262
id.Name = iid.Name + "<" + id.Name + ">"
263263
}
264264
}
265-
if k == lsp.SKFunction {
265+
if k == SKFunction {
266266
// NOTICE: class static method name is: type::method
267267
id.Name += "::" + name
268268
} else {
@@ -285,20 +285,20 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
285285
}
286286
pdep := uniast.NewDependency(*depid, c.fileLine(dep.Location))
287287
switch dep.Symbol.Kind {
288-
case lsp.SKFunction:
288+
case SKFunction:
289289
obj.FunctionCalls = uniast.InsertDependency(obj.FunctionCalls, pdep)
290-
case lsp.SKMethod:
290+
case SKMethod:
291291
if obj.MethodCalls == nil {
292292
obj.MethodCalls = make([]uniast.Dependency, 0, len(deps))
293293
}
294294
// NOTICE: use loc token as key here, to make it more readable
295295
obj.MethodCalls = uniast.InsertDependency(obj.MethodCalls, pdep)
296-
case lsp.SKVariable, lsp.SKConstant:
296+
case SKVariable, SKConstant:
297297
if obj.GlobalVars == nil {
298298
obj.GlobalVars = make([]uniast.Dependency, 0, len(deps))
299299
}
300300
obj.GlobalVars = uniast.InsertDependency(obj.GlobalVars, pdep)
301-
case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass:
301+
case SKStruct, SKTypeParameter, SKInterface, SKEnum, SKClass:
302302
if obj.Types == nil {
303303
obj.Types = make([]uniast.Dependency, 0, len(deps))
304304
}
@@ -312,7 +312,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
312312
pkg.Functions[id.Name] = obj
313313

314314
// Type
315-
case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass:
315+
case SKStruct, SKTypeParameter, SKInterface, SKEnum, SKClass:
316316
obj := &uniast.Type{
317317
FileLine: fileLine,
318318
Content: content,
@@ -328,7 +328,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
328328
continue
329329
}
330330
switch dep.Symbol.Kind {
331-
case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass:
331+
case SKStruct, SKTypeParameter, SKInterface, SKEnum, SKClass:
332332
obj.SubStruct = uniast.InsertDependency(obj.SubStruct, uniast.NewDependency(*depid, c.fileLine(dep.Location)))
333333
default:
334334
log.Error("dep symbol %s not collected for \n", dep.Symbol, id)
@@ -351,12 +351,12 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
351351
obj.Identity = *id
352352
pkg.Types[id.Name] = obj
353353
// Vars
354-
case lsp.SKConstant, lsp.SKVariable:
354+
case SKConstant, SKVariable:
355355
obj := &uniast.Var{
356356
FileLine: fileLine,
357357
Content: content,
358358
IsExported: public,
359-
IsConst: k == lsp.SKConstant,
359+
IsConst: k == SKConstant,
360360
}
361361
if ty, ok := c.vars[symbol]; ok {
362362
tok, _ := c.cli.Locate(ty.Location)
@@ -374,18 +374,18 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
374374
return
375375
}
376376

377-
func mapKind(kind lsp.SymbolKind) uniast.TypeKind {
377+
func mapKind(kind SymbolKind) uniast.TypeKind {
378378
switch kind {
379-
case lsp.SKStruct:
379+
case SKStruct:
380380
return "struct"
381381
// XXX: C++ should use class instead of struct
382-
case lsp.SKClass:
382+
case SKClass:
383383
return "struct"
384-
case lsp.SKTypeParameter:
384+
case SKTypeParameter:
385385
return "type-parameter"
386-
case lsp.SKInterface:
386+
case SKInterface:
387387
return "interface"
388-
case lsp.SKEnum:
388+
case SKEnum:
389389
return "enum"
390390
default:
391391
panic(fmt.Sprintf("unexpected kind %v", kind))

lang/golang/parser/file.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,9 @@ func (p *GoParser) parseFunc(ctx *fileContext, funcDecl *ast.FuncDecl) (*Functio
455455
ctx.collectFields(funcDecl.Type.Results.List, &results)
456456
}
457457

458+
// collect signature
459+
sig := ctx.GetRawContent(funcDecl.Type)
460+
458461
// collect content
459462
content := string(ctx.GetRawContent(funcDecl))
460463

@@ -487,6 +490,7 @@ set_func:
487490
f.Results = results
488491
f.GlobalVars = collects.globalVars
489492
f.Types = collects.tys
493+
f.Signature = string(sig)
490494
return f, false
491495
}
492496

@@ -588,6 +592,7 @@ func (p *GoParser) parseInterface(ctx *fileContext, name *ast.Ident, decl *ast.I
588592
fn.FileLine = ctx.FileLine(fieldDecl)
589593
fn.IsMethod = true
590594
fn.IsInterfaceMethod = true
595+
fn.Signature = string(ctx.GetRawContent(fieldDecl))
591596
}
592597
p.collectTypes(ctx, fieldDecl.Type, st, inlined)
593598
}

lang/lsp/utils.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
package lsp
3232

3333
import (
34+
"strings"
35+
3436
"github.com/cloudwego/abcoder/lang/log"
3537
"github.com/cloudwego/abcoder/lang/utils"
3638
)
@@ -71,3 +73,94 @@ func PositionOffset(file_uri string, text string, pos Position) int {
7173

7274
return RelativePostionWithLines(*lines, Position{Line: 0, Character: 0}, pos)
7375
}
76+
77+
// FindSingle finds the single char's left token index in a text
78+
// start and end is the limit range of tokens
79+
func FindSingle(text string, lines []int, textPos Position, tokens []Token, sep string, start int, end int) int {
80+
if start < 0 {
81+
start = 0
82+
}
83+
if end >= len(tokens) {
84+
end = len(tokens) - 1
85+
}
86+
if start >= len(tokens) {
87+
return -1
88+
}
89+
sPos := RelativePostionWithLines(lines, textPos, tokens[start].Location.Range.Start)
90+
ePos := RelativePostionWithLines(lines, textPos, tokens[end].Location.Range.End)
91+
pos := strings.Index(text[sPos:ePos], sep)
92+
if pos == -1 {
93+
return -1
94+
}
95+
pos += sPos
96+
for i := start; i <= end && i < len(tokens); i++ {
97+
rel := RelativePostionWithLines(lines, textPos, tokens[i].Location.Range.Start)
98+
if rel > pos {
99+
return i - 1
100+
}
101+
}
102+
return -1
103+
}
104+
105+
// FindPair finds the right token index of lchar and left token index of rchar in a text
106+
// start and end is the limit range of tokens
107+
// notAllow is the character that not allow in the range
108+
func FindPair(text string, lines []int, textPos Position, tokens []Token, lchar rune, rchar rune, start int, end int, notAllow rune) (int, int) {
109+
if start < 0 {
110+
start = 0
111+
}
112+
if end >= len(tokens) {
113+
end = len(tokens) - 1
114+
}
115+
if start >= len(tokens) {
116+
return -1, -1
117+
}
118+
119+
startIndex := RelativePostionWithLines(lines, textPos, tokens[start].Location.Range.Start)
120+
121+
lArrow := -1
122+
lCount := 0
123+
rArrow := -1
124+
notAllowCount := 0
125+
ctext := text[startIndex:]
126+
for i, c := range ctext {
127+
if c == notAllow && lCount == 0 {
128+
return -1, -1
129+
} else if c == lchar && notAllowCount == 0 {
130+
lCount++
131+
if lCount == 1 {
132+
lArrow = i
133+
}
134+
} else if c == rchar && notAllowCount == 0 {
135+
if rchar == '>' && ctext[i-1] == '-' {
136+
// notice: -> is not a pair in Rust
137+
continue
138+
}
139+
lCount--
140+
if lCount == 0 {
141+
rArrow = i
142+
break
143+
}
144+
}
145+
}
146+
if lArrow == -1 || rArrow == -1 {
147+
return -1, -1
148+
}
149+
lArrow += startIndex
150+
rArrow += startIndex
151+
152+
s := -1
153+
e := -1
154+
for i := start; i <= end && i < len(tokens); i++ {
155+
rel := RelativePostionWithLines(lines, textPos, tokens[i].Location.Range.Start)
156+
if rel >= lArrow && s == -1 {
157+
s = i
158+
}
159+
if rel > rArrow {
160+
e = i - 1
161+
break
162+
}
163+
}
164+
165+
return s, e
166+
}

0 commit comments

Comments
 (0)