diff --git a/README.md b/README.md
index a1859af..bf4e565 100644
--- a/README.md
+++ b/README.md
@@ -118,12 +118,12 @@ $ exit
 ABCoder currently supports the following languages:
 
 | Language | Parser | Writer |
-| -------- | ----------- | ----------- |
-| Go | ✅ | ✅ |
+| -------- | ----------- | ----------- |
+| Go | ✅ | ✅ |
 | Rust | ✅ | Coming Soon |
 | C | ✅ | Coming Soon |
 | Python | ✅ | Coming Soon |
-
+| Thrift | ✅ | Coming Soon |
 
 # Getting Involved
diff --git a/docs/uniast-en.md b/docs/uniast-en.md
index 48ff999..f99a387 100644
--- a/docs/uniast-en.md
+++ b/docs/uniast-en.md
@@ -20,7 +20,8 @@ To ensure precise querying and scalable storage, `ModPath?PkgPath#SymbolName` is
 > * In Python, a package is a directory, which may contain sub-packages. A package can also contain modules, which are .py files inside the package directory.
 > * In Rust, the term package does not exist at all. Instead, a crate (project) contains multiple modules, and modules may include sub-modules.
 > * In C, neither concept exists at all.
->
+> * In Thrift, there is no concept of modules either, but there are per-language namespaces.
+>
 > Do not confuse them with the terminology used in abcoder!
 
 > In abcoder, unless otherwise specified, the module (mod) and package (pkg) are defined as follows:
@@ -44,6 +45,8 @@ To ensure precise querying and scalable storage, `ModPath?PkgPath#SymbolName` is
   - Golang: Corresponds to a package, e.g., github.com/cloudwego/hertz/pkg/app/server
   - Rust: Corresponds to a mod, e.g., [serde_json](https://crates.io/crates/serde_json)::[value](https://docs.rs/serde_json/1.0.114/serde_json/value/index.html)
+  - Thrift: Corresponds to a namespace; specifying a different IDL package type results in a different namespace
+    - Example: `namespace go test.main` -> `test.main`
   - Note: This should be as equivalent as possible to the import (use) path in code files for easier LLM understanding
diff --git a/docs/uniast-zh.md b/docs/uniast-zh.md
index 2a7d644..25ccafb 100644
--- a/docs/uniast-zh.md
+++ b/docs/uniast-zh.md
@@ -20,6 +20,7 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言
 > * 在 Python 中则是,package 是一个目录,可能包含子 package。而且 package 也可能包含 module,是 package 目录下的 py 文件。
 > * 在 Rust 中根本没有 package 的说法,而是 crate(项目)包含了诸 module。module 可能包含子 module。
 > * 在 C 中就完全没有这两个东西。
+> * 在 Thrift 中就完全没有 module 的概念,但是有各个语言的 namespace。
 >
 > 不要把它们和 abcoder 的描述混淆!
 > 在 abcoder 中,除非另外说明,module(mod) / package(pkg) 的含义如下。
@@ -44,6 +45,8 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言
   - Golang: 对应 package,如 github.com/cloudwego/hertz/pkg/app/server
   - Rust: 对应 mod,如 [serde_json](https://crates.io/crates/serde_json)::[value](https://docs.rs/serde_json/1.0.114/serde_json/value/index.html)
+  - Thrift: 对应 namespace,指定不同的 IDL package type 会获得不同的 namespace
+    - 例如: `namespace go test.main` -> `test.main`
   - 提示: 这里应该尽量等同于代码文件中的 import (use) 路径,方便 LLM 理解
diff --git a/go.mod b/go.mod
index c4dff74..75993ad 100644
--- a/go.mod
+++ b/go.mod
@@ -13,10 +13,12 @@ require (
 	github.com/cloudwego/eino-ext/components/tool/mcp v0.0.3
 	github.com/fsnotify/fsnotify v1.4.9
 	github.com/invopop/jsonschema v0.13.0
+	github.com/joyme123/thrift-ls v0.2.9
 	github.com/mark3labs/mcp-go v0.34.0
 	github.com/sourcegraph/go-lsp v0.0.0-20240223163137-f80c5dd31dfd
 	github.com/sourcegraph/jsonrpc2 v0.2.0
 	github.com/stretchr/testify v1.10.0
+	go.lsp.dev/uri v0.3.0
 	golang.org/x/mod v0.24.0
 	golang.org/x/tools v0.32.0
 )
@@ -39,10 +41,13 @@ require (
 	github.com/aws/smithy-go v1.22.1 // indirect
 	github.com/bahlo/generic-list-go v0.2.0 // indirect
 	github.com/buger/jsonparser v1.1.1 // indirect
+	github.com/bytedance/gopkg v0.1.1 // indirect
 	github.com/bytedance/sonic/loader v0.2.4 // indirect
 	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
 	github.com/cloudwego/base64x v0.1.5 // indirect
 	github.com/cloudwego/eino-ext/libs/acl/openai v0.0.0-20250626133421-3c142631c961 // indirect
+	github.com/cloudwego/gopkg v0.1.4 // indirect
+	github.com/cloudwego/thriftgo v0.4.2 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/evanphx/json-patch v0.5.2 // indirect
@@ -54,6 +59,7 @@ require (
 	github.com/invopop/yaml v0.3.1 // indirect
 	github.com/jmespath/go-jmespath v0.4.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
+	github.com/joyme123/protocol v0.12.0-patch20250429 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/klauspost/cpuid/v2 v2.2.9 // indirect
 	github.com/mailru/easyjson v0.9.0 // indirect
@@ -69,6 +75,8 @@ require (
 	github.com/perimeterx/marshmallow v1.1.5 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
+	github.com/segmentio/asm v1.1.3 // indirect
+	github.com/segmentio/encoding v0.3.4 // indirect
 	github.com/sirupsen/logrus v1.9.3 // indirect
 	github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f // indirect
 	github.com/spf13/cast v1.7.1 // indirect
@@ -82,6 +90,11 @@ require (
 	github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
 	github.com/yargevad/filepathx v1.0.0 // indirect
 	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
+	go.lsp.dev/jsonrpc2 v0.10.0 // indirect
+	go.lsp.dev/pkg v0.0.0-20210717090340-384b27a52fb2 // indirect
+	go.uber.org/atomic v1.9.0 // indirect
+	go.uber.org/multierr v1.8.0 // indirect
+	go.uber.org/zap v1.21.0 // indirect
 	golang.org/x/arch v0.14.0 // indirect
 	golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
 	golang.org/x/net v0.39.0 // indirect
diff --git a/go.sum b/go.sum
index 1cc4753..40c0cd3 100644
--- a/go.sum
+++ b/go.sum
@@ -96,6 +96,7 @@ github.com/aws/smithy-go v1.22.1 h1:/HPHZQ0g7f4eUeK6HKglFz8uwVfZKgoI25rb/J+dnro=
 github.com/aws/smithy-go v1.22.1/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
 github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= +github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= @@ -107,6 +108,8 @@ github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMU github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/bugsnag/bugsnag-go v1.4.0/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= github.com/bugsnag/panicwrap v1.2.0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= +github.com/bytedance/gopkg v0.1.1 h1:3azzgSkiaw79u24a+w9arfH8OfnQQ4MHUt9lJFREEaE= +github.com/bytedance/gopkg v0.1.1/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM= github.com/bytedance/mockey v1.2.14 h1:KZaFgPdiUwW+jOWFieo3Lr7INM1P+6adO3hxZhDswY8= github.com/bytedance/mockey v1.2.14/go.mod h1:1BPHF9sol5R1ud/+0VEHGQq/+i2lN+GTsr3O2Q9IENY= github.com/bytedance/sonic v1.13.3 h1:MS8gmaH16Gtirygw7jV91pDCN33NyMrPbN7qiYhEsF0= @@ -147,7 +150,11 @@ github.com/cloudwego/eino-ext/components/tool/mcp v0.0.3 h1:kT8yynQh8q92BPofkHxx github.com/cloudwego/eino-ext/components/tool/mcp v0.0.3/go.mod h1:D1Yircehgx7em8EH6vw8If+ATjtXl60IpBzVaqFBx3c= github.com/cloudwego/eino-ext/libs/acl/openai v0.0.0-20250626133421-3c142631c961 h1:fGE3RFHaAsrLjA+2fkE0YMsPrkFI6pEKKZmbhD42L7E= github.com/cloudwego/eino-ext/libs/acl/openai v0.0.0-20250626133421-3c142631c961/go.mod h1:iB0W8l+OqKNL5LtJQ9JaGYXekhsxVxrDMfnfD9L+5gc= +github.com/cloudwego/gopkg v0.1.4 h1:EoQiCG4sTonTPHxOGE0VlQs+sQR+Hsi2uN0qqwu8O50= +github.com/cloudwego/gopkg v0.1.4/go.mod h1:FQuXsRWRsSqJLsMVd5SYzp8/Z1y5gXKnVvRrWUOsCMI= github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/cloudwego/thriftgo v0.4.2 h1:+XioeEgBOVqyKMJqUuqeJbKUtQ0XIkXhlNIqoWSESFw= +github.com/cloudwego/thriftgo v0.4.2/go.mod h1:/D4zRAEj1t3/Tq1bVGDMnRt3wxpHfalXfZWvq/n4YmY= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= @@ -352,6 +359,10 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/joyme123/protocol v0.12.0-patch20250429 h1:xpdpCrPh3iiEIQvdg+qe5jvRLOZuGIB6sszHdXrypYk= +github.com/joyme123/protocol v0.12.0-patch20250429/go.mod h1:DtA/00dR9o4lqtyqUzD6mGDfHKoCwHtazn62H2gnerA= +github.com/joyme123/thrift-ls v0.2.9 h1:a1omU0mVzFnk34py+thVKjzx85rqoSZkewDMCZmVPH4= +github.com/joyme123/thrift-ls v0.2.9/go.mod h1:5OjpgkpLu+87FVblB98llJYW7/JzapUL6SCD1238LQs= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod 
h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= @@ -515,6 +526,10 @@ github.com/rollbar/rollbar-go v1.0.2/go.mod h1:AcFs5f0I+c71bpHlXNNDbOWJiKwjFDtIS github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.3.4 h1:WM4IBnxH8B9TakiM2QD5LyNl9JSndh88QbHqVC+Pauc= +github.com/segmentio/encoding v0.3.4/go.mod h1:n0JeuIqEQrQoPDGsjo8UNd1iA0U8d8+oHAA4E3G3OxM= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= @@ -601,6 +616,12 @@ go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQc go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= go.etcd.io/etcd/client/v3 v3.5.0/go.mod h1:AIKXXVX/DQXtfTEqBryiLTUXwON+GuvO6Z7lLS/oTh0= +go.lsp.dev/jsonrpc2 v0.10.0 h1:Pr/YcXJoEOTMc/b6OTmcR1DPJ3mSWl/SWiU1Cct6VmI= +go.lsp.dev/jsonrpc2 v0.10.0/go.mod h1:fmEzIdXPi/rf6d4uFcayi8HpFP1nBF99ERP1htC72Ac= +go.lsp.dev/pkg v0.0.0-20210717090340-384b27a52fb2 h1:hCzQgh6UcwbKgNSRurYWSqh8MufqRRPODRBblutn4TE= +go.lsp.dev/pkg v0.0.0-20210717090340-384b27a52fb2/go.mod h1:gtSHRuYfbCT0qnbLnovpie/WEmqyJ7T4n6VXiFMBtcw= +go.lsp.dev/uri v0.3.0 h1:KcZJmh6nFIBeJzTugn5JTU6OOyG0lDOo3R9KwTxTYbo= +go.lsp.dev/uri v0.3.0/go.mod h1:P5sbO1IQR+qySTWOCnhnK7phBx+W3zbLqSMDJNTw88I= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= @@ -609,14 +630,21 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.11-0.20210813005559-691160354723/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= +go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= +go.uber.org/multierr v1.8.0 h1:dg6GjLku4EH+249NNmoIciG9N/jURbDG+pFlTkhzIC8= +go.uber.org/multierr v1.8.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= go.uber.org/zap v1.17.0/go.mod 
h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= go.uber.org/zap v1.19.1/go.mod h1:j3DNczoxDZroyBnOT1L/Q79cfUMGZxlv/9dzN7SM1rI= +go.uber.org/zap v1.21.0 h1:WefMeulhovoZ2sYXz7st6K0sLj7bBhpiFaud4r4zST8= +go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= golang.org/x/arch v0.14.0 h1:z9JUEZWr8x4rR0OU6c4/4t6E6jOZ8/QBS2bBYBm4tx4= golang.org/x/arch v0.14.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -805,6 +833,7 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210917161153-d61c044b1678/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 0dae115..9963b49 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -40,6 +40,7 @@ type CollectOption struct { NotNeedTest bool Excludes []string LoadByPackages bool + IDLPkgType string } type Collector struct { diff --git a/lang/parse.go b/lang/parse.go index 9ef9530..c1006ce 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -32,6 +32,7 @@ import ( "github.com/cloudwego/abcoder/lang/lsp" "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/rust" + tparser "github.com/cloudwego/abcoder/lang/thrift/parser" "github.com/cloudwego/abcoder/lang/uniast" ) @@ -107,8 +108,6 @@ func checkRepoPath(repoPath string, language uniast.Language) (openfile string, openfile, wait = rust.CheckRepo(repoPath) case uniast.Cxx: openfile, wait = cxx.CheckRepo(repoPath) - case uniast.Python: - openfile, wait = python.CheckRepo(repoPath) default: openfile = "" wait = 0 @@ -126,6 +125,8 @@ func checkLSP(language uniast.Language, lspPath string) (l uniast.Language, s st l, s = cxx.GetDefaultLSP() case uniast.Python: l, s = python.GetDefaultLSP() + case uniast.Thrift: + return "", "", nil case uniast.Golang: l = uniast.Golang s = "" @@ -159,6 +160,11 @@ func collectSymbol(ctx context.Context, cli *lsp.LSPClient, repoPath string, opt if err != nil { return nil, err } + } else if opts.Language == uniast.Thrift { + repo, err = callThriftParser(ctx, repoPath, opts) + if err != nil { + return nil, err + } } else { collector := collect.NewCollector(repoPath, cli) collector.CollectOption = opts @@ -203,3 +209,24 @@ func callGoParser(ctx context.Context, repoPath string, opts collect.CollectOpti } return &repo, nil } + +func callThriftParser(ctx context.Context, repoPath string, opts collect.CollectOption) (*uniast.Repository, error) { + thriftopts := tparser.Options{} + if !opts.NoNeedComment { + thriftopts.CollectComment = true + } + + if opts.IDLPkgType != "" { + thriftopts.TargetLanguage = opts.IDLPkgType + } else { + thriftopts.TargetLanguage = "go" + } + + thriftopts.Excludes = opts.Excludes + p, err := tparser.NewParser(repoPath, 
thriftopts)
+	if err != nil {
+		return nil, err
+	}
+
+	repo, err := p.ParseRepo()
+	if err != nil {
+		return nil, err
+	}
+	return &repo, nil
+}
diff --git a/lang/thrift/parser/no_comment_utils.go b/lang/thrift/parser/no_comment_utils.go
new file mode 100644
index 0000000..0088546
--- /dev/null
+++ b/lang/thrift/parser/no_comment_utils.go
@@ -0,0 +1,358 @@
+/**
+ * Copyright 2025 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package parser
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/cloudwego/abcoder/lang/log"
+	"github.com/joyme123/thrift-ls/parser"
+)
+
+// getRealContent extracts the content from the source bytes using precise start and end offsets.
+func (p *ThriftParser) getRealContent(source []byte, startOffset, endOffset int) string {
+	if source == nil {
+		return ""
+	}
+	sourceLen := len(source)
+	if startOffset < 0 || endOffset > sourceLen || startOffset > endOffset {
+		log.Error("Invalid content offset. Start: %d, End: %d, Source Length: %d", startOffset, endOffset, sourceLen)
+		return ""
+	}
+	return string(source[startOffset:endOffset])
+}
+
+// getRealStructPositions returns a Struct node's real start and end positions (from 'struct' keyword to '}').
+func (p *ThriftParser) getRealStructPositions(s *parser.Struct) (sp, ep parser.Position) {
+	if s == nil {
+		return parser.InvalidPosition, parser.InvalidPosition
+	}
+	startPos := s.Location.StartPos
+	endPos := s.Location.EndPos
+
+	if !p.opts.CollectComment {
+		if s.StructKeyword != nil {
+			startPos = s.StructKeyword.Pos()
+		}
+		if s.RCurKeyword != nil {
+			endPos = s.RCurKeyword.End()
+		}
+	}
+	return startPos, endPos
+}
+
+// getRealEnumPositions returns an Enum node's real start and end positions.
+func (p *ThriftParser) getRealEnumPositions(e *parser.Enum) (sp, ep parser.Position) {
+	if e == nil {
+		return parser.InvalidPosition, parser.InvalidPosition
+	}
+	startPos := e.Location.StartPos
+	endPos := e.Location.EndPos
+
+	if !p.opts.CollectComment {
+		if e.EnumKeyword != nil {
+			startPos = e.EnumKeyword.Pos()
+		}
+		if e.RCurKeyword != nil {
+			endPos = e.RCurKeyword.End()
+		}
+	}
+
+	return startPos, endPos
+}
+
+// getRealServicePositions returns a Service node's real start and end positions.
+func (p *ThriftParser) getRealServicePositions(s *parser.Service) (sp, ep parser.Position) {
+	if s == nil {
+		return parser.InvalidPosition, parser.InvalidPosition
+	}
+	startPos := s.Location.StartPos
+	endPos := s.Location.EndPos
+
+	if !p.opts.CollectComment {
+		if s.ServiceKeyword != nil {
+			startPos = s.ServiceKeyword.Pos()
+		}
+		if s.RCurKeyword != nil {
+			endPos = s.RCurKeyword.End()
+		}
+	}
+
+	return startPos, endPos
+}
+
+// getRealExceptionPositions returns an Exception node's real start and end positions.
+func (p *ThriftParser) getRealExceptionPositions(e *parser.Exception) (sp, ep parser.Position) { + if e == nil { + return parser.InvalidPosition, parser.InvalidPosition + } + startPos := e.Location.StartPos + endPos := e.Location.EndPos + if !p.opts.CollectComment { + if e.ExceptionKeyword != nil { + startPos = e.ExceptionKeyword.Pos() + } + if e.RCurKeyword != nil { + endPos = e.RCurKeyword.End() + } + } + return startPos, endPos +} + +// getRealUnionPositions returns a Union node's real start and end positions. +func (p *ThriftParser) getRealUnionPositions(u *parser.Union) (sp, ep parser.Position) { + if u == nil { + return parser.InvalidPosition, parser.InvalidPosition + } + startPos := u.Location.StartPos + endPos := u.Location.EndPos + + if !p.opts.CollectComment { + if u.UnionKeyword != nil { + startPos = u.UnionKeyword.Pos() + } + if u.RCurKeyword != nil { + endPos = u.RCurKeyword.End() + } + } + + return startPos, endPos +} + +// getRealTypedefPositions returns a Typedef node's real start and end positions. +func (p *ThriftParser) getRealTypedefPositions(t *parser.Typedef) (sp, ep parser.Position) { + if t == nil { + return parser.InvalidPosition, parser.InvalidPosition + } + startPos := t.Location.StartPos + endPos := t.Location.EndPos + + if !p.opts.CollectComment { + if t.TypedefKeyword != nil { + startPos = t.TypedefKeyword.Pos() + } + if t.Alias != nil { + endPos = t.Alias.End() + } + } + + return startPos, endPos +} + +// getRealConstPositions returns a Const definition's real start and end positions. +func (p *ThriftParser) getRealConstPositions(c *parser.Const) (sp, ep parser.Position) { + if c == nil { + return parser.InvalidPosition, parser.InvalidPosition + } + startPos := c.Location.StartPos + endPos := c.Location.EndPos + + if !p.opts.CollectComment { + if c.ConstKeyword != nil { + startPos = c.ConstKeyword.Pos() + } + if c.ListSeparatorKeyword != nil { + endPos = c.ListSeparatorKeyword.End() + } else if c.Value != nil { + endPos = c.Value.End() + } + } + + return startPos, endPos +} + +func (p *ThriftParser) getRealFuncEndOffset(fn *parser.Function, collectSignature bool) int { + if fn == nil { + return -1 + } + + if !p.opts.CollectComment || collectSignature { + if fn.ListSeparatorKeyword != nil { + return fn.ListSeparatorKeyword.End().Offset + } + if fn.Annotations != nil { + return fn.Annotations.End().Offset + } + if fn.Throws != nil { + return fn.Throws.End().Offset + } + if fn.RParKeyword != nil { + return fn.RParKeyword.End().Offset + } + } + + return fn.End().Offset +} + +func (p *ThriftParser) getFuncStartOffset(fn *parser.Function, collectSignature bool) int { + if fn == nil { + return -1 + } + if !p.opts.CollectComment || collectSignature { + if fn.Oneway != nil { + return fn.Oneway.Pos().Offset + } + if fn.Void != nil { + return fn.Void.Pos().Offset + } + if fn.FunctionType != nil { + return fn.FunctionType.Pos().Offset + } + } + return fn.Pos().Offset +} + +func (p *ThriftParser) getRealFuncStartLine(fn *parser.Function) int { + if fn != nil && fn.Name != nil && fn.Name.Name != nil { + return fn.Name.Name.Pos().Line + } + if fn != nil { + return fn.Pos().Line + } + return -1 +} + +func (p *ThriftParser) getFuncSignature(function *parser.Function, source []byte) (string, error) { + if function == nil || source == nil { + return "", fmt.Errorf("function node or source is nil") + } + + startOffset := p.getFuncStartOffset(function, true) + endOffset := p.getRealFuncEndOffset(function, true) + + sourceLen := len(source) + if startOffset < 0 || endOffset > 
sourceLen || startOffset > endOffset { + return "", fmt.Errorf("invalid offset range for function '%s'. Start: %d, End: %d", function.Name.Name.Text, startOffset, endOffset) + } + + signatureBytes := source[startOffset:endOffset] + signature := strings.TrimSpace(string(signatureBytes)) + + if strings.HasSuffix(signature, ",") || strings.HasSuffix(signature, ";") { + signature = signature[:len(signature)-1] + signature = strings.TrimSpace(signature) + } + + return signature, nil +} + +func (p *ThriftParser) getRealFieldTypePositions(ft *parser.FieldType) (sp, ep parser.Position) { + if ft == nil { + return parser.InvalidPosition, parser.InvalidPosition + } + startPos := ft.Location.StartPos + endPos := ft.Location.EndPos + + if !p.opts.CollectComment { + if ft.TypeName != nil { + startPos = ft.TypeName.Pos() + } + + if ft.Annotations != nil { + endPos = ft.Annotations.End() + } else if ft.RPointKeyword != nil { + endPos = ft.RPointKeyword.End() + } else if ft.TypeName != nil { + endPos = ft.TypeName.End() + } + } + + return startPos, endPos +} + +func (p *ThriftParser) getRealFieldTypeLine(ft *parser.FieldType) int { + if ft == nil { + return -1 + } + + if ft.TypeName != nil { + return ft.TypeName.Pos().Line + } + + return ft.Pos().Line +} + +func (p *ThriftParser) getRealStructLine(s *parser.Struct) int { + if s != nil && s.Identifier != nil && s.Identifier.Name != nil { + return s.Identifier.Name.Pos().Line + } + if s != nil { + return s.Pos().Line + } + return -1 +} + +func (p *ThriftParser) getRealEnumLine(e *parser.Enum) int { + if e != nil && e.Name != nil && e.Name.Name != nil { + return e.Name.Name.Pos().Line + } + if e != nil { + return e.Pos().Line + } + return -1 +} + +func (p *ThriftParser) getRealServiceLine(s *parser.Service) int { + if s != nil && s.Name != nil && s.Name.Name != nil { + return s.Name.Name.Pos().Line + } + if s != nil { + return s.Pos().Line + } + return -1 +} + +func (p *ThriftParser) getRealExceptionLine(e *parser.Exception) int { + if e != nil && e.Name != nil && e.Name.Name != nil { + return e.Name.Name.Pos().Line + } + if e != nil { + return e.Pos().Line + } + return -1 +} + +func (p *ThriftParser) getRealUnionLine(u *parser.Union) int { + if u != nil && u.Name != nil && u.Name.Name != nil { + return u.Name.Name.Pos().Line + } + if u != nil { + return u.Pos().Line + } + return -1 +} + +func (p *ThriftParser) getRealTypedefLine(t *parser.Typedef) int { + if t != nil && t.Alias != nil && t.Alias.Name != nil { + return t.Alias.Name.Pos().Line + } + if t != nil { + return t.Pos().Line + } + return -1 +} + +func (p *ThriftParser) getRealConstLine(c *parser.Const) int { + if c != nil && c.Name != nil && c.Name.Name != nil { + return c.Name.Name.Pos().Line + } + if c != nil { + return c.Pos().Line + } + return -1 +} diff --git a/lang/thrift/parser/option.go b/lang/thrift/parser/option.go new file mode 100644 index 0000000..5c33082 --- /dev/null +++ b/lang/thrift/parser/option.go @@ -0,0 +1,23 @@ +/** + * Copyright 2025 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package parser + +type Options struct { + Excludes []string + CollectComment bool + TargetLanguage string +} diff --git a/lang/thrift/parser/parser.go b/lang/thrift/parser/parser.go new file mode 100644 index 0000000..3e43ddf --- /dev/null +++ b/lang/thrift/parser/parser.go @@ -0,0 +1,800 @@ +/** + * Copyright 2025 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package parser + +import ( + "bufio" + "context" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/cloudwego/abcoder/lang/log" + . "github.com/cloudwego/abcoder/lang/uniast" + "github.com/joyme123/thrift-ls/format" + "github.com/joyme123/thrift-ls/lsp/cache" + "github.com/joyme123/thrift-ls/lsp/lsputils" + "github.com/joyme123/thrift-ls/lsp/memoize" + "github.com/joyme123/thrift-ls/parser" + "go.lsp.dev/uri" +) + +var _ Parser = (*ThriftParser)(nil) + +// ThriftParser holds the state and logic for parsing a repository of Thrift files into a UniAST structure. +type ThriftParser struct { + rootDir string // Absolute path to the repository root. + repo Repository // The UniAST repository object being built. + opts Options // Specific options for Thrift parsing. + fileCache map[string][]byte + fileAst map[string]*parser.Document + excludes []*regexp.Regexp // Regular expressions for files/directories to exclude. + + parsedFiles map[string]bool + modName string + includeRelations map[string]map[string]string + fileToNamespace map[string]string + + initialFileChanges []*cache.FileChange // A list of initial file changes to build the AST. +} + +// NewParser creates and initializes a new ThriftParser. +func NewParser(rootDir string, opts Options) (*ThriftParser, error) { + if opts.TargetLanguage == "" { + return nil, fmt.Errorf("TargetLanguage option is required") + } + + absRootDir, err := filepath.Abs(rootDir) + if err != nil { + return nil, fmt.Errorf("failed to get absolute path for rootDir: %w", err) + } + + p := &ThriftParser{ + rootDir: absRootDir, + repo: NewRepository(rootDir), + opts: opts, + fileCache: make(map[string][]byte), + parsedFiles: make(map[string]bool), + fileAst: make(map[string]*parser.Document), + modName: "current", + includeRelations: make(map[string]map[string]string), + fileToNamespace: make(map[string]string), + } + p.repo.Modules["current"] = NewModule("current", ".", Thrift) + + for _, ex := range opts.Excludes { + r, err := regexp.Compile(ex) + if err != nil { + log.Error("Warning: failed to compile exclude pattern '%s': %v\n", ex, err) + } else { + p.excludes = append(p.excludes, r) + } + } + + if err := p.preScanThriftFiles(); err != nil { + return nil, fmt.Errorf("failed during pre-scan: %w", err) + } + + return p, nil +} + +// preScanThriftFiles walks the root directory to find all .thrift files, +// caches their content, and performs an initial parse. 
+func (p *ThriftParser) preScanThriftFiles() error { + var fileChanges []*cache.FileChange + + err := filepath.Walk(p.rootDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + for _, r := range p.excludes { + if r.MatchString(path) { + return filepath.SkipDir + } + } + + if !strings.HasSuffix(path, ".thrift") { + return nil + } + + content, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("failed to read file %s: %w", path, err) + } + + // Quick scan for namespaces and includes without a full parse. + scanner := bufio.NewScanner(strings.NewReader(string(content))) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(line, "namespace") { + parts := strings.Fields(line) + // e.g., "namespace go abc.def" + if len(parts) == 3 && parts[1] == p.opts.TargetLanguage { + p.fileToNamespace[path] = parts[2] + } + } else if strings.HasPrefix(line, "include") { + parts := strings.Fields(line) + if len(parts) == 2 { + includePathRaw := strings.Trim(parts[1], `"'`) + absIncludePath := filepath.Join(filepath.Dir(path), includePathRaw) + alias := strings.TrimSuffix(filepath.Base(includePathRaw), ".thrift") + if p.includeRelations[path] == nil { + p.includeRelations[path] = make(map[string]string) + } + p.includeRelations[path][alias] = absIncludePath + } + } + } + if err := scanner.Err(); err != nil { + return err + } + + absPath, _ := filepath.Abs(path) + var finalAST *parser.Document + + // Initial parse to get the AST. + initialAST, err := parser.Parse(absPath, content) + if err != nil { + log.Error("Initial parse failed for %s: %v", absPath, err) + finalAST = nil // Continue even if parsing fails. + } else { + finalAST = initialAST.(*parser.Document) + } + + if !p.opts.CollectComment && finalAST != nil { + // Remove comments and re-parse to get clean offsets. + removeAllComments(finalAST) + contentString, err := format.FormatDocument(finalAST) + if err != nil { + return err // Formatting failure is a critical error. + } + content = []byte(contentString) + + reParsedAST, err := parser.Parse(absPath, content) + if err != nil { + log.Error("Re-parse after comment removal failed for %s: %v", absPath, err) + // If re-parse fails, we set the AST to nil. + // Falling back to the commented AST would complicate offset logic. + finalAST = nil + } else { + finalAST = reParsedAST.(*parser.Document) + } + } + + uriFile := uri.File(absPath) + p.fileAst[uriFile.Filename()] = finalAST + p.fileCache[uriFile.Filename()] = content + fileChanges = append(fileChanges, &cache.FileChange{ + URI: uriFile, + Content: content, + From: cache.FileChangeTypeDidOpen, + }) + + return nil + }) + + if err != nil { + return fmt.Errorf("failed to walk thrift files: %w", err) + } + p.initialFileChanges = fileChanges + return nil +} + +// ParseRepo parses the entire repository and builds the UniAST graph. +func (p *ThriftParser) ParseRepo() (Repository, error) { + for _, fc := range p.initialFileChanges { + if err := p.collectEntitiesFromURI(&p.repo, fc.URI); err != nil { + log.Error("Error processing file '%s': %v", fc.URI.Filename(), err) + return p.repo, err + } + } + + if err := p.repo.BuildGraph(); err != nil { + return p.repo, fmt.Errorf("failed to build UniAST graph: %w", err) + } + + return p.repo, nil +} + +// findNamespace finds the namespace declaration for the target language. 
+func (p *ThriftParser) findNamespace(doc *parser.Document) PkgPath { + for _, ns := range doc.Namespaces { + if ns.Language.Name.Text == p.opts.TargetLanguage { + return ns.Name.Name.Text + } + } + // Fallback to wildcard namespace if available. + for _, ns := range doc.Namespaces { + if ns.Language.Name.Text == "*" { + return ns.Name.Name.Text + } + } + return "" +} + +// collectServices extracts service and function definitions from a Thrift document. +func (p *ThriftParser) collectServices(doc *parser.Document, pkg *Package, relFilePath string, fileURI uri.URI) error { + content := p.fileCache[fileURI.Filename()] + + var addDependenciesToSlice func(dependencies *[]Dependency, ft *parser.FieldType) + addDependenciesToSlice = func(dependencies *[]Dependency, ft *parser.FieldType) { + if ft == nil { + return + } + + identity, err := p.fieldTypeToIdentity(fileURI, doc, ft) + if err != nil { + log.Error("Failed to resolve type identity for '%s' in '%s': %v", ft.TypeName.Name, fileURI.Filename(), err) + return + } + sp, ep := p.getRealFieldTypePositions(ft) + // Only add dependencies that are custom types (i.e., have a PkgPath). + if identity.PkgPath != "" { + dep := NewDependency(*identity, FileLine{ + File: relFilePath, + Line: p.getRealFieldTypeLine(ft), + StartOffset: sp.Offset, + EndOffset: ep.Offset, + }) + *dependencies = InsertDependency(*dependencies, dep) + } + + // Recursively add dependencies for container types. + if ft.KeyType != nil { + addDependenciesToSlice(dependencies, ft.KeyType) + } + if ft.ValueType != nil { + addDependenciesToSlice(dependencies, ft.ValueType) + } + } + + for _, service := range doc.Services { + serviceIdentity := NewIdentity(p.modName, pkg.PkgPath, service.Name.Name.Text) + for _, function := range service.Functions { + funcName := fmt.Sprintf("%s.%s", service.Name.Name.Text, function.Name.Name.Text) + funcIdentity := NewIdentity(p.modName, pkg.PkgPath, funcName) + + signature, err := p.getFuncSignature(function, content) + if err != nil { + log.Error("Failed to get signature for function '%s': %v", funcName, err) + } + + uniFunc := &Function{ + Exported: true, + IsMethod: true, + IsInterfaceMethod: false, + Identity: funcIdentity, + FileLine: FileLine{ + File: relFilePath, + Line: p.getRealFuncStartLine(function), + StartOffset: p.getFuncStartOffset(function, false), + EndOffset: p.getRealFuncEndOffset(function, false), + }, + Content: format.MustFormatService(service), + Signature: signature, + Receiver: &Receiver{ + IsPointer: false, + Type: serviceIdentity, + }, + Params: make([]Dependency, 0), + Results: make([]Dependency, 0), + } + + for _, arg := range function.Arguments { + addDependenciesToSlice(&uniFunc.Params, arg.FieldType) + } + + if function.Oneway == nil && function.FunctionType != nil { + addDependenciesToSlice(&uniFunc.Results, function.FunctionType) + } + + pkg.Functions[funcName] = uniFunc + } + } + return nil +} + +// fieldTypeToIdentity converts a Thrift FieldType to a UniAST Identity. +func (p *ThriftParser) fieldTypeToIdentity(currentFileURI uri.URI, currentDoc *parser.Document, fieldType *parser.FieldType) (*Identity, error) { + if fieldType == nil || fieldType.TypeName == nil { + return &Identity{ModPath: p.modName, PkgPath: p.findNamespace(currentDoc), Name: "unknown"}, nil + } + return p.resolveTypeIdentity(currentFileURI, currentDoc, fieldType.TypeName.Name) +} + +// resolveTypeIdentity resolves a type name to its full UniAST Identity, handling includes and namespaces. 
+func (p *ThriftParser) resolveTypeIdentity(currentFileURI uri.URI, currentDoc *parser.Document, typeName string) (*Identity, error) { + baseTypes := map[string]bool{ + "bool": true, "byte": true, "i8": true, "i16": true, "i32": true, "i64": true, + "double": true, "string": true, "binary": true, "uuid": true, + "list": true, "set": true, "map": true, + } + + if baseTypes[typeName] { + // Base types have no module or package path. + return &Identity{Name: typeName}, nil + } + + alias := "" + typeNamePart := typeName + if strings.Contains(typeName, ".") { + parts := strings.SplitN(typeName, ".", 2) + alias = parts[0] + typeNamePart = parts[1] + } + + var targetDoc *parser.Document + var targetFileURI uri.URI + + if alias == "" { + // Type is defined in the current file. + targetDoc = currentDoc + targetFileURI = currentFileURI + } else { + // Type is imported from another file. + includePath, found := "", false + for _, inc := range currentDoc.Includes { + incAlias := strings.TrimSuffix(filepath.Base(inc.Path.Value.Text), ".thrift") + if incAlias == alias { + includePath = inc.Path.Value.Text + found = true + break + } + } + + if !found { + return nil, fmt.Errorf("include declaration with alias '%s' not found in file '%s'", alias, currentFileURI.Filename()) + } + + targetFileURI = lsputils.IncludeURI(currentFileURI, includePath) + parsedFile := p.fileAst[targetFileURI.Filename()] + targetDoc = parsedFile + } + + pkgPath := p.findNamespace(targetDoc) + if pkgPath == "" { + log.Error("No suitable namespace found for target language '%s' in file '%s'", p.opts.TargetLanguage, targetFileURI.Filename()) + return &Identity{ModPath: p.modName, Name: typeNamePart}, fmt.Errorf("namespace not found in %s", targetFileURI.Filename()) + } + + identity := NewIdentity(p.modName, pkgPath, typeNamePart) + return &identity, nil +} + +// collectConsts extracts const definitions from a Thrift document. +func (p *ThriftParser) collectConsts(doc *parser.Document, content []byte, pkg *Package, relFilePath string) { + vars, err := p.collectThriftVars(doc, content, p.modName, relFilePath) + if err == nil { + for k, v := range vars { + pkg.Vars[k] = v + } + } +} + +// collectTypes extracts type definitions (structs, enums, etc.) from a Thrift document. +func (p *ThriftParser) collectTypes(doc *parser.Document, pkg *Package, relFilePath string, fileURI uri.URI) { + types, err := p.collectThriftTypes(doc, pkg.PkgPath, p.modName, relFilePath, fileURI) + if err == nil { + for k, v := range types { + pkg.Types[k] = v + } + } +} + +// collectThriftTypes is a helper that performs the actual extraction of various type definitions. +func (p *ThriftParser) collectThriftTypes(doc *parser.Document, pkgPath PkgPath, modPath ModPath, filePath string, fileURI uri.URI) (map[string]*Type, error) { + types := make(map[string]*Type) + + for _, s := range doc.Structs { + name := s.Identifier.Name.Text + sp, ep := p.getRealStructPositions(s) + uniType := &Type{ + Exported: true, + TypeKind: TypeKindStruct, + Identity: NewIdentity(modPath, pkgPath, name), + FileLine: newFileLine(filePath, p.getRealStructLine(s), sp, ep), + Content: format.MustFormatStruct(s), + } + p.processStructLike(s.Fields, uniType, fileURI, doc) + types[name] = uniType + } + + for _, e := range doc.Exceptions { + name := e.Name.Name.Text + sp, ep := p.getRealExceptionPositions(e) + uniType := &Type{ + Exported: true, + TypeKind: TypeKindStruct, // Exceptions are structurally similar to structs. 
+ Identity: NewIdentity(modPath, pkgPath, name), + FileLine: newFileLine(filePath, p.getRealExceptionLine(e), sp, ep), + Content: format.MustFormatException(e), + } + p.processStructLike(e.Fields, uniType, fileURI, doc) + types[name] = uniType + } + + for _, u := range doc.Unions { + name := u.Name.Name.Text + sp, ep := p.getRealUnionPositions(u) + uniType := &Type{ + Exported: true, + TypeKind: TypeKindStruct, // Unions are also structurally similar to structs. + Identity: NewIdentity(modPath, pkgPath, name), + FileLine: newFileLine(filePath, p.getRealUnionLine(u), sp, ep), + Content: format.MustFormatUnion(u), + } + p.processStructLike(u.Fields, uniType, fileURI, doc) + types[name] = uniType + } + + for _, e := range doc.Enums { + name := e.Name.Name.Text + sp, ep := p.getRealEnumPositions(e) + types[name] = &Type{ + Exported: true, + TypeKind: TypeKindEnum, + Identity: NewIdentity(modPath, pkgPath, name), + FileLine: newFileLine(filePath, p.getRealEnumLine(e), sp, ep), + Content: format.MustFormatEnum(e), + } + } + + for _, t := range doc.Typedefs { + name := t.Alias.Name.Text + originalTypeIdentity, err := p.fieldTypeToIdentity(fileURI, doc, t.T) + if err != nil { + log.Error("Failed to resolve typedef for '%s': %v", name, err) + continue + } + sp, ep := p.getRealTypedefPositions(t) + dep := NewDependency(*originalTypeIdentity, FileLine{ + File: filePath, + Line: p.getRealTypedefLine(t), + StartOffset: sp.Offset, + EndOffset: ep.Offset, + }) + types[name] = &Type{ + Exported: true, + TypeKind: TypeKindTypedef, + Identity: NewIdentity(modPath, pkgPath, name), + FileLine: newFileLine(filePath, p.getRealTypedefLine(t), sp, ep), + Content: format.MustFormatTypedef(t), + SubStruct: []Dependency{dep}, + } + } + + for _, s := range doc.Services { + name := s.Name.Name.Text + sp, ep := p.getRealServicePositions(s) + uniType := &Type{ + Exported: true, + TypeKind: TypeKindInterface, + Identity: NewIdentity(modPath, pkgPath, name), + FileLine: newFileLine(filePath, p.getRealServiceLine(s), sp, ep), + Content: format.MustFormatService(s), + Methods: make(map[string]Identity), + } + for _, f := range s.Functions { + methodName := f.Name.Name.Text + methodIdentity := NewIdentity(modPath, pkgPath, fmt.Sprintf("%s.%s", name, methodName)) + uniType.Methods[methodName] = methodIdentity + } + types[name] = uniType + } + + return types, nil +} + +// collectEntitiesFromURI orchestrates the collection of all entities from a single file URI. +func (p *ThriftParser) collectEntitiesFromURI(repo *Repository, fileURI uri.URI) error { + content := p.fileCache[fileURI.Filename()] + document := p.fileAst[fileURI.Filename()] + if document == nil { + log.Error("AST for file '%s' not found in cache.", fileURI.Filename()) + return nil + } + + relFilePath, _ := filepath.Rel(p.rootDir, fileURI.Filename()) + module := repo.Modules["current"] + + // Process file-level information (Imports, Package). 
+ uniastFile := NewFile(relFilePath) + for _, include := range document.Includes { + pathValue := include.Path.Value.Text + uniastFile.Imports = append(uniastFile.Imports, NewImport(nil, fmt.Sprintf(`"%s"`, pathValue))) + } + namespace := p.findNamespace(document) + uniastFile.Package = namespace + module.Files[relFilePath] = uniastFile + + if namespace == "" { + log.Info("No namespace found for language %s in file %s, skipping entity collection.", p.opts.TargetLanguage, relFilePath) + return nil + } + + if module.Packages[namespace] == nil { + module.Packages[namespace] = NewPackage(namespace) + } + uniastPackage := module.Packages[namespace] + + if err := p.collectServices(document, uniastPackage, relFilePath, fileURI); err != nil { + return err + } + p.collectTypes(document, uniastPackage, relFilePath, fileURI) + p.collectConsts(document, content, uniastPackage, relFilePath) + + return nil +} + +// ParseNode parses a single node and its direct dependencies from the repository. +func (p *ThriftParser) ParseNode(pkgPath, name string) (Repository, error) { + outRepo := NewRepository(p.repo.Name) + outRepo.Modules["current"] = NewModule("current", ".", Thrift) + + // Helper function to copy a node from the fully parsed p.repo to the outRepo. + addNode := func(id Identity) { + if outRepo.Modules["current"].Packages[id.PkgPath] == nil { + outRepo.Modules["current"].Packages[id.PkgPath] = NewPackage(id.PkgPath) + } + outPkg := outRepo.Modules["current"].Packages[id.PkgPath] + + // Find the original node in the complete repository AST (p.repo) and copy it. + sourcePkg := p.repo.Modules["current"].Packages[id.PkgPath] + if sourcePkg == nil { + return // The dependent package does not exist in the full AST, skip. + } + + if fn, ok := sourcePkg.Functions[id.Name]; ok { + outPkg.Functions[id.Name] = fn + } else if t, ok := sourcePkg.Types[id.Name]; ok { + outPkg.Types[id.Name] = t + } else if v, ok := sourcePkg.Vars[id.Name]; ok { + outPkg.Vars[id.Name] = v + } + } + + // Find the target node in the complete repository AST. + pkg := p.repo.Modules["current"].Packages[pkgPath] + if pkg == nil { + return outRepo, fmt.Errorf("package '%s' not found in repository", pkgPath) + } + + var targetIdentity *Identity + nodeFound := false + + if fn, ok := pkg.Functions[name]; ok { + targetIdentity = &fn.Identity + nodeFound = true + } else if t, ok := pkg.Types[name]; ok { + targetIdentity = &t.Identity + nodeFound = true + } else if v, ok := pkg.Vars[name]; ok { + targetIdentity = &v.Identity + nodeFound = true + } + + if !nodeFound { + return outRepo, fmt.Errorf("node '%s' not found in package '%s'", name, pkgPath) + } + + // Add the target node and its dependencies to the output repository. + addNode(*targetIdentity) + graphNode := p.repo.GetNode(*targetIdentity) + if graphNode != nil { + for _, relation := range graphNode.Dependencies { + addNode(relation.Identity) + } + } + + if err := outRepo.BuildGraph(); err != nil { + return outRepo, fmt.Errorf("failed to build UniAST graph for node '%s': %w", name, err) + } + + return outRepo, nil +} + +// ParsePackage parses all files belonging to a specific package path. +func (p *ThriftParser) ParsePackage(pkgPath PkgPath) (Repository, error) { + outRepo := NewRepository(p.repo.Name) + // FIX: Initialize the "current" module to prevent panic in collectEntitiesFromURI. 
+ outRepo.Modules["current"] = NewModule("current", ".", Thrift) + + found := false + for file, namespace := range p.fileToNamespace { + if namespace == pkgPath { + found = true + fileURI := uri.File(file) + if err := p.collectEntitiesFromURI(&outRepo, fileURI); err != nil { + log.Error("Error processing file '%s' for package '%s': %v", file, pkgPath, err) + } + } + } + + if !found { + return outRepo, fmt.Errorf("package not found: %s", pkgPath) + } + + if err := outRepo.BuildGraph(); err != nil { + return outRepo, fmt.Errorf("failed to build UniAST graph for package '%s': %w", pkgPath, err) + } + + return outRepo, nil +} + +// buildSnapshot initializes a thrift-ls snapshot for parsing. (Currently not used in the main flow but useful for LSP-based approaches) +func (p *ThriftParser) buildSnapshot(fileChanges []*cache.FileChange) (*cache.Snapshot, error) { + if len(fileChanges) == 0 { + return nil, fmt.Errorf("no .thrift files found to build snapshot") + } + + store := &memoize.Store{} + c := cache.New(store) + fs := cache.NewOverlayFS(c) + + if err := fs.Update(context.TODO(), fileChanges); err != nil { + return nil, fmt.Errorf("failed to update overlay FS: %w", err) + } + + folderURI := uri.File(p.rootDir) + view := cache.NewView(p.modName, folderURI, fs, store) + ss := cache.NewSnapshot(view, store) + + for _, f := range fileChanges { + document, err := ss.Parse(context.TODO(), f.URI) + if err != nil { + log.Error("Warning: error parsing file '%s': %v\n", f.URI.Filename(), err) + } + p.fileAst[f.URI.Filename()] = document.AST() + } + + return ss, nil +} + +// toIdentity converts a thrift-ls FieldType node to a UniAST Identity. +// This is a simplified implementation. A more robust version would need to handle +// alias resolution from 'include' statements to correctly determine the PkgPath for types like `shared.User`. +func toIdentity(fieldType *parser.FieldType, currentPkg PkgPath, modPath ModPath) *Identity { + if fieldType == nil || fieldType.TypeName == nil { + return &Identity{Name: "unknown"} + } + + typeName := fieldType.TypeName.Name + + // Base Thrift types do not have a ModPath or PkgPath. + baseTypes := map[string]bool{ + "bool": true, "byte": true, "i8": true, "i16": true, "i32": true, "i64": true, + "double": true, "string": true, "binary": true, "uuid": true, + } + if baseTypes[typeName] { + return &Identity{Name: typeName} + } + + // Container types themselves are keywords; their dependencies are their inner types. + // This function only identifies the container type itself. + containerTypes := map[string]bool{"list": true, "set": true, "map": true} + if containerTypes[typeName] { + return &Identity{Name: typeName} + } + + // For this simplified version, assume all other types are within the current package. + return &Identity{ + ModPath: modPath, + PkgPath: currentPkg, + Name: typeName, + } +} + +// collectThriftVars extracts all 'const' definitions from a document. +func (p *ThriftParser) collectThriftVars(doc *parser.Document, source []byte, modPath ModPath, filePath string) (map[string]*Var, error) { + vars := make(map[string]*Var) + + pkgPath := p.findNamespace(doc) + if pkgPath == "" { + return nil, fmt.Errorf("no suitable namespace found for language '%s' in file '%s'", p.opts.TargetLanguage, doc.Filename) + } + + for _, c := range doc.Consts { + constName := c.Name.Name.Text + sp, ep := p.getRealConstPositions(c) + content := p.getRealContent(source, sp.Offset, ep.Offset) + + uniVar := &Var{ + IsExported: true, // Thrift consts are public by default. 
+ IsConst: true, + IsPointer: false, + Identity: Identity{ + ModPath: modPath, + PkgPath: pkgPath, + Name: constName, + }, + FileLine: newFileLine(filePath, p.getRealConstLine(c), sp, ep), + Type: toIdentity(c.ConstType, pkgPath, modPath), + Content: content, + } + + // Handle dependencies on other enums or constants, e.g., `const MyStatus s = MyStatus.OK`. + if c.Value.TypeName == "identifier" { + if identVal, ok := c.Value.Value.(string); ok { + // This is a simplified analysis. The dependency is on the `MyStatus.OK` Var. + // We create an identity for it assuming it's in the same package. + if strings.Contains(identVal, ".") { + depIdentity := Identity{ + ModPath: modPath, + PkgPath: pkgPath, + Name: identVal, // The dependency name is the full identifier, e.g., 'MyStatus.OK'. + } + uniVar.Dependencies = append(uniVar.Dependencies, Dependency{Identity: depIdentity}) + } + } + } + + vars[constName] = uniVar + } + + return vars, nil +} + +// processStructLike processes types with fields, such as struct, exception, and union, +// to find and add their field type dependencies. +func (p *ThriftParser) processStructLike( + fields []*parser.Field, + uniType *Type, + currentFileURI uri.URI, + currentDoc *parser.Document, +) { + var addDependencies func(ft *parser.FieldType) + addDependencies = func(ft *parser.FieldType) { + if ft == nil { + return + } + + // Parse the main type (or a container type like 'list'). + identity, err := p.fieldTypeToIdentity(currentFileURI, currentDoc, ft) + if err != nil { + log.Error("Failed to resolve type identity for '%s' in '%s': %v", ft.TypeName.Name, currentFileURI.Filename(), err) + return + } + + // Only add dependencies for custom types that have a package path. + if identity.PkgPath != "" { + dep := NewDependency(*identity, FileLine{ + File: uniType.File, + Line: ft.Location.StartPos.Line, + StartOffset: ft.Location.StartPos.Offset, + EndOffset: ft.Location.EndPos.Offset, + }) + uniType.SubStruct = InsertDependency(uniType.SubStruct, dep) + } + + // Recursively add dependencies for container key/value types. + if ft.KeyType != nil { + addDependencies(ft.KeyType) + } + if ft.ValueType != nil { + addDependencies(ft.ValueType) + } + } + + for _, field := range fields { + addDependencies(field.FieldType) + } +} diff --git a/lang/thrift/parser/utils.go b/lang/thrift/parser/utils.go new file mode 100644 index 0000000..50b6d5e --- /dev/null +++ b/lang/thrift/parser/utils.go @@ -0,0 +1,214 @@ +/** + * Copyright 2025 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package parser + +import ( + "github.com/cloudwego/abcoder/lang/uniast" + "github.com/joyme123/thrift-ls/parser" + "github.com/joyme123/thrift-ls/utils" +) + +func newFileLine(relFilePath string, line int, startPos, endPos parser.Position) uniast.FileLine { + return uniast.FileLine{ + File: relFilePath, + Line: line, + StartOffset: startPos.Offset, + EndOffset: endPos.Offset, + } +} + +func removeAllComments(doc *parser.Document) { + if doc == nil { + return + } + removeCommentsRecursive(doc) +} + +// removeCommentsRecursive 是一个递归函数,用于遍历 AST 并清除注释。 +func removeCommentsRecursive(node parser.Node) { + if utils.IsNil(node) { + return + } + + switch n := node.(type) { + case *parser.Document: + n.Comments = nil + + case *parser.Include: + n.Comments = nil + n.EndLineComments = nil + if n.IncludeKeyword != nil { + n.IncludeKeyword.Comments = nil + } + case *parser.CPPInclude: + n.Comments = nil + n.EndLineComments = nil + if n.CPPIncludeKeyword != nil { + n.CPPIncludeKeyword.Comments = nil + } + case *parser.Namespace: + n.Comments = nil + n.EndLineComments = nil + if n.NamespaceKeyword != nil { + n.NamespaceKeyword.Comments = nil + } + + case *parser.Struct: + n.Comments = nil + n.EndLineComments = nil + if n.StructKeyword != nil { + n.StructKeyword.Comments = nil + } + if n.LCurKeyword != nil { + n.LCurKeyword.Comments = nil + } + if n.RCurKeyword != nil { + n.RCurKeyword.Comments = nil + } + case *parser.Union: + n.Comments = nil + n.EndLineComments = nil + if n.UnionKeyword != nil { + n.UnionKeyword.Comments = nil + } + if n.LCurKeyword != nil { + n.LCurKeyword.Comments = nil + } + if n.RCurKeyword != nil { + n.RCurKeyword.Comments = nil + } + case *parser.Exception: + n.Comments = nil + n.EndLineComments = nil + if n.ExceptionKeyword != nil { + n.ExceptionKeyword.Comments = nil + } + if n.LCurKeyword != nil { + n.LCurKeyword.Comments = nil + } + if n.RCurKeyword != nil { + n.RCurKeyword.Comments = nil + } + case *parser.Service: + n.Comments = nil + n.EndLineComments = nil + if n.ServiceKeyword != nil { + n.ServiceKeyword.Comments = nil + } + if n.ExtendsKeyword != nil { + n.ExtendsKeyword.Comments = nil + } + if n.LCurKeyword != nil { + n.LCurKeyword.Comments = nil + } + if n.RCurKeyword != nil { + n.RCurKeyword.Comments = nil + } + case *parser.Enum: + n.Comments = nil + n.EndLineComments = nil + if n.EnumKeyword != nil { + n.EnumKeyword.Comments = nil + } + if n.LCurKeyword != nil { + n.LCurKeyword.Comments = nil + } + if n.RCurKeyword != nil { + n.RCurKeyword.Comments = nil + } + case *parser.Typedef: + n.Comments = nil + n.EndLineComments = nil + if n.TypedefKeyword != nil { + n.TypedefKeyword.Comments = nil + } + case *parser.Const: + n.Comments = nil + n.EndLineComments = nil + if n.ConstKeyword != nil { + n.ConstKeyword.Comments = nil + } + if n.EqualKeyword != nil { + n.EqualKeyword.Comments = nil + } + if n.ListSeparatorKeyword != nil { + n.ListSeparatorKeyword.Comments = nil + } + + case *parser.Field: + n.Comments = nil + n.EndLineComments = nil + if n.Index != nil { + n.Index.Comments = nil + if n.Index.ColonKeyword != nil { + n.Index.ColonKeyword.Comments = nil + } + } + if n.RequiredKeyword != nil { + n.RequiredKeyword.Comments = nil + } + if n.EqualKeyword != nil { + n.EqualKeyword.Comments = nil + } + if n.ListSeparatorKeyword != nil { + n.ListSeparatorKeyword.Comments = nil + } + case *parser.Function: + n.Comments = nil + n.EndLineComments = nil + if n.Oneway != nil { + n.Oneway.Comments = nil + } + if n.Void != nil { + n.Void.Comments = nil + } + if 
n.LParKeyword != nil { + n.LParKeyword.Comments = nil + } + if n.RParKeyword != nil { + n.RParKeyword.Comments = nil + } + if n.ListSeparatorKeyword != nil { + n.ListSeparatorKeyword.Comments = nil + } + case *parser.EnumValue: + n.Comments = nil + n.EndLineComments = nil + if n.EqualKeyword != nil { + n.EqualKeyword.Comments = nil + } + if n.ListSeparatorKeyword != nil { + n.ListSeparatorKeyword.Comments = nil + } + case *parser.Identifier: + n.Comments = nil + case *parser.Literal: + n.Comments = nil + case *parser.ConstValue: + n.Comments = nil + case *parser.FieldType: + if n.TypeName != nil { + n.TypeName.Comments = nil + } + case *parser.TypeName: + n.Comments = nil + } + + for _, child := range node.Children() { + removeCommentsRecursive(child) + } +} diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index dde9203..03995d6 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -31,6 +31,7 @@ const ( Rust Language = "rust" Cxx Language = "cxx" Python Language = "python" + Thrift Language = "thrift" Unknown Language = "" ) @@ -44,6 +45,8 @@ func (l Language) String() string { return "cxx" case Python: return "python" + case Thrift: + return "thrift" default: return string(l) } @@ -64,6 +67,8 @@ func NewLanguage(lang string) (l Language) { return Cxx case "python": return Python + case "thrift": + return Thrift default: return Unknown } diff --git a/main.go b/main.go index 555ba07..20b3748 100644 --- a/main.go +++ b/main.go @@ -77,6 +77,7 @@ func main() { flags.BoolVar(&opts.LoadByPackages, "load-by-packages", false, "load by packages (only works for Go now)") flags.Var((*StringArray)(&opts.Excludes), "exclude", "exclude files or directories, support multiple values") flags.StringVar(&opts.RepoID, "repo-id", "", "specify the repo id") + flags.StringVar(&opts.IDLPkgType, "idl-pkg-type", "", "specify the idl package type (onlu works for Thrift now)") var wopts lang.WriteOptions flags.StringVar(&wopts.Compiler, "compiler", "", "destination compiler path.") diff --git a/testdata/thrifts/gender/gender.thrift b/testdata/thrifts/gender/gender.thrift new file mode 100644 index 0000000..0486e07 --- /dev/null +++ b/testdata/thrifts/gender/gender.thrift @@ -0,0 +1,6 @@ +namespace go abcoder.testdata.thrifts.gender +namespace java abcoder.testdata.thrifts.gender + +struct Gender { + 1: string gender +} \ No newline at end of file diff --git a/testdata/thrifts/main.thrift b/testdata/thrifts/main.thrift new file mode 100644 index 0000000..8f5c28b --- /dev/null +++ b/testdata/thrifts/main.thrift @@ -0,0 +1,70 @@ +/** + * 这是 thrift 文件的开头,通常用来定义不同编程语言生成代码时使用的命名空间。 + */ +namespace go abcoder.testdata.thrifts +namespace java abcoder.testdata.thrifts + +// 你也可以在这里包含其他的 thrift 文件 +// include "shared.thrift" +include "person/person.thrift" + +// 定义一个常量 +const i32 VERSION = 1; + +/** + * 枚举(Enum)类型,用于定义一组命名的常量。 + */ +enum Status { + OK = 0 + ERROR = 1 +} + +/** + * 结构体(Struct)是 Thrift 中的基本构建块。 + * 它们本质上等同于类,但是没有继承。 + */ +struct UserProfile { + 1: required i32 uid (api.get="/hello"), + 2: required string name (api.get="/hello"), + 3: optional string email (api.get="/hello"), + 4: map attributes, +} + +// sayHello 方法的请求体 +struct HelloRequest { + 1: required string name, + 2: optional UserProfile profile (api.get="/hello"), +} + +// sayHello 方法的响应体 +struct HelloResponse { + 1: required string message, + 2: optional Status status = Status.OK, + 3: person.Person person +} + +/** + * 异常(Exception)在功能上等同于结构体, + * 不同之处在于它们在目标语言中会继承原生的异常基类。 + */ +exception InvalidRequest { + 1: i32 code, + 2: string reason, 
+} + +/** + * 服务(Service)定义了你的 RPC 公共接口。 + * 代码生成器会为你创建客户端和服务器的存根(stubs)。 + */ +service Greeter { + // 一个简单的函数,返回一句问候。 + // 它可能会抛出 InvalidRequest 异常。 + HelloResponse sayHello(1: HelloRequest request) throws (1: InvalidRequest err), + + /** + * 'oneway' 函数表示客户端发送请求后不会等待服务器的响应。 + * 客户端不会阻塞,服务器也不会发送回包。 + * Oneway 函数的返回类型必须是 void。 + */ + oneway void ping(), +} \ No newline at end of file diff --git a/testdata/thrifts/person/person.thrift b/testdata/thrifts/person/person.thrift new file mode 100644 index 0000000..dcc4345 --- /dev/null +++ b/testdata/thrifts/person/person.thrift @@ -0,0 +1,9 @@ +namespace go abcoder.testdata.thrifts.person +namespace java abcoder.testdata.thrifts.person + +include "../gender/gender.thrift" + +struct Person { + 1: string name + 2: gender.Gender gender +} \ No newline at end of file
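The new `lang/thrift/parser` package can also be exercised directly, without going through `lang/parse.go`. The following is a minimal sketch (not part of the patch) that assumes only the API introduced above — `tparser.Options`, `tparser.NewParser`, and `ParseRepo` — and runs it against the `testdata/thrifts` directory added in this change:

```go
package main

import (
	"fmt"
	"log"

	tparser "github.com/cloudwego/abcoder/lang/thrift/parser"
)

func main() {
	// TargetLanguage plays the role of the new -idl-pkg-type flag: it selects
	// which `namespace <lang> ...` declaration becomes the UniAST package path.
	// callThriftParser falls back to "go" when the flag is empty.
	opts := tparser.Options{
		TargetLanguage: "go",
		CollectComment: true,
	}

	p, err := tparser.NewParser("testdata/thrifts", opts)
	if err != nil {
		log.Fatal(err)
	}

	repo, err := p.ParseRepo()
	if err != nil {
		log.Fatal(err)
	}

	// Namespaces become packages; services become interface types whose
	// functions are collected as "Service.Function" methods.
	for _, mod := range repo.Modules {
		for pkgPath, pkg := range mod.Packages {
			fmt.Println("package:", pkgPath)
			for name := range pkg.Types {
				fmt.Println("  type:", name)
			}
			for name := range pkg.Functions {
				fmt.Println("  func:", name)
			}
		}
	}
}
```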
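For the identity scheme described in the `docs/uniast-*.md` changes (`ModPath?PkgPath#SymbolName`), the test data above should map roughly as follows. This sketch uses only the exported `uniast` helpers that the parser itself calls; the concrete values are inferred from the namespaces in the test files rather than taken from a recorded run:

```go
package main

import (
	"fmt"

	"github.com/cloudwego/abcoder/lang/uniast"
)

func main() {
	// person.thrift declares `namespace go abcoder.testdata.thrifts.person`, so with
	// target language "go" its Person struct should land in that package of the
	// synthetic "current" module, i.e. current?abcoder.testdata.thrifts.person#Person.
	person := uniast.NewIdentity("current", "abcoder.testdata.thrifts.person", "Person")
	fmt.Println(person.ModPath, person.PkgPath, person.Name)

	// main.thrift refers to the same type as `person.Person`; resolveTypeIdentity is
	// expected to follow the include alias back to person.thrift and reuse this identity.

	// Service functions are keyed as "Service.Function", with the service itself
	// collected as an interface type, e.g. Greeter and Greeter.sayHello in the
	// abcoder.testdata.thrifts namespace of main.thrift.
	sayHello := uniast.NewIdentity("current", "abcoder.testdata.thrifts", "Greeter.sayHello")
	fmt.Println(sayHello.ModPath, sayHello.PkgPath, sayHello.Name)
}
```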
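When `CollectComment` is false, `preScanThriftFiles` strips comments and re-parses so that all recorded offsets refer to the comment-free source. Below is a rough standalone sketch of that round trip using only the `thrift-ls` calls that already appear in the patch (`parser.Parse`, `format.FormatDocument`); the comment-removal step itself relies on the unexported `removeAllComments` helper, so it is only indicated in a comment here:

```go
package main

import (
	"fmt"
	"log"

	"github.com/joyme123/thrift-ls/format"
	"github.com/joyme123/thrift-ls/parser"
)

func main() {
	src := []byte("// a doc comment\nstruct Gender {\n  1: string gender\n}\n")

	// First parse, as in preScanThriftFiles.
	ast, err := parser.Parse("gender.thrift", src)
	if err != nil {
		log.Fatal(err)
	}
	doc := ast.(*parser.Document)

	// In the patch, removeAllComments(doc) would nil out every Comments field here.
	// Formatting the document and parsing the result again yields an AST whose
	// offsets refer to the cleaned source.
	cleaned, err := format.FormatDocument(doc)
	if err != nil {
		log.Fatal(err)
	}
	reparsed, err := parser.Parse("gender.thrift", []byte(cleaned))
	if err != nil {
		log.Fatal(err)
	}
	_ = reparsed.(*parser.Document)
	fmt.Println(cleaned)
}
```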