Skip to content

Commit b4a0987

Browse files
authored
opt: (#7)
* feat: add module compress * opt
1 parent d910402 commit b4a0987

File tree

7 files changed

+211
-20
lines changed

7 files changed

+211
-20
lines changed

src/compress/compress.rs

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use crate::config::CONFIG;
3131
use crate::storage::cache::get_cache;
3232
use crate::storage::cache::load_repo;
3333

34+
use super::types::types::Module;
3435
use super::types::types::ToCompressVar;
3536
use super::types::types::Variant;
3637

@@ -50,13 +51,17 @@ pub fn from_json(id: &str, json: &str) -> Result<Repository, Box<dyn Error>> {
5051
Ok(f)
5152
}
5253

54+
/// Reports whether `modpath` identifies an external module.
///
/// A module path is treated as external when it is empty or when it contains
/// an `@` (presumably a `name@version` style path — confirm against the
/// producer of the repository JSON).
pub fn is_externa_mod(modpath: &str) -> bool {
    modpath.is_empty() || modpath.contains('@')
}
57+
5358
pub async fn compress_all(repo: &mut Repository) {
5459
let mut to_compress_func = Vec::new();
5560
let mut to_compress_type = Vec::new();
5661
let mut to_compress_var = Vec::new();
5762

58-
for (_, _mod) in &repo.modules {
59-
if _mod.dir == "" {
63+
for (mpath, _mod) in &repo.modules {
64+
if is_externa_mod(mpath) {
6065
// NOTICE: an empty module path, or one containing '@', marks an external module, which is only used for symbol lookup
6166
continue;
6267
}
@@ -93,17 +98,32 @@ pub async fn compress_all(repo: &mut Repository) {
9398
cascade_compress_struct(&id, repo, &mut m).await;
9499
}
95100

96-
for (mname, _mod) in &repo.clone().modules {
97-
if _mod.dir == "" {
101+
for (mpath, _mod) in repo.clone().modules {
102+
if is_externa_mod(&mpath) {
98103
// NOTICE: an empty module path, or one containing '@', marks an external module, which is only used for symbol lookup
99104
continue;
100105
}
101106
for (id, pkg) in &_mod.packages {
102107
if pkg.compress_data.is_none() {
103-
compress_package(&id, mname, repo).await;
108+
compress_package(&id, &mpath, repo).await;
104109
}
105110
}
111+
compress_module(&mpath, repo).await;
112+
repo.save_to_cache();
113+
}
114+
}
115+
116+
pub async fn compress_module(modpath: &str, repo: &mut Repository) {
117+
let module = repo.modules.get_mut(modpath).unwrap();
118+
let compress_data = module.to_compress();
119+
let compress_data =
120+
llm_compress_module(serde_json::to_string(&compress_data).unwrap().as_str()).await;
121+
if compress_data.is_none() {
122+
return;
106123
}
124+
let compress_data = compress_data.unwrap();
125+
module.compress_data = Some(compress_data);
126+
println!("finish to compress module: {}", module.name);
107127
}
108128

109129
pub async fn compress_package(id: &str, module: &str, repo: &mut Repository) {
@@ -811,6 +831,12 @@ pub async fn cascade_compress_struct(
811831
// panic!("empty compress for {:?}", id)
812832
}
813833

834+
/// Sends the JSON description of a module (`m`) to the LLM as a
/// `ToCompress::ToCompressModule` request and returns the result of
/// `compress`, converted into an `Option<String>`.
async fn llm_compress_module(m: &str) -> Option<String> {
    let request = ToCompress::ToCompressModule(m.to_string());
    Option::from(compress(&request).await)
}
839+
814840
async fn llm_compress_package(pkg: &str) -> Option<String> {
815841
let compress_pkg = ToCompress::ToCompressPkg(pkg.to_string());
816842
let compress_data = compress(&compress_pkg).await;

src/compress/llm/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pub enum ToCompress {
2424
ToCompressType(String),
2525
ToCompressVar(String),
2626
ToCompressPkg(String),
27+
ToCompressModule(String),
2728
ToMergeRustPkg(String),
2829
ToValidateRust(String),
2930
}

src/compress/llm/prompts.rs

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ macro_rules! choose_prompt_lang {
3030

3131
pub fn make_compress_prompt(to_compress: &ToCompress) -> String {
3232
match to_compress {
33+
ToCompress::ToCompressModule(f) => {
34+
choose_prompt_lang!(PROMPT_COMPRESS_MOD).replace("{{DATA}}", f)
35+
}
3336
ToCompress::ToCompressType(t) => {
3437
choose_prompt_lang!(PROMPT_COMPRESS_TYPE).replace("{{DATA}}", t)
3538
}
@@ -462,7 +465,7 @@ bufferSizeLimit is an integer variable with an initial value of integer 1024 tha
462465

463466
const PROMPT_COMPRESS_PKG_ZH: &str = r##"
464467
# 角色
465-
你是一名经验丰富的工程师,专门研究lang,并深入了解其各种包。你的主要职责是利用其他开发人员提供的有关公共函数和类型的数据,简化并总结lang包的基本功能。你的目标是通过使这些包更易理解,帮助对这些包了解较少的工程师。
468+
你是一名经验丰富的工程师,并深入了解其各种包。你的主要职责是利用其他开发人员提供的有关公共函数和类型的数据,简化并总结包的基本功能。你的目标是通过使这些包更易理解,帮助对这些包了解较少的工程师。
466469
467470
# 提示
468471
@@ -655,3 +658,124 @@ Key global variables:
655658
{{DATA}}
656659
657660
"##;
661+
662+
// Chinese prompt used to summarize a whole module. `{{DATA}}` is replaced with
// the JSON description of the module (name, dir and per-package summaries).
const PROMPT_COMPRESS_MOD_ZH: &str = r##"
# 角色
你是一名经验丰富的工程师,并深入了解其各种模块。你的主要职责是利用其他开发人员提供的有关公共函数和类型的数据,简化并总结模块的基本功能。你的目标是通过使这些包更易理解,帮助对这些包了解较少的工程师。

# 提示

## 输入格式(JSON)
包含 一个模块 及 其内部各个包的总结描述:
- "Name": 模块名称
- "Dir": 模块所处的相对仓库位置
- "Packages": 格式为数组。该数组中的每个对象表示此模块内每个子包描述:
  - "Name": 使用该包的名称,格式为字符串
  - "Description": 该包的总结,格式为字符串


## 输出格式(text)
直接输出总结内容。不要输出JSON(IMPORTANT)!

## 总结内容
- 该模块的主要功能和用途
- 该模块的一些关键函数和类型的描述


# 约束
- 专注于高度总结模块的基本功能,避免深入具体实现细节。
- 编写简短且易于理解的总结,供其他工程师参考。
- 保持与提供的输入数据一致的技术术语。
- 输出字符限制为2000字符。


# 示例

## 输入
{
  "Name": "github.com/cloudwego/localsession",
  "Dir": ".",
  "Packages": [
    {
      "Description": "该包用于管理会话上下文,并定义了通用的Session接口",
      "Name": "github.com/cloudwego/localsession"
    },
    {
      "Description": "该包用于处理具体的上下文的metainfo等信息的兜底方式",
      "Name": "github.com/cloudwego/localsession/backup"
    }
  ]
}

## 输出
此模块位于当前目录,为应用中的会话管理提供工具,特别关注会话上下文的备份和恢复机制。它包括创建、备份、恢复和清除会话上下文的功能,以及默认的上下文兜底方式。
关键包:
- github.com/cloudwego/localsession: 该包用于管理会话上下文,并定义了通用的Session接口
- github.com/cloudwego/localsession/backup: 该包用于处理具体的上下文的metainfo等信息的兜底方式


# 现在,请开始处理如下输入:

{{DATA}}

"##;
723+
724+
// English prompt used to summarize a whole module. `{{DATA}}` is replaced with
// the JSON description of the module (name, dir and per-package summaries).
const PROMPT_COMPRESS_MOD_EN: &str = r##"
# Character
You are an experienced engineer and have in-depth knowledge of its various modules. Your primary responsibility is to simplify and summarize the basic functionality of the module using data about common functions and types provided by other developers. Your goal is to help engineers who know less about these packages by making them easier to understand.

# Tips

## Input format (JSON)
Contains a module and the summaries of the packages inside it:
- "Name": indicates the module name
- "Dir": indicates the relative repository location of the module
- "Packages": in array format. Each object in this array represents each subpackage description within this module:
  - "Name": indicates the name of the package. The format is a string
  - "Description": indicates the summary of the package. The format is a string


## Output format (text)
Output summary content directly. Do not output JSON (IMPORTANT)!

## Summarize the content
- Main functions and uses of the module
- Description of some of the key functions and types of the module


# Constraint
- Focus on the basic functions of highly summarized modules and avoid delving into specific implementation details.
- Write short and easy to understand summaries for other engineers to refer to.
- Technical terms that are consistent with the input data provided.
- The output character limit is 2000 characters.


# Examples

## Input
{
  "Name": "github.com/cloudwego/localsession",
  "Dir": ".",
  "Packages": [
    {
      "Description": "This package manages the Session context and defines the generic session interface ",
      "Name": "github.com/cloudwego/localsession"
    },
    {
      "Description": "This package is used to handle the specific context of metainfo and other information in the back of the way ",
      "Name": "github.com/cloudwego/localsession/backup"
    }
  ]
}

## Output
This module is located in the current directory and provides tools for session management in the application, with a special focus on backup and recovery mechanisms for session context. It includes the ability to create, back up, restore, and clear session context, as well as the default context bypass.
Key package:
- github.com/cloudwego/localsession: this package is used to manage Session context, and defines the general Session interface
- github.com/cloudwego/localsession/backup: this package to deal with the specific context of the information such as the metainfo way out

# Now, please summarize below input:
{{DATA}}
"##;

src/compress/types/types.rs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ impl Repository {
239239
self.modules.insert(mod_name.clone(), _mod.clone());
240240
}
241241
}
242+
self.graph = other.graph.clone();
242243
}
243244

244245
pub fn save_to_cache(&self) {
@@ -357,9 +358,29 @@ pub struct Module {
357358
#[serde(rename = "Packages")]
358359
pub packages: HashMap<String, Package>,
359360
#[serde(rename = "Files")]
360-
pub files: HashMap<String, File>,
361-
#[serde(rename = "Language")]
361+
pub files: Option<HashMap<String, File>>,
362+
#[serde(rename = "Language", default)]
362363
pub language: String,
364+
#[serde(rename = "compress_data")]
365+
pub compress_data: Option<String>,
366+
}
367+
368+
impl Module {
369+
pub fn to_compress(&self) -> ToCompressModule {
370+
let mut packages = Vec::new();
371+
for (_, p) in self.packages.iter() {
372+
packages.push(Description {
373+
name: &p.id,
374+
description: p.compress_data.as_ref().unwrap(),
375+
});
376+
}
377+
378+
ToCompressModule {
379+
name: &self.name,
380+
dir: &self.dir,
381+
packages: Some(packages),
382+
}
383+
}
363384
}
364385

365386
#[derive(Serialize, Deserialize, Debug, Clone)]
@@ -769,6 +790,16 @@ pub(crate) struct ToCompressPkg<'a> {
769790
pub(crate) vars: Option<Vec<Description<'a>>>,
770791
}
771792

793+
#[derive(Serialize, Debug)]
794+
pub(crate) struct ToCompressModule<'a> {
795+
#[serde(rename = "Name")]
796+
pub(crate) name: &'a str,
797+
#[serde(rename = "Dir")]
798+
pub(crate) dir: &'a str,
799+
#[serde(rename = "Packages")]
800+
pub(crate) packages: Option<Vec<Description<'a>>>,
801+
}
802+
772803
#[derive(Serialize, Debug)]
773804
pub(crate) struct Description<'a> {
774805
pub name: &'a str,

src/lang/patch/lib.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,10 @@ type Options struct {
5555
OutDir string
5656
}
5757

58-
func NewPatcher(opts Options) *Patcher {
58+
func NewPatcher(repo *uniast.Repository, opts Options) *Patcher {
5959
return &Patcher{
6060
Options: opts,
61+
repo: repo,
6162
}
6263
}
6364

@@ -98,6 +99,7 @@ next_dep:
9899
})
99100
}
100101
n := patchNode{
102+
Identity: patch.Id,
101103
FileLine: node.FileLine(),
102104
Codes: patch.Codes,
103105
File: f,
@@ -174,7 +176,7 @@ func (p *Patcher) Flush() error {
174176
}
175177

176178
// write origins
177-
for _, mod := range p.repo.Modules {
179+
for _, mod := range p.repo.InternalModules() {
178180
for _, f := range mod.Files {
179181
if p.patches[f.Path] != nil {
180182
continue

src/lang/uniast/ast.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ type Repository struct {
3737
Graph map[string]*Node
3838
}
3939

40+
func (r Repository) InternalModules() []*Module {
41+
var ret []*Module
42+
for k, v := range r.Modules {
43+
if !IsExternalModule(k) {
44+
ret = append(ret, v)
45+
}
46+
}
47+
return ret
48+
}
49+
4050
func NewRepository(name string) Repository {
4151
ret := Repository{
4252
Name: name,
@@ -82,6 +92,7 @@ type Module struct {
8292
Packages map[PkgPath]*Package // pkage import path => Package
8393
Dependencies map[string]string // module name => module_path@version
8494
Files map[string]*File // relative path => file info
95+
CompressData *string `json:"compress_data,omitempty"` // module compress info
8596
}
8697

8798
func (r Repository) GetFileById(id Identity) *File {
@@ -164,9 +175,10 @@ func ModPathName(mod ModPath) string {
164175

165176
// Identity holds identity information about a third party declaration
166177
type Identity struct {
167-
ModPath // ModPath is the module which the package belongs to
168-
PkgPath // Import Path of the third party package
169-
Name string // Unique Name of declaration (FunctionName, TypeName.MethodName, InterfaceName<TypeName>.MethodName, or TypeName)
178+
ModPath `json:"ModPath" jsonschema:"description=the compiling module of the ast node, the format is {ModName} or {ModName}@{Version}"` // ModPath is the module which the package belongs to
179+
PkgPath `json:"PkgPath" jsonschema:"description=the namespace of the ast node"` // Import Path of the third party package
180+
181+
Name string `json:"Name" jsonschema:"description=unique name of the ast node, the format is one of {FunctionName}, {TypeName}.{MethodName}, {InterfaceName}<{TypeName}>.{MethodName}, {TypeName}"` // Unique Name of declaration (FunctionName, TypeName.MethodName, InterfaceName<TypeName>.MethodName, or TypeName)
170182
}
171183

172184
func NewIdentity(mod, pkg, name string) Identity {

src/lang/uniast/node.go

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@
1414

1515
package uniast
1616

17-
import (
18-
"fmt"
19-
)
20-
2117
func (r *Repository) GetNode(id Identity) *Node {
2218
key := id.Full()
2319
node, ok := r.Graph[key]
@@ -55,6 +51,7 @@ func (r *Repository) SetNode(id Identity, typ NodeType) *Node {
5551
node = &Node{
5652
Identity: id,
5753
Type: typ,
54+
Repo: r,
5855
}
5956
r.Graph[key] = node
6057
}
@@ -89,6 +86,7 @@ func (r *Repository) AddRelation(node *Node, dep Identity) {
8986
if !ok {
9087
nd = &Node{
9188
Identity: dep,
89+
Repo: r,
9290
}
9391
r.Graph[key] = nd
9492
}
@@ -209,9 +207,6 @@ func (t NodeType) MarshalJSON() ([]byte, error) {
209207

210208
func (t *NodeType) UnmarshalJSON(b []byte) error {
211209
typ := NewNodeType(string(b))
212-
if typ == UNKNOWN {
213-
return fmt.Errorf("unknown node type: %s", b)
214-
}
215210
*t = typ
216211
return nil
217212
}

0 commit comments

Comments
 (0)