wenda/example.config.yml at main · fallondown/wenda · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

logging: False
#日志"
port: 17860
#webui 默认启动端口号"
library:
     strategy: "calc:2 rtst:2 agents:0"
     #库参数，每组参数间用空格分隔，冒号前为知识库类型，后为抽取数量。

     #知识库类型:
     #bing        cn.bing搜索，仅国内可用，目前处于服务降级状态
     #sosowx      sogo微信公众号搜索，可配合相应auto实现全文内容分析
     #fess        fess搜索引擎
     #rtst        支持实时生成的sentence_transformers
     #remote      调用远程闻达知识库，用于集群化部署
     #kg          知识图谱,暂未启用
     #特殊库：
     #mix         根据参数进行多知识库融合
     #agents      提供网络资源代理，没有知识库查找功能，所以数量为0
     #            （目前stable-diffusion的auto脚本需要使用其中功能，同时需开启stable-diffusion的api功能）

     count: 5
     #最大抽取数量（所有知识库总和）

     step: 2
     #知识库默认上下文步长
librarys:
  bing:
    count: 5
     #最大抽取数量
  bingsite:
     count: 5
     #最大抽取数量
     site: "www.12371.cn"
     #搜索网站
  fess:
     count: 1
     #最大抽取数量
     fess_host: "127.0.0.1:8080"
     #fess搜索引擎的部署地址
  remote:
    host: "http://127.0.0.1:17860/api/find"
     #远程知识库地址地址
  rtst:
     count: 3
     #最大抽取数量
   #   backend: Annoy
     size: 20
     #分块大小"
     overlap: 0
     #分块重叠长度
     model_path: "model/m3e-base"
     #向量模型存储路径
     device: cuda
     #embedding运行设备
  qdrant:
     path: txt
     #知识库文本路径
     model_path: "model/text2vec-large-chinese"
     #向量模型存储路径
     qdrant_host: "http://localhost:6333"
     #qdrant服务地址"
     device: cpu
     #qdrant运行设备
     collection: qa_collection
     #qdrant集合名称
  kg:
     count: 5
     #最大抽取数量
     knowledge_path: ""
     #知识库的文件夹目录名称，若留空则为txt
     graph_host: ""
     #图数据库部署地址
     model_path: ""
     #信息抽取模型所在路径"
llm_type: rwkv
#llm模型类型:glm6b、rwkv、llama、replitcode等，详见相关文件
llm_models:
  rwkv:
     path: "model/RWKV-4-World-7B-v1-OnlyForTest_52%_trained-20230606-ctx4096-i8.pth"      #rwkv模型位置"
     strategy: "cuda fp16i8"
   #   path: "model/rwkv_ggml_q8.bin"           #rwkv模型位置"
   #   strategy: "Q8_0"       #rwkvcpp:运行方式，设置strategy诸如"Q8_0->16"即可开启，代表运行Q8_0模型在16个cpu核心上
     #rwkv模型参数"

     historymode: state
     #rwkv历史记录实现方式：state、string
  glm6b:
     path: "model/chatglm-6b-int4"
     #glm模型位置"
     strategy: "cuda fp16"
     #cuda fp16	 所有glm模型 要直接跑在gpu上都可以使用这个参数
     #cuda fp16i8	 fp16原生模型 要自行量化为int8跑在gpu上可以使用这个参数
     #cuda fp16i4	 fp16原生模型 要自行量化为int4跑在gpu上可以使用这个参数
     #cuda:0 fp16 *14 -> cuda:1	fp16 多卡流水线并行，使用方法参考RWKV的strategy介绍。总层数28
   #   lora: "model/lora-450"
     #glm-lora模型位置
  llama:
     path: "model/stable-vicuna-13B.ggml.q4_2.bin"
     #llama模型位置
     strategy: ""
     #llama模型参数 暂时不用
  moss:
     path: "model/moss-moon-003-sft-plugin-int4"
     #模型位置
     strategy: ""
     #模型参数 暂时不用"
  openai:
   #   api_host: "https://gpt.lucent.blog/v1" #网友的反代
   #   api_host: "https://api.openai.com/v1" #官方地址
     api_host: "http://127.0.0.1:8000/v1" #rwkv runner

  replitcode:
     path: "model/replit-code-v1-3b"
     #replitcode模型位置
     #说明：目前模型参数和chat模型差异较大，写死了，不能通过wenda界面配置，需要调整自行到llm_replitcode.py 文件中调整，或放开wenda界面参数
     #y = model.generate(x, max_length=100, do_sample=true, top_p=0.95, top_k=4, temperature=0.2, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
     #模型地址：https://huggingface.co/replit/replit-code-v1-3b ，约10g
     #作用代码补全：问：def fibonacci(n):
     #答：def fibonacci(n):
     #if n == 0:
     #return 0
     #elif n == 1:
     #return 1
     #else:
     #return fibonacci(n-1) + fibonacci(n-2)
     #print(fibonacci(10))
     strategy: "cuda fp16"
    #因我的3070只有8g内存，所以是先把模理加载到内存进行精度转换，至少要32g系统内存， fp16 占用不到6g显存,fp32 超过8g未测试"