Skip to content

Commit 0215a9a

Browse files
authored
repo-sync-2024-06-19T10:45:51+0800 (#72)
1 parent a10a98d commit 0215a9a

File tree

9 files changed

+923
-86
lines changed

9 files changed

+923
-86
lines changed

docs/architecture/apps/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ TrustedFlow内置了多种可信APP,每一个可信APP在执行计算逻辑之
3131
lr_train
3232
xgb_predict
3333
lr_predict
34+
lgbm_train
35+
lgbm_predict
3436
binary_evaluation
3537
prediction_bias_eval
3638

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# LightGBM预测
2+
3+
使用给定的LightGBM模型对数据进行预测。
4+
5+
## 组件定义
6+
7+
1. 参数
8+
(1) pred_name: 预测值的列名,默认为“pred”。
9+
(2) save_label: 输出结果是否包含标签列,true表示保存;为true时,输入数据中必须包含标签列。
10+
(3) label_name: 标签列的名称,默认为“label”。
11+
(4) save_id: 输出结果是否保存ID列,true表示保存;为true时,输入数据中必须包含ID列。
12+
(5) id_name: ID列的名称,默认为“id”。
13+
(6) col_names: 可选,输出指定的列到结果中,默认为空。
14+
2. 输入:待预测的数据以及LightGBM模型。
15+
3. 输出:预测结果。
16+
17+
```json
18+
{
19+
"domain": "ml.predict",
20+
"name": "lgbm_predict",
21+
"desc": "Predict using the lgbm model.",
22+
"version": "0.0.1",
23+
"attrs": [
24+
{
25+
"name": "pred_name",
26+
"desc": "Column name for predictions.",
27+
"type": "AT_STRING",
28+
"atomic": {
29+
"is_optional": true,
30+
"default_value": {
31+
"s": "pred"
32+
}
33+
}
34+
},
35+
{
36+
"name": "save_label",
37+
"desc": "Whether or not to save real label column into output pred table. If true, input feature_dataset must contain label column.",
38+
"type": "AT_BOOL",
39+
"atomic": {
40+
"is_optional": true,
41+
"default_value": {}
42+
}
43+
},
44+
{
45+
"name": "label_name",
46+
"desc": "Column name for label.",
47+
"type": "AT_STRING",
48+
"atomic": {
49+
"is_optional": true,
50+
"default_value": {
51+
"s": "label"
52+
}
53+
}
54+
},
55+
{
56+
"name": "save_id",
57+
"desc": "Whether to save id column into output pred table. If true, input feature_dataset must contain id column.",
58+
"type": "AT_BOOL",
59+
"atomic": {
60+
"is_optional": true,
61+
"default_value": {}
62+
}
63+
},
64+
{
65+
"name": "id_name",
66+
"desc": "Column name for id.",
67+
"type": "AT_STRING",
68+
"atomic": {
69+
"is_optional": true,
70+
"default_value": {
71+
"s": "id"
72+
}
73+
}
74+
},
75+
{
76+
"name": "col_names",
77+
"desc": "Extra column names into output pred table.",
78+
"type": "AT_STRINGS",
79+
"atomic": {
80+
"list_max_length_inclusive": "-1",
81+
"is_optional": true
82+
}
83+
}
84+
],
85+
"inputs": [
86+
{
87+
"name": "feature_dataset",
88+
"desc": "Input feature dataset.",
89+
"types": [
90+
"sf.table.individual"
91+
],
92+
"attrs": [
93+
{
94+
"name": "ids",
95+
"desc": "Id columns.",
96+
"col_max_cnt_inclusive": "1"
97+
},
98+
{
99+
"name": "label",
100+
"desc": "Label column.",
101+
"col_max_cnt_inclusive": "1"
102+
}
103+
]
104+
},
105+
{
106+
"name": "model",
107+
"desc": "Input model.",
108+
"types": [
109+
"sf.model.lgbm"
110+
]
111+
}
112+
],
113+
"outputs": [
114+
{
115+
"name": "pred",
116+
"desc": "Output prediction.",
117+
"types": [
118+
"sf.table.individual"
119+
]
120+
}
121+
]
122+
}
123+
```
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# LightGBM训练
2+
3+
使用LightGBM对数据集进行训练,得到LightGBM模型,支持二分类(binary)和回归(regression)两种学习目标。
4+
5+
## 组件定义
6+
7+
```json
8+
{
9+
"domain": "ml.train",
10+
"name": "lgbm_train",
11+
"desc": "LightGBM train component for individual dataset.",
12+
"version": "0.0.1",
13+
"attrs": [
14+
{
15+
"name": "n_estimators",
16+
"desc": "Number of boosted trees to fit.",
17+
"type": "AT_INT",
18+
"atomic": {
19+
"is_optional": true,
20+
"default_value": {
21+
"i64": "10"
22+
},
23+
"lower_bound_enabled": true,
24+
"lower_bound": {
25+
"i64": "1"
26+
},
27+
"lower_bound_inclusive": true,
28+
"upper_bound_enabled": true,
29+
"upper_bound": {
30+
"i64": "1024"
31+
},
32+
"upper_bound_inclusive": true
33+
}
34+
},
35+
{
36+
"name": "objective",
37+
"desc": "Specify the learning objective.",
38+
"type": "AT_STRING",
39+
"atomic": {
40+
"is_optional": true,
41+
"default_value": {
42+
"s": "binary"
43+
},
44+
"allowed_values": {
45+
"ss": [
46+
"regression",
47+
"binary"
48+
]
49+
}
50+
}
51+
},
52+
{
53+
"name": "boosting_type",
54+
"desc": "Boosting type.",
55+
"type": "AT_STRING",
56+
"atomic": {
57+
"is_optional": true,
58+
"default_value": {
59+
"s": "gbdt"
60+
},
61+
"allowed_values": {
62+
"ss": [
63+
"gbdt",
64+
"rf",
65+
"dart"
66+
]
67+
}
68+
}
69+
},
70+
{
71+
"name": "learning_rate",
72+
"desc": "Learning rate.",
73+
"type": "AT_FLOAT",
74+
"atomic": {
75+
"is_optional": true,
76+
"default_value": {
77+
"f": 0.1
78+
},
79+
"lower_bound_enabled": true,
80+
"lower_bound": {},
81+
"upper_bound_enabled": true,
82+
"upper_bound": {
83+
"f": 1
84+
},
85+
"upper_bound_inclusive": true
86+
}
87+
},
88+
{
89+
"name": "num_leaves",
90+
"desc": "Max number of leaves in one tree.",
91+
"type": "AT_INT",
92+
"atomic": {
93+
"is_optional": true,
94+
"default_value": {
95+
"i64": "31"
96+
},
97+
"lower_bound_enabled": true,
98+
"lower_bound": {
99+
"i64": "2"
100+
},
101+
"lower_bound_inclusive": true,
102+
"upper_bound_enabled": true,
103+
"upper_bound": {
104+
"i64": "1024"
105+
},
106+
"upper_bound_inclusive": true
107+
}
108+
}
109+
],
110+
"inputs": [
111+
{
112+
"name": "train_dataset",
113+
"desc": "Input table.",
114+
"types": [
115+
"sf.table.individual"
116+
],
117+
"attrs": [
118+
{
119+
"name": "ids",
120+
"desc": "Id columns will not be trained."
121+
},
122+
{
123+
"name": "label",
124+
"desc": "Label column.",
125+
"col_min_cnt_inclusive": "1",
126+
"col_max_cnt_inclusive": "1"
127+
}
128+
]
129+
}
130+
],
131+
"outputs": [
132+
{
133+
"name": "output_model",
134+
"desc": "Output model.",
135+
"types": [
136+
"sf.model.lgbm"
137+
]
138+
}
139+
]
140+
}
141+
```

docs/architecture/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
核心功能
1+
架构设计
22
========================
33
想了解TrustedFlow原理和功能,欢迎阅读下列文章!
44

0 commit comments

Comments
 (0)