Skip to content

Commit 937ea69

Browse files
authored
更新专家模式代码和测试用例 (#72)
* update secure lr * update model and predict * update ppc_dev * update model setting * Update booster.py * update wedpr_ml_toolkit * update predict feature selection * update jupyter task * update test ml toolkit
1 parent 9f9118b commit 937ea69

File tree

12 files changed

+991
-60
lines changed

12 files changed

+991
-60
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# wedpr专家模式用户手册
2+
3+
## 配置
4+
5+
1. 左侧用户目录中新建配置文件,文件命名为:config.properties
6+
2. 配置信息参考:
7+
8+
```
9+
access_key_id=
10+
access_key_secret=
11+
remote_entrypoints=http://139.159.202.235:8005,http://139.159.202.235:8006
12+
13+
agency_name=SGD
14+
workspace_path=/user/ppc/milestone2/sgd/
15+
user=test_user
16+
storage_endpoint=http://192.168.0.18:50070
17+
```
18+
19+
3. 通过前端页面登录,例如:http://139.159.202.235:8005/
20+
4. 创建个人项目空间,通过【打开jupyter】按钮进入专家模式
21+
22+
## 基础功能
23+
24+
1. 支持通过launcher启动python,jupyter,终端,文本编辑等功能
25+
2. 支持在用户目录空间创建/修改/删除配置文件,文本文件,bash,python notebook等格式文件
26+
3. 通过launcher启动python,jupyter,终端后可以正常执行对应的代码功能
27+
28+
## hdfs数据功能
29+
30+
1. 支持注册dataset,支持两种方式: pd.DataFrame, hdfs_path
31+
2. 支持更新dataset
32+
33+
* 详细使用说明参考示例文件:【test_dataset.ipynb】
34+
35+
## wedpr任务功能
36+
37+
1. 支持配置任务参数
38+
2. 支持提交psi,建模训练,预测等任务
39+
3. 支持获取任务结果
40+
4. 支持对任务结果进行明文处理
41+
42+
* 详细使用说明参考示例文件:【test_psi.ipynb】和【test_xgboost.ipynb】

python/wedpr_ml_toolkit/test/config.properties

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,3 @@ agency_name=SGD
66
workspace_path=/user/wedpr/milestone2/sgd/
77
user=test_user
88
storage_endpoint=http://127.0.0.1:50070
9-
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"['/usr/lib/python3/dist-packages/wedpr_ml_toolkit/', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python\\\\wedpr_ml_toolkit', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\python38.zip', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\DLLs', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib', 'c:\\\\Users\\\\yanxi\\\\anaconda3', '', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\win32', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\win32\\\\lib', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\Pythonwin', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\IPython\\\\extensions', 'C:\\\\Users\\\\yanxi\\\\.ipython']\n"
13+
]
14+
}
15+
],
16+
"source": [
17+
"import numpy as np\n",
18+
"import pandas as pd\n",
19+
"from wedpr_ml_toolkit.config.wedpr_ml_config import WeDPRMlConfigBuilder\n",
20+
"from wedpr_ml_toolkit.wedpr_ml_toolkit import WeDPRMlToolkit\n",
21+
"from wedpr_ml_toolkit.toolkit.dataset_toolkit import DatasetToolkit"
22+
]
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": 2,
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"# 读取配置文件\n",
31+
"wedpr_config = WeDPRMlConfigBuilder.build_from_properties_file('config.properties')\n",
32+
"wedpr_ml_toolkit = WeDPRMlToolkit(wedpr_config)"
33+
]
34+
},
35+
{
36+
"cell_type": "code",
37+
"execution_count": 3,
38+
"metadata": {},
39+
"outputs": [
40+
{
41+
"name": "stdout",
42+
"output_type": "stream",
43+
"text": [
44+
"http://139.159.202.235:50070 /user/ppc/milestone2/sgd/test_user SGD\n",
45+
"/user/ppc/milestone2/sgd/test_user\\d-101\n",
46+
" id y x1 x2 x3 x4 x5 x6 \\\n",
47+
"0 0 1 0.954183 0.652034 0.704070 0.180889 0.025025 0.511596 \n",
48+
"1 1 1 0.302088 0.462222 0.435542 0.029966 0.931294 0.848483 \n",
49+
"2 2 1 0.468104 0.430161 0.239322 0.588153 0.470668 0.225856 \n",
50+
"3 3 0 0.152269 0.811666 0.834451 0.354288 0.635447 0.062092 \n",
51+
"4 4 0 0.841470 0.800512 0.451507 0.118651 0.748845 0.557916 \n",
52+
"\n",
53+
" x7 x8 x9 x10 \n",
54+
"0 0.529848 0.759689 0.159081 0.556419 \n",
55+
"1 0.962787 0.224096 0.464418 0.208487 \n",
56+
"2 0.564879 0.730366 0.394245 0.299081 \n",
57+
"3 0.424057 0.202234 0.577448 0.636958 \n",
58+
"4 0.030906 0.514350 0.340864 0.123303 \n"
59+
]
60+
}
61+
],
62+
"source": [
63+
"# 注册 dataset,支持两种方式: pd.DataFrame, hdfs_path\n",
64+
"# 1. pd.DataFrame\n",
65+
"df = pd.DataFrame({\n",
66+
" 'id': np.arange(0, 100), # id列,顺序整数\n",
67+
" 'y': np.random.randint(0, 2, size=100),\n",
68+
" # x1到x10列,随机数\n",
69+
" **{f'x{i}': np.random.rand(100) for i in range(1, 11)}\n",
70+
"})\n",
71+
"\n",
72+
"dataset1 = DatasetToolkit(storage_entrypoint=wedpr_ml_toolkit.get_storage_entry_point(),\n",
73+
" storage_workspace=wedpr_config.user_config.get_workspace_path(),\n",
74+
" dataset_owner='flyhuang1',\n",
75+
" agency=wedpr_config.user_config.agency_name,\n",
76+
" values=df,\n",
77+
" is_label_holder=True)\n",
78+
"print(dataset1.storage_client.storage_client.endpoint, dataset1.storage_workspace, dataset1.agency)\n",
79+
"dataset1.storage_client = None # 本地测试时跳过hdfs上传/下载过程\n",
80+
"dataset1.save_values(path='d-101')\n",
81+
"print(dataset1.dataset_path)\n",
82+
"print(dataset1.values.head())"
83+
]
84+
},
85+
{
86+
"cell_type": "code",
87+
"execution_count": 4,
88+
"metadata": {},
89+
"outputs": [
90+
{
91+
"name": "stdout",
92+
"output_type": "stream",
93+
"text": [
94+
"http://139.159.202.235:50070 /user/ppc/milestone2/sgd/test_user WeBank\n",
95+
"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\n",
96+
"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\n",
97+
" id z1 z2 z3 z4 z5 z6 z7 \\\n",
98+
"0 0 0.597205 0.942475 0.886443 0.560584 0.254432 0.370152 0.076031 \n",
99+
"1 1 0.778616 0.607374 0.616211 0.602282 0.385989 0.816963 0.756814 \n",
100+
"2 2 0.999795 0.596794 0.240741 0.241070 0.857676 0.342412 0.066459 \n",
101+
"3 3 0.968410 0.895163 0.636140 0.978791 0.237098 0.095272 0.938806 \n",
102+
"4 4 0.921513 0.454901 0.004514 0.769216 0.627185 0.676253 0.184952 \n",
103+
"\n",
104+
" z8 z9 z10 \n",
105+
"0 0.587627 0.851390 0.864929 \n",
106+
"1 0.661537 0.865674 0.050091 \n",
107+
"2 0.473916 0.080120 0.477873 \n",
108+
"3 0.452399 0.953515 0.405465 \n",
109+
"4 0.877475 0.316322 0.139290 \n"
110+
]
111+
}
112+
],
113+
"source": [
114+
"# 2. hdfs_path\n",
115+
"dataset2 = DatasetToolkit(storage_entrypoint=wedpr_ml_toolkit.get_storage_entry_point(), \n",
116+
" storage_workspace=wedpr_config.user_config.get_workspace_path(), \n",
117+
" dataset_owner='flyhuang',\n",
118+
" dataset_path=\"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\", \n",
119+
" agency=\"WeBank\")\n",
120+
"print(dataset2.storage_client.storage_client.endpoint, dataset2.storage_workspace, dataset2.agency)\n",
121+
"print(dataset2.dataset_path)\n",
122+
"dataset2.storage_client = None # 本地测试时跳过hdfs上传/下载过程\n",
123+
"\n",
124+
"# 提供本地测试数据\n",
125+
"if dataset2.storage_client is None:\n",
126+
" # 支持更新dataset的values数据\n",
127+
" df2 = pd.DataFrame({\n",
128+
" 'id': np.arange(0, 100), # id列,顺序整数\n",
129+
"        **{f'z{i}': np.random.rand(100) for i in range(1, 11)}  # z1到z10列,随机数\n",
130+
" })\n",
131+
" dataset2.update_values(values=df2)\n",
132+
" dataset2.save_values()\n",
133+
" print(dataset2.dataset_path)\n",
134+
" print(dataset2.values.head())\n",
135+
"\n",
136+
"# 对于己方数据集支持load_values,其他方数据集无需load_values,可直接使用\n",
137+
"if dataset2.storage_client is not None:\n",
138+
" # 仅支持load本机构hdfs的数据集\n",
139+
" dataset2.load_values(header=0)\n",
140+
" print(dataset2.dataset_path)\n",
141+
" print(dataset2.values.head())"
142+
]
143+
},
144+
{
145+
"cell_type": "code",
146+
"execution_count": 5,
147+
"metadata": {},
148+
"outputs": [
149+
{
150+
"name": "stdout",
151+
"output_type": "stream",
152+
"text": [
153+
"/user/ppc/milestone2/sgd/test_user\\d-101\n",
154+
" id y x1 x2 x3 x4 x5 x6 \\\n",
155+
"0 0 1 0.954183 0.652034 0.704070 0.180889 0.025025 0.511596 \n",
156+
"1 1 1 0.302088 0.462222 0.435542 0.029966 0.931294 0.848483 \n",
157+
"2 2 1 0.468104 0.430161 0.239322 0.588153 0.470668 0.225856 \n",
158+
"3 3 0 0.152269 0.811666 0.834451 0.354288 0.635447 0.062092 \n",
159+
"4 4 0 0.841470 0.800512 0.451507 0.118651 0.748845 0.557916 \n",
160+
"\n",
161+
" x7 x8 x9 x10 \n",
162+
"0 0.529848 0.759689 0.159081 0.556419 \n",
163+
"1 0.962787 0.224096 0.464418 0.208487 \n",
164+
"2 0.564879 0.730366 0.394245 0.299081 \n",
165+
"3 0.424057 0.202234 0.577448 0.636958 \n",
166+
"4 0.030906 0.514350 0.340864 0.123303 \n"
167+
]
168+
}
169+
],
170+
"source": [
171+
"# 更新数据集\n",
172+
"if dataset1.storage_client is not None:\n",
173+
" dataset1.update_values(\n",
174+
" path='/user/ppc/milestone2/sgd/flyhuang1/d-9606704699156485')\n",
175+
" dataset1.load_values(header=0)\n",
176+
"print(dataset1.dataset_path)\n",
177+
"print(dataset1.values.head())"
178+
]
179+
},
180+
{
181+
"cell_type": "code",
182+
"execution_count": null,
183+
"metadata": {},
184+
"outputs": [],
185+
"source": []
186+
}
187+
],
188+
"metadata": {
189+
"kernelspec": {
190+
"display_name": "base",
191+
"language": "python",
192+
"name": "python3"
193+
},
194+
"language_info": {
195+
"codemirror_mode": {
196+
"name": "ipython",
197+
"version": 3
198+
},
199+
"file_extension": ".py",
200+
"mimetype": "text/x-python",
201+
"name": "python",
202+
"nbconvert_exporter": "python",
203+
"pygments_lexer": "ipython3",
204+
"version": "3.8.5"
205+
}
206+
},
207+
"nbformat": 4,
208+
"nbformat_minor": 2
209+
}

0 commit comments

Comments
 (0)