
Commit 7a910d3

Author: zenghsh3

Baselines of grid dispatching competition (#709)

* add baselines for grid dispatching competition
* update doc
* remove model
* update link
* fix yapf
* refine code and doc
* refine doc

1 parent 527155d commit 7a910d3

File tree

11 files changed: +1156 −0 lines changed

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
## Baselines for grid dispatching competition

Competition link: [国家电网调控AI创新大赛:电网运行组织智能安排 (State Grid Dispatching AI Innovation Competition: Intelligent Scheduling of Grid Operations)](https://aistudio.baidu.com/aistudio/competition/detail/111)

We provide distributed SAC baselines based on PARL, with paddlepaddle and torch implementations:

- [paddlepaddle baseline](paddle)
- [torch baseline](torch)
Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
## SAC baseline for grid dispatching competition

In this example, we provide a distributed SAC baseline based on PARL and paddlepaddle for the [grid dispatching competition](https://aistudio.baidu.com/aistudio/competition/detail/111) task.

### Dependencies
* Linux
* python3.6+
* paddlepaddle >= 2.1.0
* parl >= 2.0.0
### Computing resource requirements
* 1 GPU + 6 CPUs

### Training

1. Download the pretrained model (trained on the fixed first 288 timesteps of data) to the current directory (filename: `paddle_pretrain_model`). A sketch of how such a checkpoint is typically restored is shown after these steps.

   [Baidu Pan](https://pan.baidu.com/s/1R-4EWIgNr2YogbJnMXk4Cg) (password: hwkb)
2. Copy all files of `gridsim` (the competition package) to the current directory.
```bash
# For example:
cp -r /XXX/gridsim/* .
```

3. Update the data path for distributed training (use an absolute path).
```bash
export PWD=`pwd`
python yml_creator.py --dataset_path $PWD/data
```

4. Set the environment variables for PARL and gridsim.
```bash
export PARL_BACKEND=paddle
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib64
```

5. Start the xparl cluster.
```bash
# You can adjust `cpu_num` below and `args.actor_num` in train.py based on the number of CPUs on your machine.
# Note that you only need to start the cluster once.
xparl start --port 8010 --cpu_num 6
```

6. Start training.
```bash
python train.py --actor_num 6
```

7. Visualize the training curve and other information.
```bash
tensorboard --logdir .
```
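
For reference, the pretrained checkpoint from step 1 is typically loaded through PARL's standard `Agent.restore` interface. The sketch below is illustrative only: the module names, dimensions, and SAC hyperparameters are assumptions, and the actual loading code lives in `train.py`.

```python
from parl.algorithms import SAC
from grid_model import GridModel    # assumed module names for the files in this commit
from grid_agent import GridAgent

model = GridModel(obs_dim=819, action_dim=54)          # placeholder dimensions
agent = GridAgent(SAC(model, gamma=0.99, tau=0.005, alpha=0.2,
                      actor_lr=3e-4, critic_lr=3e-4))  # assumed hyperparameters

# Load the pretrained parameters downloaded in step 1.
agent.restore('./paddle_pretrain_model')
```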

### Performance

The result after training for one hour with 1 GPU and 6 CPUs:

![learning curve](https://raw.githubusercontent.com/benchmarking-rl/PARL-experiments/master/Baselines/GridDispatch_competition/paddle/result.png)
Lines changed: 153 additions & 0 deletions
@@ -0,0 +1,153 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gym
import numpy as np
from parl.utils import logger
from Environment.base_env import Environment
from utilize.settings import settings
from utilize.form_action import *


class MaxTimestepWrapper(gym.Wrapper):
    """Ends an episode after `max_timestep` steps and records the timeout in `info`."""

    def __init__(self, env, max_timestep=288):
        logger.info("[env type]:{}".format(type(env)))
        self.max_timestep = max_timestep
        env.observation_space = None
        env.reward_range = None
        env.metadata = None
        gym.Wrapper.__init__(self, env)

        self.timestep = 0

    def step(self, action, **kwargs):
        self.timestep += 1
        obs, reward, done, info = self.env.step(action, **kwargs)
        if self.timestep >= self.max_timestep:
            done = True
            info["timeout"] = True
        else:
            info["timeout"] = False
        return obs, reward, done, info

    def reset(self, **kwargs):
        self.timestep = 0
        return self.env.reset(**kwargs)


class ObsTransformerWrapper(gym.Wrapper):
    """Flattens the raw competition observation into a single feature vector."""

    def __init__(self, env):
        logger.info("[env type]:{}".format(type(env)))
        gym.Wrapper.__init__(self, env)

    def _get_obs(self, obs):
        # loads
        loads = []
        loads.append(obs.load_p)
        loads.append(obs.load_q)
        loads.append(obs.load_v)
        loads = np.concatenate(loads)

        # prods
        prods = []
        prods.append(obs.gen_p)
        prods.append(obs.gen_q)
        prods.append(obs.gen_v)
        prods = np.concatenate(prods)

        # rho
        rho = np.array(obs.rho) - 1.0

        next_load = obs.nextstep_load_p

        # action_space
        action_space_low = obs.action_space['adjust_gen_p'].low.tolist()
        action_space_high = obs.action_space['adjust_gen_p'].high.tolist()
        action_space_low[settings.balanced_id] = 0.0
        action_space_high[settings.balanced_id] = 0.0

        features = np.concatenate([
            loads, prods,
            rho.tolist(), next_load, action_space_low, action_space_high
        ])

        return features

    def step(self, action, **kwargs):
        self.raw_obs, reward, done, info = self.env.step(action, **kwargs)
        obs = self._get_obs(self.raw_obs)
        return obs, reward, done, info

    def reset(self, **kwargs):
        self.raw_obs = self.env.reset(**kwargs)
        obs = self._get_obs(self.raw_obs)
        return obs


class RewardShapingWrapper(gym.Wrapper):
    """Replaces the raw reward with a constant survival reward; keeps the original in `info`."""

    def __init__(self, env):
        logger.info("[env type]:{}".format(type(env)))
        gym.Wrapper.__init__(self, env)

    def step(self, action, **kwargs):
        obs, reward, done, info = self.env.step(action, **kwargs)

        shaping_reward = 1.0

        info["origin_reward"] = reward

        return obs, shaping_reward, done, info

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)


class ActionWrapper(gym.Wrapper):
    """Maps an agent action in [-1, 1] to the environment's `adjust_gen_p` range."""

    def __init__(self, env, raw_env):
        logger.info("[env type]:{}".format(type(env)))
        gym.Wrapper.__init__(self, env)
        self.raw_env = raw_env
        self.v_action = np.zeros(self.raw_env.settings.num_gen)

    def step(self, action, **kwargs):
        N = len(action)

        gen_p_action_space = self.env.raw_obs.action_space['adjust_gen_p']

        low_bound = gen_p_action_space.low
        high_bound = gen_p_action_space.high

        # Linearly map the action from [-1, 1] to [low_bound, high_bound],
        # zero out the balanced generator, and clip to the legal range.
        mapped_action = low_bound + (action - (-1.0)) * (
            (high_bound - low_bound) / 2.0)
        mapped_action[self.raw_env.settings.balanced_id] = 0.0
        mapped_action = np.clip(mapped_action, low_bound, high_bound)

        ret_action = form_action(mapped_action, self.v_action)
        return self.env.step(ret_action, **kwargs)

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)


def get_env():
    env = Environment(settings, "EPRIReward")
    env.action_space = None
    raw_env = env

    env = MaxTimestepWrapper(env)
    env = RewardShapingWrapper(env)
    env = ObsTransformerWrapper(env)
    env = ActionWrapper(env, raw_env)

    return env
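
A minimal interaction sketch for the wrapped environment returned by `get_env()` (hypothetical driver code, assuming this file is importable as `env_wrapper`; the real loop lives in the actor and training code):

```python
import numpy as np

from env_wrapper import get_env   # assumed module name for the file above

env = get_env()
obs = env.reset()
done, steps = False, 0
while not done:
    # ActionWrapper expects one value in [-1, 1] per generator; during training
    # these come from agent.sample(obs) rather than random noise.
    action = np.random.uniform(-1.0, 1.0, size=env.raw_env.settings.num_gen)
    obs, reward, done, info = env.step(action)   # reward is the constant shaping reward
    steps += 1
print("episode length:", steps, "| last raw EPRI reward:", info["origin_reward"])
```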
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import parl
import paddle
import numpy as np


class GridAgent(parl.Agent):
    """Converts numpy data to paddle tensors and delegates to the SAC algorithm."""

    def __init__(self, algorithm):
        super(GridAgent, self).__init__(algorithm)

        self.alg.sync_target(decay=0)

    def predict(self, obs):
        # Deterministic action for evaluation.
        obs = paddle.to_tensor(obs.reshape(1, -1), dtype='float32')
        action = self.alg.predict(obs)
        action_numpy = action.cpu().numpy()[0]
        return action_numpy

    def sample(self, obs):
        # Stochastic action for exploration during training.
        obs = paddle.to_tensor(obs.reshape(1, -1), dtype='float32')
        action, _ = self.alg.sample(obs)
        action_numpy = action.cpu().numpy()[0]
        return action_numpy

    def learn(self, obs, action, reward, next_obs, terminal):
        terminal = np.expand_dims(terminal, -1)
        reward = np.expand_dims(reward, -1)

        obs = paddle.to_tensor(obs, dtype='float32')
        action = paddle.to_tensor(action, dtype='float32')
        reward = paddle.to_tensor(reward, dtype='float32')
        next_obs = paddle.to_tensor(next_obs, dtype='float32')
        terminal = paddle.to_tensor(terminal, dtype='float32')
        critic_loss, actor_loss = self.alg.learn(obs, action, reward, next_obs,
                                                 terminal)
        return critic_loss.cpu().numpy()[0], actor_loss.cpu().numpy()[0]
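
To illustrate the numpy-in / numpy-out contract of `GridAgent`, here is a hedged sketch (the module names, dimensions, batch size, and SAC hyperparameters are placeholders; the real values are configured in `train.py`):

```python
import numpy as np
from parl.algorithms import SAC

from grid_model import GridModel   # assumed module name for the model file below
from grid_agent import GridAgent   # assumed module name for this file

OBS_DIM, ACT_DIM, BATCH = 819, 54, 256   # placeholder sizes

agent = GridAgent(SAC(GridModel(OBS_DIM, ACT_DIM), gamma=0.99, tau=0.005,
                      alpha=0.2, actor_lr=3e-4, critic_lr=3e-4))

# Single-observation inference: the agent reshapes the observation to a batch of one.
obs = np.zeros(OBS_DIM, dtype='float32')
exploring_action = agent.sample(obs)   # stochastic action for data collection
greedy_action = agent.predict(obs)     # deterministic action for evaluation

# One gradient step on a replay-buffer batch: all inputs are numpy arrays and are
# converted to paddle tensors inside GridAgent.learn.
batch_obs = np.random.randn(BATCH, OBS_DIM).astype('float32')
batch_act = np.random.uniform(-1.0, 1.0, (BATCH, ACT_DIM)).astype('float32')
batch_rew = np.ones(BATCH, dtype='float32')
batch_next_obs = np.random.randn(BATCH, OBS_DIM).astype('float32')
batch_done = np.zeros(BATCH, dtype='float32')
critic_loss, actor_loss = agent.learn(batch_obs, batch_act, batch_rew,
                                      batch_next_obs, batch_done)
```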
Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import parl
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

# Clamping bounds for the log standard deviation of the action distribution.
LOG_SIG_MAX = 2.0
LOG_SIG_MIN = -20.0


class GridModel(parl.Model):
    """Bundles the actor and the twin critics so the SAC algorithm can reach both."""

    def __init__(self, obs_dim, action_dim):
        super(GridModel, self).__init__()
        self.actor_model = Actor(obs_dim, action_dim)
        self.critic_model = Critic(obs_dim, action_dim)

    def policy(self, obs):
        return self.actor_model(obs)

    def value(self, obs, action):
        return self.critic_model(obs, action)

    def get_actor_params(self):
        return self.actor_model.parameters()

    def get_critic_params(self):
        return self.critic_model.parameters()


class Actor(parl.Model):
    """Gaussian policy network: outputs the action mean and a clipped log std."""

    def __init__(self, obs_dim, action_dim):
        super(Actor, self).__init__()

        self.l1 = nn.Linear(obs_dim, 512)
        self.l2 = nn.Linear(512, 256)
        self.mean_linear = nn.Linear(256, action_dim)
        self.std_linear = nn.Linear(256, action_dim)

    def forward(self, obs):
        x = F.relu(self.l1(obs))
        x = F.relu(self.l2(x))

        act_mean = self.mean_linear(x)
        act_std = self.std_linear(x)
        act_log_std = paddle.clip(act_std, min=LOG_SIG_MIN, max=LOG_SIG_MAX)
        return act_mean, act_log_std


class Critic(parl.Model):
    """Twin Q networks, as used by SAC to reduce value overestimation."""

    def __init__(self, obs_dim, action_dim):
        super(Critic, self).__init__()

        # Q1 network
        self.l1 = nn.Linear(obs_dim + action_dim, 512)
        self.l2 = nn.Linear(512, 256)
        self.l3 = nn.Linear(256, 1)

        # Q2 network
        self.l4 = nn.Linear(obs_dim + action_dim, 512)
        self.l5 = nn.Linear(512, 256)
        self.l6 = nn.Linear(256, 1)

    def forward(self, obs, action):
        x = paddle.concat([obs, action], 1)

        # Q1
        q1 = F.relu(self.l1(x))
        q1 = F.relu(self.l2(q1))
        q1 = self.l3(q1)

        # Q2
        q2 = F.relu(self.l4(x))
        q2 = F.relu(self.l5(q2))
        q2 = self.l6(q2)
        return q1, q2
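
A quick, hypothetical smoke test of the two heads of `GridModel` (the dimensions are placeholders; the real ones are derived from the competition environment's observation and generator counts):

```python
import paddle

from grid_model import GridModel   # assumed module name for the file above

model = GridModel(obs_dim=819, action_dim=54)   # placeholder dimensions
obs = paddle.randn([32, 819])
action = paddle.randn([32, 54])

act_mean, act_log_std = model.policy(obs)   # each has shape [32, 54]
q1, q2 = model.value(obs, action)           # each has shape [32, 1]
print(act_mean.shape, act_log_std.shape, q1.shape, q2.shape)
```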
