
Commit 1b905df

fix wandb api key issue
1 parent 251aca4 commit 1b905df

139 files changed (+24582, -4765 lines)

.gitignore

Lines changed: 5 additions & 2 deletions

@@ -3,6 +3,7 @@
 *.pyd
 *.log
 *.csv
+*.parquet
 *.c
 *.cpp
 *.DS_Store
@@ -12,12 +13,14 @@
 .vs/
 build/
 log/
+logs/
+checkpoint/
+checkpoints/
+streamit/
 dist/
 *.egg-info/
 tools/schedule
 docs/_build
-test/
-data/
 .eggs/
 maro_venv/
 pyvenv.cfg

docs/source/key_components/data_model.rst

Lines changed: 24 additions & 32 deletions

@@ -1165,15 +1165,15 @@ Transport cost per product.
 distribution
 ++++++++++++
 
-remaining_order_quantity
+pending_product_quantity
 ************************
 
 type: unsigned int
 slots: 1
 
 Sum of product number in current order list for now.
 
-remaining_order_number
+pending_order_number
 **********************
 
 type: unsigned int
@@ -1184,22 +1184,6 @@ How many pending order for now.
 consumer
 ++++++++
 
-total_purchased
-***************
-
-type: unsigned int
-slots: 1
-
-How many products this node purchased from start to now.
-
-total_received
-**************
-
-type: unsigned int
-slots: 1
-
-How many products this node received from start to now.
-
 purchased
 *********
 
@@ -1232,14 +1216,6 @@ slots: 1
 
 Per tick states. Consumption of current tick, 1.0 if there is purchase, or 0.
 
-order_quantity
-**************
-
-type: unsigned int
-slots: 1
-
-How many product to order, from action.
-
 price
 *****
 
@@ -1248,13 +1224,13 @@ slots: 1
 
 Price per product.
 
-order_cost
+order_base_cost
 **********
 
 type: float
 slots: 1
 
-Cost per order.
+Base cost for orders in this tick.
 
 reward_discount
 ***************
@@ -1267,21 +1243,37 @@ Reward discount from action.
 manufacture
 +++++++++++
 
-manufacture_quantity
+start_manufacture_quantity
+**************************
+
+type: unsigned int
+slots: 1
+
+How many products start to produce at current tick, controlled by action.
+
+in_pipeline_quantity
 ********************
 
 type: unsigned int
 slots: 1
 
-How many products being produced at current tick, controlled by action.
+How many products in manufacture pipeline at current tick, will lead to manufacture cost.
+
+finished_quantity
+*****************
+
+type: unsigned int
+slots: 1
+
+How many products are finished and exit manufacture pipeline at current tick.
 
-product_unit_cost
+manufacture_cost
 *****************
 
 type: float
 slots: 1
 
-Cost to procedue a product.
+Manufacture cost spent to produce products in pipeline at current tick.
 
 seller
 ++++++
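
As a quick orientation for the renamed attributes above, here is a minimal sketch of querying them from MARO's snapshot list. The scenario and topology names are placeholders, not taken from this commit; only the node and attribute names follow the doc change:

from maro.simulator import Env

# Sketch only: "supply_chain"/"sample" are placeholder scenario/topology names.
env = Env(scenario="supply_chain", topology="sample", durations=100)
env.step(None)

# Query the renamed per-tick manufacture states from the latest snapshot.
manufacture_ss = env.snapshot_list["manufacture"]
started = manufacture_ss[env.tick::["start_manufacture_quantity"]]
in_pipeline = manufacture_ss[env.tick::["in_pipeline_quantity"]]
finished = manufacture_ss[env.tick::["finished_quantity"]]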

docs/source/key_components/rl_toolkit.rst

Lines changed: 0 additions & 1 deletion

@@ -170,7 +170,6 @@ An example of creating an actor-critic trainer:
     DiscreteActorCriticTrainer(
         name='ac',
         params=DiscreteActorCriticParams(
-            device="cpu",
             get_v_critic_net_func=lambda: MyCriticNet(state_dim=128),
             reward_discount=.0,
             grad_iters=10,

examples/cim/rl/__init__.py

Lines changed: 3 additions & 2 deletions

@@ -3,13 +3,14 @@
 
 from .callbacks import post_collect, post_evaluate
 from .env_sampler import agent2policy, env_sampler_creator
-from .policy_trainer import policy_creator, trainer_creator
+from .policy_trainer import device_mapping, policy_creator, trainer_creator
 
 __all__ = [
     "agent2policy",
+    "device_mapping",
     "env_sampler_creator",
     "policy_creator",
     "post_collect",
     "post_evaluate",
-    "trainer_creator"
+    "trainer_creator",
 ]
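
With the per-trainer device="cpu" arguments removed elsewhere in this commit, device placement evidently moves to the device_mapping now exported from policy_trainer. A hypothetical sketch of what such a mapping could look like; the key format is an assumption for illustration, not taken from this diff:

from .config import algorithm, num_agents

# Hypothetical: map each policy name to the device it should run on.
# The "<algorithm>_<i>.policy" key pattern is assumed, not from this commit.
device_mapping = {f"{algorithm}_{i}.policy": "cpu" for i in range(num_agents)}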

examples/cim/rl/algorithms/ac.py

Lines changed: 5 additions & 8 deletions

@@ -8,24 +8,22 @@
 
 from maro.rl.model import DiscretePolicyNet, FullyConnected, VNet
 from maro.rl.policy import DiscretePolicyGradient
-from maro.rl.training.algorithms import (
-    DiscreteActorCriticTrainer, DiscreteActorCriticParams, DiscretePPOParams, DiscretePPOTrainer,
-)
+from maro.rl.training.algorithms import DiscreteActorCriticTrainer, DiscreteActorCriticParams
 
 actor_net_conf = {
     "hidden_dims": [256, 128, 64],
     "activation": torch.nn.Tanh,
     "softmax": True,
     "batch_norm": False,
-    "head": True
+    "head": True,
 }
 critic_net_conf = {
     "hidden_dims": [256, 128, 64],
     "output_dim": 1,
     "activation": torch.nn.LeakyReLU,
     "softmax": False,
     "batch_norm": True,
-    "head": True
+    "head": True,
 }
 actor_learning_rate = 0.001
 critic_learning_rate = 0.001
@@ -64,7 +62,7 @@ def apply_gradients(self, grad: Dict[str, torch.Tensor]) -> None:
     def get_state(self) -> dict:
         return {
             "network": self.state_dict(),
-            "optim": self._optim.state_dict()
+            "optim": self._optim.state_dict(),
         }
 
     def set_state(self, net_state: dict) -> None:
@@ -99,7 +97,7 @@ def apply_gradients(self, grad: Dict[str, torch.Tensor]) -> None:
     def get_state(self) -> dict:
         return {
             "network": self.state_dict(),
-            "optim": self._optim.state_dict()
+            "optim": self._optim.state_dict(),
         }
 
     def set_state(self, net_state: dict) -> None:
@@ -121,7 +119,6 @@ def get_ac(state_dim: int, name: str) -> DiscreteActorCriticTrainer:
     return DiscreteActorCriticTrainer(
         name=name,
         params=DiscreteActorCriticParams(
-            device="cpu",
            get_v_critic_net_func=lambda: MyCriticNet(state_dim),
            reward_discount=.0,
            grad_iters=10,
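
The get_state/set_state pair touched above captures both the network weights and the optimizer state, so a checkpoint round-trip looks roughly like this (a sketch, not part of the diff, assuming the MyCriticNet defined in this file):

import torch

# Save and restore both network and optimizer state via get_state/set_state.
net = MyCriticNet(state_dim=128)
torch.save(net.get_state(), "critic.ckpt")  # {"network": ..., "optim": ...}

restored = MyCriticNet(state_dim=128)
restored.set_state(torch.load("critic.ckpt"))  # restores network and optimizer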

examples/cim/rl/algorithms/dqn.py

Lines changed: 4 additions & 5 deletions

@@ -18,7 +18,7 @@
     "batch_norm": True,
     "skip_connection": False,
     "head": True,
-    "dropout_p": 0.0
+    "dropout_p": 0.0,
 }
 learning_rate = 0.05
 
@@ -75,22 +75,21 @@ def get_policy(state_dim: int, action_num: int, name: str) -> ValueBasedPolicy:
             "final_value": 0.0,
         }
     )],
-    warmup=100
+    warmup=100,
 )
 
 
 def get_dqn(name: str) -> DQNTrainer:
     return DQNTrainer(
         name=name,
         params=DQNParams(
-            device="cpu",
             reward_discount=.0,
             update_target_every=5,
             num_epochs=10,
             soft_update_coef=0.1,
             double=False,
             replay_memory_capacity=10000,
             random_overwrite=False,
-            batch_size=32
-        )
+            batch_size=32,
+        ),
     )

examples/cim/rl/algorithms/maddpg.py

Lines changed: 0 additions & 1 deletion

@@ -126,7 +126,6 @@ def get_maddpg(state_dim: int, action_dims: List[int], name: str) -> DiscreteMADDPGTrainer:
     return DiscreteMADDPGTrainer(
         name=name,
         params=DiscreteMADDPGParams(
-            device="cpu",
             reward_discount=.0,
             num_epoch=10,
             get_q_critic_net_func=partial(get_multi_critic_net, state_dim, action_dims),

examples/cim/rl/algorithms/ppo.py

Lines changed: 0 additions & 1 deletion

@@ -14,7 +14,6 @@ def get_ppo(state_dim: int, name: str) -> DiscretePPOTrainer:
     return DiscretePPOTrainer(
         name=name,
         params=DiscretePPOParams(
-            device="cpu",
             get_v_critic_net_func=lambda: MyCriticNet(state_dim),
             reward_discount=.0,
             grad_iters=10,

examples/cim/rl/callbacks.py

Lines changed: 0 additions & 1 deletion

@@ -1,7 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-
 def post_collect(info_list: list, ep: int, segment: int) -> None:
     # print the env metric from each rollout worker
     for info in info_list:

examples/cim/rl/config.py

Lines changed: 6 additions & 1 deletion

@@ -7,6 +7,11 @@
     "durations": 560
 }
 
+if env_conf["topology"].startswith("toy"):
+    num_agents = int(env_conf["topology"].split(".")[1][0])
+else:
+    num_agents = int(env_conf["topology"].split(".")[1][:2])
+
 port_attributes = ["empty", "full", "on_shipper", "on_consignee", "booking", "shortage", "fulfillment"]
 vessel_attributes = ["empty", "full", "remaining_space"]
 
@@ -34,4 +39,4 @@
     + len(vessel_attributes)
 )
 
-algorithm = "ac" # ac, ppo, dqn or discrete_maddpg
+algorithm = "ppo" # ac, ppo, dqn or discrete_maddpg
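
The new num_agents logic parses the agent count out of the CIM topology name: the first character after the first dot for toy topologies, the first two characters otherwise. A quick self-contained sketch of the same logic; the example topology names are illustrative:

def parse_num_agents(topology: str) -> int:
    # Mirrors the added config.py logic: toy topologies encode a single-digit
    # port count ("toy.4p_ssdd_l0.0" -> 4), others a two-digit count
    # ("global_trade.22p_l0.1" -> 22).
    if topology.startswith("toy"):
        return int(topology.split(".")[1][0])
    return int(topology.split(".")[1][:2])

assert parse_num_agents("toy.4p_ssdd_l0.0") == 4        # illustrative name
assert parse_num_agents("global_trade.22p_l0.1") == 22  # illustrative name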
