Skip to content

Commit 4fb1e18

Browse files
committed
Update some files
1 parent e43b580 commit 4fb1e18

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

73 files changed

+885
-409
lines changed

activation/softmax.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import "math"
44

55
// Softmax returns the softmax of the input vector.
66
func Softmax(a []float64) []float64 {
7-
var max float64
7+
max := a[0]
88
for i := range a {
99
if a[i] > max {
1010
max = a[i]

agent/agent_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ func ExampleAgent() {
1818
Source: rand.Const(1),
1919
}
2020

21-
for i := 0; i < 10; i++ {
21+
for range 10 {
2222
action := a.GetAction()
2323
a.Update(action, 1.0)
2424
fmt.Printf("%v: %v\n", action, a.Qs)
@@ -42,13 +42,13 @@ func ExampleAgent_bandit() {
4242
s := rand.Const(1)
4343

4444
all := make([][]float64, runs)
45-
for r := 0; r < runs; r++ {
45+
for r := range runs {
4646
bandit := env.NewNonStatBandit(arms, s)
4747
agent := &agent.Agent{Epsilon: eps, Qs: make([]float64, arms), Ns: make([]float64, arms), Source: s}
4848

4949
var total float64
5050
rates := make([]float64, steps)
51-
for i := 0; i < steps; i++ {
51+
for i := range steps {
5252
action := agent.GetAction()
5353
reward := bandit.Play(action)
5454
agent.Update(action, reward)
@@ -78,13 +78,13 @@ func ExampleAgent_bandit() {
7878
}
7979

8080
func Example_rand() {
81-
for i := 0; i < 5; i++ {
81+
for range 5 {
8282
r := randv2.New(rand.Const(1))
8383
fmt.Println(r.Float64())
8484
}
8585

8686
s := rand.Const(1)
87-
for i := 0; i < 5; i++ {
87+
for range 5 {
8888
r := randv2.New(s)
8989
fmt.Println(r.Float64())
9090
}

agent/alpha_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ func ExampleAlphaAgent() {
1717
Source: rand.Const(1),
1818
}
1919

20-
for i := 0; i < 10; i++ {
20+
for range 10 {
2121
action := a.GetAction()
2222
a.Update(action, 1.0)
2323
fmt.Printf("%v: %.4f\n", action, a.Qs)
@@ -42,13 +42,13 @@ func ExampleAlphaAgent_bandit() {
4242
s := rand.Const(1)
4343

4444
all := make([][]float64, runs)
45-
for r := 0; r < runs; r++ {
45+
for r := range runs {
4646
bandit := env.NewNonStatBandit(arms, s)
4747
agent := &agent.AlphaAgent{Epsilon: eps, Alpha: alpha, Qs: make([]float64, arms), Source: s}
4848

4949
var total float64
5050
rates := make([]float64, steps)
51-
for i := 0; i < steps; i++ {
51+
for i := range steps {
5252
action := agent.GetAction()
5353
reward := bandit.Play(action)
5454
agent.Update(action, reward)

agent/default_map_test.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,17 @@ import (
1010
func ExampleDefaultMap() {
1111
m := agent.DefaultMap[agent.RandomActions]{}
1212

13-
fmt.Println(m.Get(env.GridState{Height: 1, Width: 1}, agent.RandomActions{0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}).Probs())
13+
fmt.Println(m.Get(
14+
env.GridState{
15+
Height: 1,
16+
Width: 1,
17+
},
18+
agent.RandomActions{
19+
0: 0.25,
20+
1: 0.25,
21+
2: 0.25,
22+
3: 0.25,
23+
}).Probs())
1424
for k, v := range m {
1525
fmt.Println(k, v)
1626
}

agent/dqn_test.go

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func ExampleDQNAgent() {
4141
}
4242

4343
episodes, syncInterval := 1, 1
44-
for i := 0; i < episodes; i++ {
44+
for i := range episodes {
4545
state := e.OneHot(e.Reset())
4646
var totalLoss, totalReward float64
4747
var count int
@@ -81,47 +81,47 @@ func ExampleDQNAgent() {
8181
}
8282

8383
// Output:
84-
// 0: 0.0143, -0.0046
85-
// (0, 0) UP : 0.1057
86-
// (0, 0) DOWN : 0.0411
87-
// (0, 0) LEFT : -0.1044
88-
// (0, 0) RIGHT : -0.1153
89-
// (0, 1) UP : 0.2575
90-
// (0, 1) DOWN : 0.0565
91-
// (0, 1) LEFT : -0.0090
92-
// (0, 1) RIGHT : -0.1412
93-
// (0, 2) UP : -0.1237
94-
// (0, 2) DOWN : 0.3362
95-
// (0, 2) LEFT : 0.0124
96-
// (0, 2) RIGHT : -0.0446
97-
// (1, 0) UP : 0.0993
98-
// (1, 0) DOWN : 0.0425
99-
// (1, 0) LEFT : -0.1653
100-
// (1, 0) RIGHT : -0.0591
101-
// (1, 2) UP : -0.4625
102-
// (1, 2) DOWN : 0.0474
103-
// (1, 2) LEFT : -0.2263
104-
// (1, 2) RIGHT : -0.1712
105-
// (1, 3) UP : 0.7964
106-
// (1, 3) DOWN : 0.0965
107-
// (1, 3) LEFT : 0.0643
108-
// (1, 3) RIGHT : -0.1828
109-
// (2, 0) UP : -0.4854
110-
// (2, 0) DOWN : 0.2162
111-
// (2, 0) LEFT : -0.2302
112-
// (2, 0) RIGHT : -0.1094
113-
// (2, 1) UP : 0.2301
114-
// (2, 1) DOWN : 0.0680
115-
// (2, 1) LEFT : -0.0531
116-
// (2, 1) RIGHT : -0.0764
117-
// (2, 2) UP : -1.2781
118-
// (2, 2) DOWN : 0.2185
119-
// (2, 2) LEFT : -0.6493
120-
// (2, 2) RIGHT : -0.4158
121-
// (2, 3) UP : -0.7843
122-
// (2, 3) DOWN : 0.1689
123-
// (2, 3) LEFT : -0.4043
124-
// (2, 3) RIGHT : -0.2855
84+
// 0: 0.0088, -0.0082
85+
// (0, 0) UP : 0.0940
86+
// (0, 0) DOWN : -0.0011
87+
// (0, 0) LEFT : -0.0951
88+
// (0, 0) RIGHT : -0.0126
89+
// (0, 1) UP : 0.4905
90+
// (0, 1) DOWN : 0.1174
91+
// (0, 1) LEFT : -0.1345
92+
// (0, 1) RIGHT : 0.0601
93+
// (0, 2) UP : -1.1773
94+
// (0, 2) DOWN : 0.0345
95+
// (0, 2) LEFT : -0.2830
96+
// (0, 2) RIGHT : 0.0377
97+
// (1, 0) UP : 0.0477
98+
// (1, 0) DOWN : 0.0067
99+
// (1, 0) LEFT : -0.2127
100+
// (1, 0) RIGHT : 0.0289
101+
// (1, 2) UP : 0.2000
102+
// (1, 2) DOWN : 0.1005
103+
// (1, 2) LEFT : -0.1068
104+
// (1, 2) RIGHT : 0.0478
105+
// (1, 3) UP : 0.9551
106+
// (1, 3) DOWN : 0.1666
107+
// (1, 3) LEFT : 0.1643
108+
// (1, 3) RIGHT : -0.1554
109+
// (2, 0) UP : -0.3087
110+
// (2, 0) DOWN : 0.2201
111+
// (2, 0) LEFT : -0.1539
112+
// (2, 0) RIGHT : 0.0535
113+
// (2, 1) UP : 0.1924
114+
// (2, 1) DOWN : -0.0042
115+
// (2, 1) LEFT : -0.1180
116+
// (2, 1) RIGHT : -0.0138
117+
// (2, 2) UP : -1.0109
118+
// (2, 2) DOWN : 0.0833
119+
// (2, 2) LEFT : -0.3053
120+
// (2, 2) RIGHT : -0.0602
121+
// (2, 3) UP : -0.4370
122+
// (2, 3) DOWN : 0.0578
123+
// (2, 3) LEFT : -0.3560
124+
// (2, 3) RIGHT : -0.1734
125125
}
126126

127127
func Example_target() {

agent/env/bandit_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
func ExampleBandit() {
1111
bandit := env.NewBandit(10, rand.Const(1))
1212

13-
for i := 0; i < 10; i++ {
13+
for i := range 10 {
1414
fmt.Print(bandit.Play(i))
1515
}
1616

@@ -21,7 +21,7 @@ func ExampleBandit() {
2121
func ExampleNonStatBandit() {
2222
bandit := env.NewNonStatBandit(10, rand.Const(1))
2323

24-
for i := 0; i < 10; i++ {
24+
for i := range 10 {
2525
fmt.Print(bandit.Play(i))
2626
}
2727

agent/monte_carlo.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ func greedyProbs(Q DefaultMap[float64], state string, epsilon float64, actionSiz
5050
max := vector.Argmax(qs)
5151

5252
probs := make(RandomActions)
53-
for i := 0; i < actionSize; i++ {
53+
for i := range actionSize {
5454
probs[i] = epsilon / float64(actionSize)
5555
}
5656

@@ -60,7 +60,7 @@ func greedyProbs(Q DefaultMap[float64], state string, epsilon float64, actionSiz
6060

6161
func qstate(Q DefaultMap[float64], state string, actionSize int) []float64 {
6262
qs := make([]float64, 0)
63-
for i := 0; i < actionSize; i++ {
63+
for i := range actionSize {
6464
qs = append(qs, Q.Get(StateAction{State: state, Action: i}, 0.0))
6565
}
6666

agent/monte_carlo_test.go

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ func ExampleMonteCarloAgent() {
2525
}
2626

2727
episodes := 10000
28-
for i := 0; i < episodes; i++ {
28+
for range episodes {
2929
state := e.Reset()
3030
a.Reset()
3131

@@ -50,44 +50,44 @@ func ExampleMonteCarloAgent() {
5050
}
5151

5252
// Output:
53-
// (0, 0) UP : 0.70
54-
// (0, 0) DOWN : 0.63
53+
// (0, 0) UP : 0.71
54+
// (0, 0) DOWN : 0.60
5555
// (0, 0) LEFT : 0.73
56-
// (0, 0) RIGHT : 0.75
56+
// (0, 0) RIGHT : 0.79
5757
// (0, 1) UP : 0.81
58-
// (0, 1) DOWN : 0.80
59-
// (0, 1) LEFT : 0.69
60-
// (0, 1) RIGHT : 0.86
58+
// (0, 1) DOWN : 0.79
59+
// (0, 1) LEFT : 0.71
60+
// (0, 1) RIGHT : 0.88
6161
// (0, 2) UP : 0.89
62-
// (0, 2) DOWN : 0.77
62+
// (0, 2) DOWN : 0.80
6363
// (0, 2) LEFT : 0.80
6464
// (0, 2) RIGHT : 1.00
65-
// (1, 0) UP : 0.68
65+
// (1, 0) UP : 0.71
6666
// (1, 0) DOWN : 0.57
67-
// (1, 0) LEFT : 0.61
68-
// (1, 0) RIGHT : 0.64
69-
// (1, 2) UP : 0.88
67+
// (1, 0) LEFT : 0.64
68+
// (1, 0) RIGHT : 0.63
69+
// (1, 2) UP : 0.89
7070
// (1, 2) DOWN : 0.63
71-
// (1, 2) LEFT : 0.78
72-
// (1, 2) RIGHT : -0.11
71+
// (1, 2) LEFT : 0.70
72+
// (1, 2) RIGHT : -0.13
7373
// (1, 3) UP : 1.00
74-
// (1, 3) DOWN : -0.14
75-
// (1, 3) LEFT : 0.30
74+
// (1, 3) DOWN : 0.25
75+
// (1, 3) LEFT : 0.45
7676
// (1, 3) RIGHT : -0.10
77-
// (2, 0) UP : 0.61
78-
// (2, 0) DOWN : 0.56
77+
// (2, 0) UP : 0.62
78+
// (2, 0) DOWN : 0.55
7979
// (2, 0) LEFT : 0.54
80-
// (2, 0) RIGHT : 0.56
81-
// (2, 1) UP : 0.51
82-
// (2, 1) DOWN : 0.21
83-
// (2, 1) LEFT : 0.45
84-
// (2, 1) RIGHT : 0.64
85-
// (2, 2) UP : 0.71
86-
// (2, 2) DOWN : 0.48
87-
// (2, 2) LEFT : 0.42
88-
// (2, 2) RIGHT : -0.09
89-
// (2, 3) UP : -0.20
90-
// (2, 3) DOWN : -0.20
91-
// (2, 3) LEFT : -0.04
92-
// (2, 3) RIGHT : -0.23
80+
// (2, 0) RIGHT : 0.59
81+
// (2, 1) UP : 0.24
82+
// (2, 1) DOWN : 0.07
83+
// (2, 1) LEFT : 0.37
84+
// (2, 1) RIGHT : 0.66
85+
// (2, 2) UP : 0.74
86+
// (2, 2) DOWN : 0.40
87+
// (2, 2) LEFT : 0.50
88+
// (2, 2) RIGHT : 0.01
89+
// (2, 3) UP : -0.24
90+
// (2, 3) DOWN : -0.00
91+
// (2, 3) LEFT : 0.02
92+
// (2, 3) RIGHT : -0.15
9393
}

agent/qlearning_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ func ExampleQLearningAgent() {
2222
}
2323

2424
episodes := 10000
25-
for i := 0; i < episodes; i++ {
25+
for range episodes {
2626
state := e.Reset()
2727

2828
for {
@@ -68,7 +68,7 @@ func ExampleQLearningAgent() {
6868
// (1, 3) UP : 1.0000
6969
// (1, 3) DOWN : 0.0000
7070
// (1, 3) LEFT : 0.0000
71-
// (1, 3) RIGHT : -0.0812
71+
// (1, 3) RIGHT : 0.0000
7272
// (2, 0) UP : 0.6561
7373
// (2, 0) DOWN : 0.5905
7474
// (2, 0) LEFT : 0.5905
@@ -80,9 +80,9 @@ func ExampleQLearningAgent() {
8080
// (2, 2) UP : 0.8100
8181
// (2, 2) DOWN : 0.7290
8282
// (2, 2) LEFT : 0.6561
83-
// (2, 2) RIGHT : 0.0000
84-
// (2, 3) UP : -0.1000
83+
// (2, 2) RIGHT : 0.5039
84+
// (2, 3) UP : -0.0997
8585
// (2, 3) DOWN : 0.0000
86-
// (2, 3) LEFT : 0.0000
86+
// (2, 3) LEFT : 0.6998
8787
// (2, 3) RIGHT : 0.0000
8888
}

agent/random_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ func ExampleRandomAgent() {
2222
}
2323

2424
episodes := 1000
25-
for i := 0; i < episodes; i++ {
25+
for range episodes {
2626
state := e.Reset()
2727
a.Reset()
2828

0 commit comments

Comments (0)