-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathpolicy.go
More file actions
140 lines (114 loc) · 3.14 KB
/
policy.go
File metadata and controls
140 lines (114 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package cfr
import (
"bytes"
"encoding/gob"
"expvar"
"fmt"
"github.com/timpalpant/go-cfr/internal/policy"
)
// numInfosets is the exported "num_infosets" expvar counter. GetPolicy sets it
// to the size of the policy table whenever a new infoset is added.
var numInfosets = expvar.NewInt("num_infosets")
// init registers *PolicyTable with gob so that it can be encoded and decoded
// when it is carried inside an interface value.
func init() {
	gob.Register(new(PolicyTable))
}
// PolicyTable implements traditional (tabular) CFR by storing accumulated
// regrets and strategy sums for each InfoSet, which is looked up by its Key().
//
// Tables are created with NewPolicyTable or restored with UnmarshalBinary;
// both initialize the two maps below.
type PolicyTable struct {
// Discount parameters; Update asks them for the discount factors to use at
// the current iteration.
params DiscountParams
// Iteration counter: set to 1 by NewPolicyTable and incremented at the end
// of every Update.
iter int
// Map of InfoSet Key -> the policy for that infoset.
policiesByKey map[string]*policy.Policy
// Set of policies handed out by GetPolicy since the last Update. Update calls
// NextStrategy on each one and removes it from the set.
mayNeedUpdate map[*policy.Policy]struct{}
}
// NewPolicyTable returns an empty PolicyTable that uses the given
// DiscountParams. The iteration counter starts at 1 and both internal maps
// are allocated, so the table is ready for GetPolicy and Update.
func NewPolicyTable(params DiscountParams) *PolicyTable {
	pt := new(PolicyTable)
	pt.params = params
	pt.iter = 1
	pt.policiesByKey = make(map[string]*policy.Policy)
	pt.mayNeedUpdate = make(map[*policy.Policy]struct{})
	return pt
}
// Update performs regret matching for all nodes within this strategy profile
// that have been touched since the last call to Update().
//
// Each policy handed out by GetPolicy since then has NextStrategy applied with
// the discount factors for the current iteration and is removed from the
// pending set. The iteration counter is incremented afterwards.
func (pt *PolicyTable) Update() {
	pos, neg, sum := pt.params.GetDiscountFactors(pt.iter)
	for touched := range pt.mayNeedUpdate {
		touched.NextStrategy(pos, neg, sum)
		// Removing the current key while ranging over a map is safe in Go.
		delete(pt.mayNeedUpdate, touched)
	}

	pt.iter++
}
// Iter returns the table's current iteration counter. It starts at 1 for a
// table made by NewPolicyTable and grows by one on every call to Update.
func (pt *PolicyTable) Iter() int {
return pt.iter
}
// Close is a no-op for PolicyTable: it does nothing and always returns nil.
func (pt *PolicyTable) Close() error {
return nil
}
// GetPolicy returns the policy stored under node.InfoSetKey(node.Player()).
//
// The first time a key is seen, a new policy with node.NumChildren() actions
// is created and stored, and the num_infosets expvar is set to the new table
// size. If a policy already exists but its number of actions differs from the
// node's number of children, GetPolicy panics.
//
// Every policy returned is added to the pending set so that the next call to
// Update processes it.
func (pt *PolicyTable) GetPolicy(node GameTreeNode) NodePolicy {
	infoSetKey := node.InfoSetKey(node.Player())
	stored, found := pt.policiesByKey[string(infoSetKey)]

	switch {
	case !found:
		stored = policy.New(node.NumChildren())
		pt.policiesByKey[string(infoSetKey)] = stored
		numInfosets.Set(int64(len(pt.policiesByKey)))
	case stored.NumActions() != node.NumChildren():
		panic(fmt.Errorf("strategy has n_actions=%v but node has n_children=%v: %v",
			stored.NumActions(), node.NumChildren(), node))
	}

	pt.mayNeedUpdate[stored] = struct{}{}
	return stored
}
// UnmarshalBinary implements encoding.BinaryUnmarshaler.
//
// buf must hold the gob stream produced by MarshalBinary: the discount
// params, the iteration counter, the number of policies, and then one
// (key, policy) pair per policy.
//
// The payload is decoded into a scratch table and copied into pt only once
// everything has been read successfully, so a truncated or corrupt buffer
// returns an error and leaves pt unchanged. Decoding into fresh values also
// avoids stale data: gob omits zero-valued struct fields from the stream, so
// decoding straight into an already-populated pt.params could keep old values
// for those fields (assuming DiscountParams is a struct).
//
// On success the set of policies pending an Update is reset to empty.
func (pt *PolicyTable) UnmarshalBinary(buf []byte) error {
	dec := gob.NewDecoder(bytes.NewReader(buf))

	var loaded PolicyTable
	if err := dec.Decode(&loaded.params); err != nil {
		return fmt.Errorf("decoding policy table params: %w", err)
	}
	if err := dec.Decode(&loaded.iter); err != nil {
		return fmt.Errorf("decoding policy table iteration: %w", err)
	}

	var nStrategies int
	if err := dec.Decode(&nStrategies); err != nil {
		return fmt.Errorf("decoding policy table size: %w", err)
	}
	// Every entry occupies more than one byte of buf, so a count that is
	// negative or larger than len(buf) can only come from corrupt data. Reject
	// it before it is used as a map size hint.
	if nStrategies < 0 || nStrategies > len(buf) {
		return fmt.Errorf("decoding policy table: invalid policy count %d for %d-byte payload",
			nStrategies, len(buf))
	}

	loaded.policiesByKey = make(map[string]*policy.Policy, nStrategies)
	for i := 0; i < nStrategies; i++ {
		var key string
		if err := dec.Decode(&key); err != nil {
			return fmt.Errorf("decoding key of policy %d of %d: %w", i+1, nStrategies, err)
		}

		p := new(policy.Policy)
		if err := dec.Decode(p); err != nil {
			return fmt.Errorf("decoding policy %d of %d: %w", i+1, nStrategies, err)
		}
		loaded.policiesByKey[key] = p
	}
	loaded.mayNeedUpdate = make(map[*policy.Policy]struct{})

	// Commit only after the whole payload decoded cleanly.
	*pt = loaded
	return nil
}
// MarshalBinary implements encoding.BinaryMarshaler.
//
// The table is written as one gob stream: the discount params, the iteration
// counter and the number of stored policies, followed by each infoset key and
// its policy. Entries are written in map iteration order. The set of policies
// pending an Update is not serialized.
func (pt *PolicyTable) MarshalBinary() ([]byte, error) {
	var out bytes.Buffer
	enc := gob.NewEncoder(&out)

	// Fixed-size header, in the order UnmarshalBinary reads it back.
	header := []interface{}{pt.params, pt.iter, len(pt.policiesByKey)}
	for _, field := range header {
		if err := enc.Encode(field); err != nil {
			return nil, err
		}
	}

	for infoSetKey, pol := range pt.policiesByKey {
		if err := enc.Encode(infoSetKey); err != nil {
			return nil, err
		}
		if err := enc.Encode(pol); err != nil {
			return nil, err
		}
	}

	return out.Bytes(), nil
}