forked from geohot/twitchchess
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_training_set.py
More file actions
executable file
·39 lines (36 loc) · 998 Bytes
/
generate_training_set.py
File metadata and controls
executable file
·39 lines (36 loc) · 998 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python3
import os
import chess.pgn
import numpy as np
from state import State
def get_dataset(num_samples=None, data_dir="data"):
    """Build a training set from every PGN file in ``data_dir``.

    Each game's main line is replayed move by move; after every move the
    board is serialized with ``State(board).serialize()`` and paired with
    the game's result label.

    Args:
        num_samples: Optional cap on the number of examples. The cap is
            checked once per finished game, so the result may contain
            somewhat more than ``num_samples`` examples. ``None`` means
            read everything.
        data_dir: Folder holding the PGN files (defaults to ``"data"``).

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays: ``X`` holds the serialized
        positions and ``Y`` the matching labels (1 for ``'1-0'``, -1 for
        ``'0-1'``, 0 for ``'1/2-1/2'``).
    """
    X, Y = [], []
    gn = 0
    values = {'1/2-1/2': 0, '0-1': -1, '1-0': 1}
    # os.listdir returns names in arbitrary order; sorting keeps the set of
    # games selected under a num_samples cap reproducible between runs.
    for fn in sorted(os.listdir(data_dir)):
        # The context manager closes the file on every exit path,
        # including the early return below.
        with open(os.path.join(data_dir, fn)) as pgn:
            while True:
                game = chess.pgn.read_game(pgn)
                if game is None:
                    # No more games in this file.
                    break
                res = game.headers['Result']
                if res not in values:
                    # Skip games whose Result is not one of the three
                    # labelled outcomes.
                    continue
                value = values[res]
                board = game.board()
                for move in game.mainline_moves():
                    board.push(move)
                    X.append(State(board).serialize())
                    Y.append(value)
                print("parsing game %d, got %d examples" % (gn, len(X)))
                if num_samples is not None and len(X) > num_samples:
                    # Return arrays here too so both exit paths agree on
                    # the return type.
                    return np.array(X), np.array(Y)
                gn += 1
    return np.array(X), np.array(Y)
if __name__ == "__main__":
    # Create the output folder up front so a missing directory is caught
    # before the long parse instead of when saving at the very end.
    os.makedirs("processed", exist_ok=True)
    X, Y = get_dataset(25000000)
    # Arrays passed positionally are stored under np.savez's default
    # names: arr_0 (X) and arr_1 (Y).
    np.savez("processed/dataset_25M.npz", X, Y)