@@ -500,7 +500,7 @@ class QlearningMcCall(NamedTuple):
500500 β: float # discount factor
501501 w: jnp.ndarray # array of wage values, w[i] = wage at state i
502502 q: jnp.ndarray # array of probabilities
503- eps : float # for epsilon greedy algorithm
503+ ε : float # for ε greedy algorithm
504504 δ: float # Q-table threshold
505505 lr: float # the learning rate α
506506 T: int # maximum periods of accepting
@@ -511,7 +511,7 @@ def create_qlearning_mccall(c=25,
511511 β=0.99,
512512 w=w_default,
513513 q=q_default,
514- eps =0.1,
514+ ε =0.1,
515515 δ=1e-5,
516516 lr=0.5,
517517 T=10000,
@@ -520,7 +520,7 @@ def create_qlearning_mccall(c=25,
520520 β=β,
521521 w=w,
522522 q=q,
523- eps=eps ,
523+ ε=ε ,
524524 δ=δ,
525525 lr=lr,
526526 T=T,
@@ -562,7 +562,7 @@ def temp_diff(model, qtable, state, accept, key):
562562@jax.jit
563563def run_one_epoch(model, qtable, key, max_times=20000):
564564 """Run an "epoch"."""
565- eps , δ, lr, T = model.eps , model.δ, model.lr, model.T
565+ ε , δ, lr, T = model.ε , model.δ, model.lr, model.T
566566
567567 # Split keys for multiple random operations
568568 key, subkey1, subkey2 = jax.random.split(key, 3)
@@ -576,10 +576,10 @@ def run_one_epoch(model, qtable, key, max_times=20000):
576576 # Split key for this iteration's random operations
577577 key, action_key, td_key = jax.random.split(key, 3)
578578
579- # Choose action (epsilon -greedy)
579+ # Choose action (ε -greedy)
580580 accept = jnp.argmax(qtable[s, :])
581581 random_val = jax.random.uniform(action_key)
582- accept = jnp.where(random_val <= eps , 1 - accept, accept)
582+ accept = jnp.where(random_val <= ε , 1 - accept, accept)
583583
584584 # Update accept count
585585 accept_count = jnp.where(accept == 1, accept_count + 1, 0)
0 commit comments