@@ -734,10 +734,11 @@ begin
734734 if isterm(i_s)
735735 if continuing
736736 state_transition_map[:, i_s] .= i_start
737+ reward_transition_map[:, i_s] .= stepreward
737738 else
738739 state_transition_map[:, i_s] .= i_s
740+ reward_transition_map[:, i_s] .= 0f0
739741 end
740- reward_transition_map[:, i_s] .= 0f0
741742 else
742743 for a in actions
743744 i_a = action_index[a] # get index for action
@@ -1191,6 +1192,16 @@ State values for the random policy. Notice that at a discount rate of $\gamma=1
11911192md"""Select Discount Rate for State Policy Evaluation: $(@bind γ_gridworld_policy_evaluation Slider(0.01f0:0.01f0:1f0; show_value=true, default = 1f0))"""
11921193 ╠═╡ =#
11931194
1195+ # ╔═╡ e8fb7296-ecaf-48a4-a15c-cb994e399387
1196+ #= ╠═╡
1197+ @bind ex_3_5_params PlutoUI.combine() do Child # bundle the two widgets below into a single bound value with entries :γ and :c
1198+ md"""
1199+ Discount Rate: $(Child(:γ, Slider(0.1f0:0.1f0:1.f0; default = 0.9f0, show_value=true)))
1200+ Reward Boost: $(Child(:c, NumberField(0:100)))
1201+ """
1202+ end # entries are declared in the order :γ, :c
1203+ ╠═╡ =#
1204+
11941205# ╔═╡ cb96b24a-65aa-4832-bc7d-093f0c951f83
11951206# ╠═╡ skip_as_script = true
11961207#= ╠═╡
@@ -5236,6 +5247,39 @@ show_grid_value(windy_gridworld, windy_gridworld_random_policy_evaluation.value_
52365247show_grid_value(stochastic_gridworld, stochastic_gridworld_random_policy_evaluation.value_function, "gridworld_random_values"; square_pixels = 50)
52375248 ╠═╡ =#
52385249
5250+ # ╔═╡ 75513920-f739-4d9d-b2e7-598a7905c854
5251+ #= ╠═╡
5252+ function ex_3_5_grid(γ, c) # Builds an episodic and a continuing deterministic gridworld whose step/terminal rewards are both shifted by c, evaluates example_gridworld_random_policy on each, and returns an HTML fragment showing the two value grids side by side.
5253+ mdp1 = make_deterministic_gridworld(;stepreward = 0f0 + Float32(c), termreward=10f0+c, continuing=false) # continuing=false variant: step reward c, terminal reward 10 + c
5254+ mdp2 = make_deterministic_gridworld(;stepreward = 0f0 + Float32(c), termreward=10f0+c, continuing=true) # identical rewards, but built with continuing=true
5255+
5256+ solution1 = policy_evaluation_v(mdp1, example_gridworld_random_policy, γ) # γ is forwarded as the third argument — presumably the discount rate; confirm against policy_evaluation_v
5257+ solution2 = policy_evaluation_v(mdp2, example_gridworld_random_policy, γ)
5258+
5259+ v1 = copy(solution1.value_function) # work on a copy so the solver's result is left unmodified
5260+ v1 .-= minimum(solution1.value_function[findall(.!mdp1.terminal_states)]) # shift so the smallest value among non-terminal states becomes 0
5261+ v1[findall(mdp1.terminal_states)] .= 0f0 # after the shift, overwrite terminal-state entries with 0
5262+
5263+ @htl("""
5264+ <div style = "display: flex; justify-content: space-around;">
5265+ <div>
5266+ Episodic Values Relative to Minimum Non-Terminal Value
5267+ $(show_grid_value(mdp1, v1, "solution_values_351"; square_pixels = 50))
5268+ </div>
5269+ <div>
5270+ Continuing Values Relative to Minimum
5271+ $(show_grid_value(mdp2, solution2.value_function .- minimum(solution2.value_function), "solution_values_352"; square_pixels = 50))
5272+ </div>
5273+ </div>
5274+ """)
5275+ end # the @htl value above is the function's return value; the continuing grid is shifted by its overall minimum (no terminal-state masking)
5276+ ╠═╡ =#
5277+
5278+ # ╔═╡ 822d6b66-5c1f-4abe-87d0-a10db0f309f7
5279+ #= ╠═╡
5280+ ex_3_5_grid(ex_3_5_params...) # splat passes the bound values positionally, so the widget order (:γ, then :c) must match the (γ, c) signature
5281+ ╠═╡ =#
5282+
52395283# ╔═╡ f856ecc7-53e9-47e4-9869-abca0f19a98b
52405284#= ╠═╡
52415285show_grid_value(differential_gridworld, differential_policy_evaluation_v(differential_gridworld, π_list_differential[3]; θ = 1f-6).value_function, "fdfs", square_pixels = 40)
@@ -6581,8 +6625,11 @@ version = "17.4.0+2"
65816625# ╟─900a2ece-9638-49fc-afbe-e012f9520b48
65826626# ╟─0f6cc7a9-4184-471f-86d5-4ad0c0e495ce
65836627# ╟─91ca282d-e857-41d7-b99d-d9449b82da09
6584- # ╠═5b53ef57-12d1-45e2-ad1e-28c490c336a6
6585- # ╟─966eae0d-7556-4ff9-b9f7-d47a736524a4
6628+ # ╟─5b53ef57-12d1-45e2-ad1e-28c490c336a6
6629+ # ╟─e8fb7296-ecaf-48a4-a15c-cb994e399387
6630+ # ╟─822d6b66-5c1f-4abe-87d0-a10db0f309f7
6631+ # ╠═75513920-f739-4d9d-b2e7-598a7905c854
6632+ # ╠═966eae0d-7556-4ff9-b9f7-d47a736524a4
65866633# ╟─cb96b24a-65aa-4832-bc7d-093f0c951f83
65876634# ╟─7df4fcbb-2f5f-4d59-ba0c-c7e635bb0503
65886635# ╟─4f0f052d-b461-4040-b5ff-46aac74a24de
0 commit comments