Remove configurable number type from TMaze and StochasticMaze (always use Float64); add marker stroke width to StochasticMaze plots

wouterwln · wouterwln · commit 8c79c7449711 · 2025-04-30T15:43:38.000+02:00
diff --git a/scripts/stochastic_maze.jl b/scripts/stochastic_maze.jl
@@ -42,10 +42,6 @@ function parse_command_line()
         help = "Time to wait between steps in seconds (default: 0.0)"
         arg_type = Float64
         default = 0.0
-        "--number-type", "-n"
-        help = "Number type to use (default: Float64)"
-        arg_type = Symbol
-        default = :Float64
         "--seed", "-s"
         help = "Random seed for the experiment"
         arg_type = Int
@@ -79,17 +75,6 @@ function parse_command_line()
     args["n-iterations"] > 0 || throw(ArgumentError("n-iterations must be positive"))
     args["wait-time"] >= 0 || throw(ArgumentError("wait-time must be non-negative"))
 
-    # Convert number type string to actual type
-    number_type = if args["number-type"] == :Float32
-        Float32
-    elseif args["number-type"] == :Float64
-        Float64
-    elseif args["number-type"] == :Float16
-        Float16
-    else
-        throw(ArgumentError("Unsupported number type: $(args["number-type"])"))
-    end
-
     # Handle save_results argument logic
     save_results = true
     if args["no-save-results"]
@@ -103,7 +88,6 @@ function parse_command_line()
         n_episodes=args["n-episodes"],
         n_iterations=args["n-iterations"],
         wait_time=args["wait-time"],
-        number_type=number_type,
         seed=args["seed"],
         record_episode=args["record-episode"],
         experiment_name=args["experiment-name"],
@@ -121,7 +105,6 @@ function run_stochastic_maze_experiment(;
     wait_time::Float64=0.0,
     record_episode::Bool=false,
     seed::Int=123,
-    number_type::Type{<:AbstractFloat}=Float64,
     experiment_name::String="stochastic_maze_$(Dates.format(now(), "yyyymmdd_HHMMSS"))",
     log_dir::String=datadir("logs", "stochastic_maze"),
     save_results::Bool=true,
@@ -151,7 +134,7 @@ function run_stochastic_maze_experiment(;
     n_actions = 4  # NESW
 
     # Create goal distribution focused on the goal state
-    p_goal = zeros(number_type, n_states)
+    p_goal = zeros(Float64, n_states)
     p_goal[goal_state] = 1.0
     goal_distribution = Categorical(p_goal)
 
@@ -161,7 +144,6 @@ function run_stochastic_maze_experiment(;
         n_episodes=n_episodes,
         n_iterations=n_iterations,
         wait_time=wait_time,
-        number_type=number_type,
         seed=seed,
         record_episode=record_episode,
         experiment_name=experiment_name
@@ -312,7 +294,6 @@ function main()
         wait_time=args.wait_time,
         record_episode=args.record_episode,
         seed=args.seed,
-        number_type=args.number_type,
         experiment_name=args.experiment_name,
         save_results=args.save_results,
         debug_mode=args.debug_mode,
diff --git a/scripts/tmaze_experiments.jl b/scripts/tmaze_experiments.jl
@@ -50,10 +50,6 @@ function parse_command_line()
         help = "Time to wait between steps in seconds (default: 0.0)"
         arg_type = Float64
         default = 0.0
-        "--number-type", "-n"
-        help = "Number type to use (default: Float64)"
-        arg_type = Symbol
-        default = :Float64
         "--seed", "-s"
         help = "Random seed for the experiment"
         arg_type = Int
@@ -87,17 +83,6 @@ function parse_command_line()
     args["n-iterations"] > 0 || throw(ArgumentError("n-iterations must be positive"))
     args["wait-time"] >= 0 || throw(ArgumentError("wait-time must be non-negative"))
 
-    # Convert number type string to actual type
-    number_type = if args["number-type"] == :Float32
-        Float32
-    elseif args["number-type"] == :Float64
-        Float64
-    elseif args["number-type"] == :Float16
-        Float16
-    else
-        throw(ArgumentError("Unsupported number type: $(args["number-type"])"))
-    end
-
     # Handle save_results argument logic
     save_results = true
     if args["no-save-results"]
@@ -111,7 +96,6 @@ function parse_command_line()
         n_episodes=args["n-episodes"],
         n_iterations=args["n-iterations"],
         wait_time=args["wait-time"],
-        number_type=number_type,
         seed=args["seed"],
         record_episode=args["record-episode"],
         experiment_name=args["experiment-name"],
@@ -128,7 +112,6 @@ function run_tmaze_experiment(;
     wait_time::Float64=0.0,
     record_episode::Bool=false,
     seed::Int=123,
-    number_type::Type{<:AbstractFloat}=Float64,
     experiment_name::String="tmaze_$(Dates.format(now(), "yyyymmdd_HHMMSS"))",
     log_dir::String=datadir("logs", "tmaze"),
     save_results::Bool=true,
@@ -152,7 +135,7 @@ function run_tmaze_experiment(;
     end
 
     # Create goal distribution - prefer the left arm location (state 3)
-    left_goal = zeros(number_type, 5)
+    left_goal = zeros(Float64, 5)
     left_goal[3] = 1.0
     left_goal_distribution = Categorical(left_goal)
 
@@ -162,7 +145,6 @@ function run_tmaze_experiment(;
         n_episodes=n_episodes,
         n_iterations=n_iterations,
         wait_time=wait_time,
-        number_type=number_type,
         seed=seed,
         record_episode=record_episode,
         experiment_name=experiment_name
@@ -312,7 +294,6 @@ function main()
         wait_time=args.wait_time,
         record_episode=args.record_episode,
         seed=args.seed,
-        number_type=args.number_type,
         experiment_name=args.experiment_name,
         save_results=args.save_results,
         debug_mode=args.debug_mode,
diff --git a/src/agents/stochastic_maze_agent.jl b/src/agents/stochastic_maze_agent.jl
@@ -24,18 +24,16 @@ Configuration for StochasticMaze agent experiments.
 - `n_episodes::Int`: Number of episodes to run
 - `n_iterations::Int`: Number of inference iterations per step
 - `wait_time::Float64`: Time to wait between steps (for visualization)
-- `number_type::Type{T}`: Numeric type for computations
 - `seed::Int`: Random seed
 - `record_episode::Bool`: Whether to record episode frames as individual PNG files
 - `experiment_name::String`: Name of the experiment (for saving results)
 - `parallel::Bool`: Whether to run episodes in parallel
 """
-Base.@kwdef struct StochasticMazeConfig{T<:AbstractFloat}
+Base.@kwdef struct StochasticMazeConfig
     time_horizon::Int
     n_episodes::Int
     n_iterations::Int
     wait_time::Float64
-    number_type::Type{T}
     seed::Int
     record_episode::Bool = false
     experiment_name::String
@@ -48,10 +46,10 @@ end
 Container for agent's beliefs about the StochasticMaze environment.
 
 # Fields
-- `state::Categorical{T}`: Belief about current state
+- `state::Categorical{Float64}`: Belief about current state
 """
-Base.@kwdef mutable struct StochasticMazeBeliefs{T<:AbstractFloat}
-    state::Categorical{T}
+Base.@kwdef mutable struct StochasticMazeBeliefs
+    state::Categorical{Float64}
 end
 
 """
@@ -67,14 +65,14 @@ function validate_config(config::StochasticMazeConfig)
 end
 
 """
-    initialize_beliefs_stochastic_maze(n_states::Int, T::Type{<:AbstractFloat})
+    initialize_beliefs_stochastic_maze(n_states::Int)
 
 Initialize agent beliefs for the StochasticMaze environment.
 """
-function initialize_beliefs_stochastic_maze(n_states::Int, T::Type{<:AbstractFloat})
+function initialize_beliefs_stochastic_maze(n_states::Int)
     # Initialize with uniform beliefs over states
     return StochasticMazeBeliefs(
-        state=Categorical(fill(T(1.0 / n_states), n_states))
+        state=Categorical(fill(1.0 / n_states, n_states))
     )
 end
 
@@ -103,18 +101,18 @@ function execute_step(env, observation, beliefs, model, tensors, config, goal, c
 
     # Convert previous action to one-hot encoding
     n_actions = 4
-    previous_action_vec = zeros(config.number_type, n_actions)
+    previous_action_vec = zeros(Float64, n_actions)
     if !isnothing(previous_action)
-        previous_action_vec[previous_action.index] = one(config.number_type)
+        previous_action_vec[previous_action.index] = one(Float64)
     end
 
     # Get initialization from previous results or initialize fresh
     n_states = size(tensors.transition_tensor, 1)
     initialization = initialization_fn(n_states)
 
     # Create observation vector
-    observation_vec = zeros(config.number_type, n_states)
-    observation_vec[observation] = one(config.number_type)
+    observation_vec = zeros(Float64, n_states)
+    observation_vec[observation] = one(Float64)
 
     # Run inference
     result = infer(
@@ -191,7 +189,7 @@ function run_stochastic_maze_single_episode(model, tensors, config, goal, callba
 
     # Initialize beliefs
     n_states = size(tensors.transition_tensor, 1)
-    beliefs = initialize_beliefs_stochastic_maze(n_states, config.number_type)
+    beliefs = initialize_beliefs_stochastic_maze(n_states)
 
     # Initialize tracking variables
     total_reward = 0.0
diff --git a/src/agents/tmaze_agent.jl b/src/agents/tmaze_agent.jl
@@ -22,18 +22,16 @@ Configuration for TMaze agent experiments.
 - `n_episodes::Int`: Number of episodes to run
 - `n_iterations::Int`: Number of inference iterations per step
 - `wait_time::Float64`: Time to wait between steps (for visualization)
-- `number_type::Type{T}`: Numeric type for computations
 - `seed::Int`: Random seed
 - `record_episode::Bool`: Whether to record episode frames as individual PNG files
 - `experiment_name::String`: Name of the experiment (for saving results)
 - `parallel::Bool`: Whether to run episodes in parallel
 """
-Base.@kwdef struct TMazeConfig{T<:AbstractFloat}
+Base.@kwdef struct TMazeConfig
     time_horizon::Int
     n_episodes::Int
     n_iterations::Int
     wait_time::Float64
-    number_type::Type{T}
     seed::Int
     record_episode::Bool = false
     experiment_name::String
@@ -46,12 +44,12 @@ end
 Container for agent's beliefs about the TMaze environment.
 
 # Fields
-- `location::Categorical{T}`: Belief about current location (5 possible states)
-- `reward_location::Categorical{T}`: Belief about reward location (left or right)
+- `location::Categorical{Float64}`: Belief about current location (5 possible states)
+- `reward_location::Categorical{Float64}`: Belief about reward location (left or right)
 """
-Base.@kwdef mutable struct TMazeBeliefs{T<:AbstractFloat}
-    location::Categorical{T}
-    reward_location::Categorical{T}
+Base.@kwdef mutable struct TMazeBeliefs
+    location::Categorical{Float64}
+    reward_location::Categorical{Float64}
 end
 
 """
@@ -67,15 +65,15 @@ function validate_config(config::TMazeConfig)
 end
 
 """
-    initialize_beliefs_tmaze(T::Type{<:AbstractFloat})
+    initialize_beliefs_tmaze()
 
 Initialize agent beliefs for the TMaze environment.
 """
-function initialize_beliefs_tmaze(T::Type{<:AbstractFloat})
+function initialize_beliefs_tmaze()
     # Initialize with uniform beliefs over states
     return TMazeBeliefs(
-        location=Categorical(fill(T(1 / 5), 5)),
-        reward_location=Categorical([T(0.5), T(0.5)])
+        location=Categorical(fill(1.0 / 5, 5)),
+        reward_location=Categorical([0.5, 0.5])
     )
 end
 
@@ -127,15 +125,15 @@ Takes current observations and returns the next planned action.
 function execute_step(env, position_obs, reward_cue, beliefs, model, tensors, config, goal, callbacks, time_remaining, previous_result, previous_action;
     constraints_fn, initialization_fn, inference_kwargs...)
     # Convert previous action to one-hot encoding
-    previous_action_vec = zeros(config.number_type, 4)
+    previous_action_vec = zeros(Float64, 4)
     if previous_action.direction isa North
-        previous_action_vec[1] = one(config.number_type)
+        previous_action_vec[1] = one(Float64)
     elseif previous_action.direction isa East
-        previous_action_vec[2] = one(config.number_type)
+        previous_action_vec[2] = one(Float64)
     elseif previous_action.direction isa South
-        previous_action_vec[3] = one(config.number_type)
+        previous_action_vec[3] = one(Float64)
     elseif previous_action.direction isa West
-        previous_action_vec[4] = one(config.number_type)
+        previous_action_vec[4] = one(Float64)
     end
 
     # Get initialization from previous results or initialize fresh
@@ -210,7 +208,7 @@ function run_tmaze_single_episode(model, tensors, config, goal, callbacks, seed;
     env = create_tmaze(reward_position, (2, 2))  # Start at middle junction (2,2)
 
     # Initialize beliefs
-    beliefs = initialize_beliefs_tmaze(config.number_type)
+    beliefs = initialize_beliefs_tmaze()
 
     # Initialize tracking variables
     total_reward = 0.0
@@ -247,8 +245,8 @@ function run_tmaze_single_episode(model, tensors, config, goal, callbacks, seed;
     )
 
     # Initial position observation and reward cue
-    position_obs = convert.(config.number_type, get_position_observation(env))
-    reward_cue = convert.(config.number_type, get_reward_cue(env))
+    position_obs = convert.(Float64, get_position_observation(env))
+    reward_cue = convert.(Float64, get_reward_cue(env))
 
     # Record initial state
     push!(episode_data["positions"], [env.agent_position...])
@@ -283,8 +281,8 @@ function run_tmaze_single_episode(model, tensors, config, goal, callbacks, seed;
         position_obs, reward_cue, reward = step!(env, next_action)
 
         # Convert to the required numeric type
-        position_obs = convert.(config.number_type, position_obs)
-        reward_cue = convert.(config.number_type, reward_cue)
+        position_obs = convert.(Float64, position_obs)
+        reward_cue = convert.(Float64, reward_cue)
 
         # Update total reward
         episode_reward = reward isa Number ? reward : 0
@@ -408,7 +406,6 @@ function run_tmaze_agent(
                     n_episodes=config.n_episodes,
                     n_iterations=config.n_iterations,
                     wait_time=config.wait_time,
-                    number_type=config.number_type,
                     seed=config.seed,
                     record_episode=config.record_episode,
                     experiment_name=config.experiment_name,
diff --git a/src/environments/stochastic_maze.jl b/src/environments/stochastic_maze.jl
@@ -550,7 +550,7 @@ function visualize_stochastic_maze(env::StochasticMaze)
         x, y = state_to_xy(state, grid_size_x)
         color = reward > 0 ? MAZE_THEME.reward_positive : MAZE_THEME.reward_negative
         opacity = min(abs(reward), 1.0) # Use absolute value of reward for opacity, capped at 1.0
-        scatter!(p, [x - 0.5], [grid_size_y - y + 0.5], color=color, alpha=opacity, markersize=ceil(Int, scale))
+        scatter!(p, [x - 0.5], [grid_size_y - y + 0.5], color=color, alpha=opacity, markersize=ceil(Int, scale), markerstrokewidth=ceil(Int, scale / 15))
     end
 
     # Plot observation noise
@@ -583,7 +583,7 @@ function visualize_stochastic_maze(env::StochasticMaze)
 
     # Plot agent
     x, y = state_to_xy(env.agent_state, grid_size_x)
-    scatter!(p, [x - 0.5], [grid_size_y - y + 0.5], color=MAZE_THEME.agent, markersize=ceil(Int, (2 / 3) * scale))
+    scatter!(p, [x - 0.5], [grid_size_y - y + 0.5], color=MAZE_THEME.agent, markersize=ceil(Int, (2 / 3) * scale), markerstrokewidth=ceil(Int, scale / 15))
 
     return p
 end
diff --git a/src/environments/tmaze.jl b/src/environments/tmaze.jl