Skip to content

Commit 08f346f

Browse files
Merge pull request #1911 from ayush-09/at
Robot Navigation
2 parents 8277883 + 9e6de74 commit 08f346f

File tree

2 files changed

+170
-0
lines changed

2 files changed

+170
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Fri Jun 4 18:02:01 2021
4+
5+
@author: Ayush
6+
"""
7+
8+
import gym
9+
import numpy as np
10+
from IPython.display import clear_output
11+
12+
env = gym.make('Taxi-v3')

# Demo: roll out a purely random policy for a few episodes to show the
# untrained agent's (poor) baseline performance.
episodes = 10
for episode in range(1, episodes + 1):  # fix: range(1, episodes) ran only 9 of the 10 demo episodes
    state = env.reset()
    done = False
    score = 0  # episode return (sum of per-step rewards)

    while not done:
        env.render()
        # Sample a uniformly random action and step the environment.
        state, reward, done, info = env.step(env.action_space.sample())
        score += reward
        clear_output(wait=True)
    print('Episode: {}\nScore: {}'.format(episode, score))
env.close()
27+
28+
#Creating Q-Table: one row per discrete observation, one column per action
actions = env.action_space.n
state = env.observation_space.n  # NOTE: this is the *number* of states, not a single state

q_table = np.zeros((state, actions))

#Parameters for Q-Learning
num_episodes = 10000             # training episodes
max_steps_per_episode = 1000     # hard cap on steps within one episode
learning_rate = 0.01             # alpha: step size of each Q-value update
discount_rate = 0.99             # gamma: discount applied to future rewards
exploration_rate = 1             # epsilon: start fully exploratory
max_exploration_rate = 1         # upper bound for epsilon
min_exploration_rate = 0.01      # lower bound for epsilon
exploration_decay_rate = 0.01    # speed of the exponential epsilon decay
46+
47+
rewards_all_episodes = []  # per-episode return, kept for the diagnostics below

#Q-Learning Algorithm: tabular Q-learning with an epsilon-greedy policy whose
#epsilon decays exponentially from max_exploration_rate to min_exploration_rate.
import random
for episode in range(num_episodes):
    state = env.reset()
    done = False
    reward_current_episode = 0

    for step in range(max_steps_per_episode):
        #Exploration vs Exploitation trade-off: exploit the best known action
        #with probability (1 - epsilon), otherwise explore randomly.
        exploration_threshold = random.uniform(0, 1)
        if exploration_threshold > exploration_rate:
            action = np.argmax(q_table[state, :])
        else:
            action = env.action_space.sample()
        new_state, reward, done, info = env.step(action)

        #Update Q-Table with the Q-learning (Bellman) rule:
        #Q(s,a) <- (1-alpha)*Q(s,a) + alpha*(r + gamma * max_a' Q(s',a'))
        q_table[state, action] = q_table[state, action] * (1 - learning_rate) \
            + learning_rate * (reward + discount_rate * np.max(q_table[new_state, :]))
        state = new_state
        reward_current_episode += reward

        if done:  # idiomatic truth test (was 'done== True')
            break
    # Exponential epsilon decay toward min_exploration_rate.
    exploration_rate = min_exploration_rate + \
        (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
    rewards_all_episodes.append(reward_current_episode)
print("***** Training Finished *****")
76+
77+
# Show the learned Q-values (bare 'q_table' was a notebook idiom: a no-op in a script).
print(q_table)

#Calculate and print average reward per thousand episodes.
# np.split requires an *integer* section count; num_episodes/1000 is a float in
# Python 3 and raises TypeError on modern NumPy, so use floor division.
# (num_episodes must be a multiple of 1000 for the split to be even.)
rewards_per_thousand_episodes = np.split(np.array(rewards_all_episodes), num_episodes // 1000)
count = 1000

print("Average per thousand episodes")

for r in rewards_per_thousand_episodes:
    # sum(r) / 1000 avoids building the intermediate r/1000 array; str() was redundant.
    print(count, ":", sum(r) / 1000)
    count += 1000
88+
89+
# Visualize Agent: roll out the greedy (argmax) policy for a few episodes,
# rendering each step so the trained taxi can be watched.
import time
for episode in range(3):
    # BUG FIX: was 'status = env.reset()', which left 'state' stale from the
    # training loop, so every rollout indexed the Q-table with the wrong state.
    state = env.reset()
    done = False
    print("Episode is: " + str(episode))
    time.sleep(1)

    for step in range(max_steps_per_episode):
        clear_output(wait=True)
        env.render()
        time.sleep(.4)

        # Greedy action from the learned Q-table (no exploration at test time).
        action = np.argmax(q_table[state, :])

        new_state, reward, done, info = env.step(action)

        if done:
            clear_output(wait=True)
            env.render()
            # BUG FIX: Taxi-v3 pays +20 for a successful drop-off, not +1,
            # so the old 'reward == 1' test could never report success.
            if reward == 20:
                print("****Reached Goal****")
            else:
                print("****Failed****")
            time.sleep(2)
            clear_output(wait=True)
            break
        state = new_state
env.close()
121+
122+
123+

Autonomous-Taxi-Agent/README.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Q-Learning with Taxi-v3 Environment
2+
3+
This repository contains code implementing the Q-Learning algorithm on the Taxi-v3 environment from OpenAI Gym.
4+
5+
## Prerequisites
6+
7+
To run this code, you need the following dependencies:
8+
9+
- Python 3.x
10+
- Gym: `pip install gym`
11+
- NumPy: `pip install numpy`
12+
13+
## Getting Started
14+
15+
1. Clone the repository: `git clone https://github.com/your_username/your_repository.git`
16+
2. Navigate to the cloned repository: `cd your_repository`
17+
18+
## Running the Code
19+
20+
1. Open the Python script `q_learning_taxi.py`.
21+
2. Configure the number of episodes, learning parameters, and other settings as needed.
22+
3. Run the script: `python q_learning_taxi.py`.
23+
24+
## Understanding the Code
25+
26+
The code performs the following steps:
27+
28+
1. Imports the necessary libraries and initializes the Taxi-v3 environment.
29+
2. Runs a specified number of episodes, where each episode represents a learning iteration.
30+
3. Resets the environment for each episode and plays the game until completion.
31+
4. Renders the environment to visualize the game.
32+
5. Selects actions randomly for exploration or based on the learned Q-values for exploitation.
33+
6. Updates the Q-table based on the Q-Learning algorithm.
34+
7. Adjusts the exploration rate over time to balance exploration and exploitation.
35+
8. Stores the rewards obtained in each episode.
36+
9. Prints the Q-table after training.
37+
10. Calculates and prints the average reward per thousand episodes.
38+
11. Visualizes the agent's performance in a few test episodes.
39+
40+
41+
42+
## Acknowledgments
43+
44+
- [OpenAI Gym](https://gym.openai.com/)
45+
46+
Feel free to modify and adapt this code according to your needs.
47+

0 commit comments

Comments
 (0)