Merge pull request #1911 from ayush-09/at

Yashbhadiyadra · web-flow · commit 08f346fe2445 · 2023-06-24T11:57:51.000+05:30
Robot Navigation
diff --git a/Autonomous-Taxi-Agent/Autonomous TaxiAgent.py b/Autonomous-Taxi-Agent/Autonomous TaxiAgent.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jun  4 18:02:01 2021
+
+@author: Ayush
+"""
+
+import gym
+import numpy as np
+from IPython.display import clear_output
+
+env = gym.make('Taxi-v3')
+
+# Baseline: play `episodes` episodes with uniformly random actions and report each score.
+episodes = 10
+for episode in range(1, episodes + 1):  # 1-based labels; the +1 makes all `episodes` run
+    state = env.reset()
+    done = False
+    score = 0
+    while not done:
+        env.render()
+        state, reward, done, info = env.step(env.action_space.sample())
+        score += reward
+        clear_output(wait=True)
+    print('Episode: {}\nScore: {}'.format(episode, score))
+# The env is deliberately left open here: training and visualization below reuse it.
+
+# Q-table: one row per discrete state, one column per action, all zeros to start.
+# NOTE: `state` holds the state COUNT here; the episode loops below rebind it.
+state = env.observation_space.n
+actions = env.action_space.n
+q_table = np.zeros(shape=(state, actions))
+# Uncomment to inspect the table in an interactive session:
+#q_table.shape
+#q_table
+
+# Hyperparameters for Q-learning
+num_episodes = 10_000
+max_steps_per_episode = 1_000
+learning_rate = 0.01             # weight given to the new estimate in each Q update
+discount_rate = 0.99             # weight on the best next-state Q-value
+max_exploration_rate = 1
+min_exploration_rate = 0.01
+exploration_rate = max_exploration_rate   # start fully exploratory; decayed per episode
+exploration_decay_rate = 0.01
+
+rewards_all_episodes = list()
+
+# Tabular Q-learning with an epsilon-greedy policy and per-episode epsilon decay.
+import random
+for episode in range(num_episodes):
+    state = env.reset()
+    done = False
+    reward_current_episode = 0
+    for step in range(max_steps_per_episode):
+        # Epsilon-greedy: exploit the table only when the draw exceeds epsilon.
+        exploration_threshold = random.uniform(0, 1)
+        if exploration_threshold <= exploration_rate:
+            action = env.action_space.sample()
+        else:
+            action = np.argmax(q_table[state, :])
+        new_state, reward, done, info = env.step(action)
+        reward_current_episode += reward
+
+        # Blend the old estimate with the one-step bootstrapped target.
+        td_target = reward + discount_rate * np.max(q_table[new_state, :])
+        q_table[state, action] = q_table[state, action] * (1 - learning_rate) + learning_rate * td_target
+        state = new_state
+        if done:
+            break
+    # Decay epsilon exponentially from its maximum towards its minimum.
+    epsilon_span = max_exploration_rate - min_exploration_rate
+    exploration_rate = min_exploration_rate + epsilon_span * np.exp(-exploration_decay_rate * episode)
+    rewards_all_episodes.append(reward_current_episode)
+print("***** Training Finished *****")
+
+# Print the learned table explicitly; a bare `q_table` expression shows nothing in a script.
+print(q_table)
+
+# Average reward over each consecutive chunk of 1000 episodes.
+# Floor division keeps the section count an int, as np.split documents.
+rewards_per_thousand_episodes = np.split(np.array(rewards_all_episodes), num_episodes // 1000)
+count = 1000
+print("Average per thousand episodes")
+for r in rewards_per_thousand_episodes:
+    print(count, ":", str(sum(r/1000)))
+    count += 1000
+    
+# Visualize Agent: replay three greedy episodes using the learned Q-table.
+import time
+for episode in range(3):
+    # Store the fresh observation in `state`: the greedy lookup below indexes
+    # the Q-table with it, so it must not carry over from a previous episode.
+    state = env.reset()
+    done = False
+    print("Episode is: "+ str(episode))
+    time.sleep(1)
+
+    for step in range(max_steps_per_episode):
+        clear_output(wait=True)
+        env.render()
+        time.sleep(.4)
+
+        action = np.argmax(q_table[state,:])
+        new_state, reward, done, info = env.step(action)
+
+        if done:
+            clear_output(wait=True)
+            env.render()
+            # Taxi-v3 pays +20 for a successful drop-off; any other final
+            # reward means the episode ended without delivering the passenger.
+            if reward == 20:
+                print("****Reached Goal****")
+            else:
+                print("****Failed****")
+            time.sleep(2)
+            clear_output(wait=True)
+            break
+        state = new_state
+env.close()
+
+
+
diff --git a/Autonomous-Taxi-Agent/README.md b/Autonomous-Taxi-Agent/README.md
@@ -0,0 +1,47 @@
+# Q-Learning with Taxi-v3 Environment
+
+This repository contains code for implementing the Q-Learning algorithm using the Taxi-v3 environment from the OpenAI Gym.
+
+## Prerequisites
+
+To run this code, you need the following dependencies:
+
+- Python 3.x
+- Gym: `pip install gym`
+- NumPy and IPython (used for `clear_output`): `pip install numpy ipython`
+
+## Getting Started
+
+1. Clone the repository: `git clone https://github.com/your_username/your_repository.git`
+2. Navigate to the cloned repository: `cd your_repository`
+
+## Running the Code
+
+1. Open the Python script `Autonomous TaxiAgent.py`.
+2. Configure the number of episodes, learning parameters, and other settings as needed.
+3. Run the script: `python "Autonomous TaxiAgent.py"`.
+
+## Understanding the Code
+
+The code performs the following steps:
+
+1. Imports the necessary libraries and initializes the Taxi-v3 environment.
+2. Runs a specified number of episodes, where each episode represents a learning iteration.
+3. Resets the environment for each episode and plays the game until completion.
+4. Renders the environment to visualize the game.
+5. Selects actions randomly for exploration or based on the learned Q-values for exploitation.
+6. Updates the Q-table based on the Q-Learning algorithm.
+7. Adjusts the exploration rate over time to balance exploration and exploitation.
+8. Stores the rewards obtained in each episode.
+9. Prints the Q-table after training.
+10. Calculates and prints the average reward per thousand episodes.
+11. Visualizes the agent's performance in a few test episodes.
+
+
+
+## Acknowledgments
+
+- [OpenAI Gym](https://gym.openai.com/)
+
+Feel free to modify and adapt this code according to your needs.
+