diff --git a/data/algorithms/vin_pretrained_16x16.pth b/data/algorithms/vin_pretrained_16x16.pth new file mode 100644 index 000000000..574bc7b03 Binary files /dev/null and b/data/algorithms/vin_pretrained_16x16.pth differ diff --git a/data/algorithms/vin_pretrained_28x28.pth b/data/algorithms/vin_pretrained_28x28.pth new file mode 100644 index 000000000..d3ac1b05e Binary files /dev/null and b/data/algorithms/vin_pretrained_28x28.pth differ diff --git a/data/algorithms/vin_pretrained_8x8.pth b/data/algorithms/vin_pretrained_8x8.pth new file mode 100644 index 000000000..a8ad96a37 Binary files /dev/null and b/data/algorithms/vin_pretrained_8x8.pth differ diff --git a/src/algorithms/algorithm_manager.py b/src/algorithms/algorithm_manager.py index 1447e7cc9..bf69dbe9e 100644 --- a/src/algorithms/algorithm_manager.py +++ b/src/algorithms/algorithm_manager.py @@ -30,9 +30,10 @@ from algorithms.classic.sample_based.rrt_star import RRT_Star from algorithms.classic.sample_based.rrt_connect import RRT_Connect from algorithms.classic.graph_based.wavefront import Wavefront -from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM -from algorithms.lstm.a_star_waypoint import WayPointNavigation -from algorithms.lstm.combined_online_LSTM import CombinedOnlineLSTM +from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM +from algorithms.learning.a_star_waypoint import WayPointNavigation +from algorithms.learning.combined_online_LSTM import CombinedOnlineLSTM +from algorithms.learning.VIN.VIN import VINAlgorithm if HAS_OMPL: from algorithms.classic.sample_based.ompl_rrt import OMPL_RRT @@ -103,7 +104,8 @@ def _static_init_(cls): "Dijkstra": (Dijkstra, DijkstraTesting, ([], {})), "Bug1": (Bug1, BasicTesting, ([], {})), "Bug2": (Bug2, BasicTesting, ([], {})), - "Potential Field": (PotentialField, BasicTesting, ([], {})) + "Potential Field": (PotentialField, BasicTesting, ([], {})), + "VIN": (VINAlgorithm, BasicTesting, ([], {"load_name": "vin_pretrained"})) } if HAS_OMPL: diff 
--git a/src/algorithms/classic/testing/way_point_navigation_testing.py b/src/algorithms/classic/testing/way_point_navigation_testing.py index 39cdaae92..dbce9939c 100644 --- a/src/algorithms/classic/testing/way_point_navigation_testing.py +++ b/src/algorithms/classic/testing/way_point_navigation_testing.py @@ -3,7 +3,7 @@ import numpy as np from algorithms.basic_testing import BasicTesting -from algorithms.lstm.combined_online_LSTM import CombinedOnlineLSTM +from algorithms.learning.combined_online_LSTM import CombinedOnlineLSTM from simulator.services.debug import DebugLevel diff --git a/src/algorithms/configuration/configuration.py b/src/algorithms/configuration/configuration.py index c62ca1be2..8c1978fdd 100644 --- a/src/algorithms/configuration/configuration.py +++ b/src/algorithms/configuration/configuration.py @@ -4,8 +4,8 @@ from algorithms.algorithm import Algorithm from algorithms.basic_testing import BasicTesting from algorithms.configuration.maps.map import Map -from algorithms.lstm.LSTM_tile_by_tile import BasicLSTMModule -from algorithms.lstm.ML_model import MLModel +from algorithms.learning.LSTM_tile_by_tile import BasicLSTMModule +from algorithms.learning.ML_model import MLModel from simulator.services.debug import DebugLevel from structures import Point diff --git a/src/algorithms/lstm/LSTM_CAE_tile_by_tile.py b/src/algorithms/learning/LSTM_CAE_tile_by_tile.py similarity index 98% rename from src/algorithms/lstm/LSTM_CAE_tile_by_tile.py rename to src/algorithms/learning/LSTM_CAE_tile_by_tile.py index 8812271e0..282969a8c 100644 --- a/src/algorithms/lstm/LSTM_CAE_tile_by_tile.py +++ b/src/algorithms/learning/LSTM_CAE_tile_by_tile.py @@ -15,9 +15,9 @@ from algorithms.basic_testing import BasicTesting from algorithms.configuration.maps.map import Map -from algorithms.lstm.LSTM_tile_by_tile import BasicLSTMModule, OnlineLSTM -from algorithms.lstm.ML_model import MLModel, EvaluationResults -from algorithms.lstm.map_processing import MapProcessing +from 
algorithms.learning.LSTM_tile_by_tile import BasicLSTMModule, OnlineLSTM +from algorithms.learning.ML_model import MLModel, EvaluationResults +from algorithms.learning.map_processing import MapProcessing from simulator.services.services import Services from utility.constants import DATA_PATH diff --git a/src/algorithms/lstm/LSTM_CNN_tile_by_tile_obsolete.py b/src/algorithms/learning/LSTM_CNN_tile_by_tile_obsolete.py similarity index 98% rename from src/algorithms/lstm/LSTM_CNN_tile_by_tile_obsolete.py rename to src/algorithms/learning/LSTM_CNN_tile_by_tile_obsolete.py index 359ba691d..b52a1bddb 100644 --- a/src/algorithms/lstm/LSTM_CNN_tile_by_tile_obsolete.py +++ b/src/algorithms/learning/LSTM_CNN_tile_by_tile_obsolete.py @@ -10,7 +10,7 @@ from torch import nn from algorithms.basic_testing import BasicTesting -from algorithms.lstm.online_lstm import BasicLSTMModule, OnlineLSTM +from algorithms.learning.online_lstm import BasicLSTMModule, OnlineLSTM from simulator.services.services import Services diff --git a/src/algorithms/lstm/LSTM_tile_by_tile.py b/src/algorithms/learning/LSTM_tile_by_tile.py similarity index 98% rename from src/algorithms/lstm/LSTM_tile_by_tile.py rename to src/algorithms/learning/LSTM_tile_by_tile.py index 13ca0fd13..2f8520fb6 100644 --- a/src/algorithms/lstm/LSTM_tile_by_tile.py +++ b/src/algorithms/learning/LSTM_tile_by_tile.py @@ -10,8 +10,8 @@ from algorithms.basic_testing import BasicTesting from algorithms.configuration.entities.goal import Goal from algorithms.configuration.maps.map import Map -from algorithms.lstm.ML_model import MLModel, SingleTensorDataset, PackedDataset -from algorithms.lstm.map_processing import MapProcessing +from algorithms.learning.ML_model import MLModel, SingleTensorDataset, PackedDataset +from algorithms.learning.map_processing import MapProcessing from simulator.services.services import Services from simulator.views.map.display.entities_map_display import EntitiesMapDisplay from 
simulator.views.map.display.online_lstm_map_display import OnlineLSTMMapDisplay diff --git a/src/algorithms/lstm/ML_model.py b/src/algorithms/learning/ML_model.py similarity index 99% rename from src/algorithms/lstm/ML_model.py rename to src/algorithms/learning/ML_model.py index d28587346..c1d2b5e74 100644 --- a/src/algorithms/lstm/ML_model.py +++ b/src/algorithms/learning/ML_model.py @@ -12,7 +12,7 @@ from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence, pack_sequence, PackedSequence from torch.utils import data from torch.utils.data import DataLoader, TensorDataset, Dataset, Subset -from algorithms.lstm.map_processing import MapProcessing +from algorithms.learning.map_processing import MapProcessing from simulator.services.debug import DebugLevel from simulator.services.services import Services @@ -154,7 +154,7 @@ class PackedDataset(Dataset): lengths: torch.Tensor def __init__(self, seq: List[torch.Tensor]) -> None: - from algorithms.lstm.LSTM_tile_by_tile import BasicLSTMModule + from algorithms.learning.LSTM_tile_by_tile import BasicLSTMModule ls = list(map(lambda el: el.shape[0], seq)) self.perm = BasicLSTMModule.get_sort_by_lengths_indices(ls) diff --git a/src/algorithms/learning/VIN/.gitignore b/src/algorithms/learning/VIN/.gitignore new file mode 100755 index 000000000..80247da36 --- /dev/null +++ b/src/algorithms/learning/VIN/.gitignore @@ -0,0 +1,135 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# npz +*.npz + +# pth +*.pth +*.pth.1 + +vin_my_implementation.code-workspace + + +.vscode/launch.json +/resources/training_maps/ +/resources/testing_maps/ + +/resources/logs/ +/resources/test_maps/ +/resources/16_100k/ +/resources/16_60k/ +/resources/16_60k_pt2/ + +/resources/house_expo/ +/resources/house_expo_old/ +/resources/house_expo_100x100/ + + + + + + +resources/100k_no_block.tar.gz diff --git a/src/algorithms/learning/VIN/LICENSE b/src/algorithms/learning/VIN/LICENSE new file mode 100644 index 000000000..07a851703 --- /dev/null +++ b/src/algorithms/learning/VIN/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2020, Hussein Ali Jaafar +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/algorithms/learning/VIN/README.md b/src/algorithms/learning/VIN/README.md new file mode 100644 index 000000000..101577f46 --- /dev/null +++ b/src/algorithms/learning/VIN/README.md @@ -0,0 +1,143 @@ + +## Introduction + +This is a modified implementation of Kent Sommer's PyTorch Value Iteration Networks implementation, meant to work with PathBench. 
+ +Read about Kent Sommer's implementation: [Here](https://github.com/kentsommer/pytorch-value-iteration-networks "Pytorch implementation of Value Iteration Networks") + +Read about PathBench: [Here](https://github.com/husseinalijaafar/PathBench "A Benchmarking Platform for Classic and Learned Path Planning Algorithms") + +Read the original paper: [Here](https://arxiv.org/abs/1602.02867) + +Read about similar implementations, which have made this modified implementation possible: +* [@kentsommer](https://github.com/kentsommer) ([PyTorch implementation](https://github.com/kentsommer/pytorch-value-iteration-networks)) +* [@avivt](https://github.com/avivt) ([Paper Author](https://arxiv.org/abs/1602.02867), [MATLAB implementation](https://github.com/avivt/VIN)) +* [@zuoxingdong](https://github.com/zuoxingdong) ([Tensorflow implementation](https://github.com/zuoxingdong/VIN_TensorFlow), [Pytorch implementation](https://github.com/zuoxingdong/VIN_PyTorch_Visdom)) +* [@TheAbhiKumar](https://github.com/TheAbhiKumar) ([Tensorflow implementation](https://github.com/TheAbhiKumar/tensorflow-value-iteration-networks)) +* [@onlytailei](https://github.com/onlytailei) ([Pytorch implementation](https://github.com/onlytailei/Value-Iteration-Networks-PyTorch)) + +## What has been modified? +In order to run on PathBench-generated maps, I had to modify a few areas of the code. The main difference is within the `test.py` file and `gridworld` class. The maps (resources>maps) are JSON files structured with the goal position, agent position and grid. +I have also added a few metrics, such as path deviation from optimal path, and time. + +The PathBench maps are structured where 0 is obstacle and 1 is free space. +Sample Map: + +## Installation +Instructions: +1. Install Packages +2. Download Training Data +3. Train on Training Data +4. Run `test.py`. 
+ +This repository requires the following packages: +- [SciPy](https://www.scipy.org/install.html) >= 0.19.0 +- [Python](https://www.python.org/) >= 2.7 (if using Python 3.x: python3-tk should be installed) +- [Numpy](https://pypi.python.org/pypi/numpy) >= 1.12.1 +- [Matplotlib](https://matplotlib.org/users/installing.html) >= 2.0.0 +- [PyTorch](http://pytorch.org/) >= 0.1.11 + +Use `pip` to install the necessary dependencies: +``` +pip install -U -r requirements.txt +``` +Note that PyTorch cannot be installed directly from PyPI; refer to http://pytorch.org/ for custom installation instructions specific to your needs. + +#### Downloading Training Data +WIP + +To acquire the training data (NOT for PathBench Maps), either run the training data generator `make_training_data.py` in the dataset folder (resource intensive), or run the shell script to download them: +1. `cd` into main directory +2. `chmod +x download_weights_and_datasets.sh` +3. `./download_weights_and_datasets.sh` + +It should download the training data. +#### PathBench Training Data + +*Outdated - please await updated training files* + +To download PathBench training data, visit [GDrive](https://drive.google.com/file/d/11D-QCf5qZ4qusv66XhxOqqdcyHl5RLHk/view?usp=sharing) +Currently, only the 90000-map training data is uploaded. I will upload more as I generate them. + +To generate your own training data, add the json files with correct structure to a specified path, and pass that path in the `make_training_data.py` file. It is fairly straightforward. 
+ +## How to train +#### 8x8 gridworld +```bash +python train.py --datafile dataset/gridworld_8x8.npz --imsize 8 --lr 0.005 --epochs 30 --k 10 --batch_size 128 +``` +#### 16x16 gridworld +```bash +python train.py --datafile dataset/gridworld_16x16.npz --imsize 16 --lr 0.002 --epochs 30 --k 20 --batch_size 128 +``` +#### 28x28 gridworld +```bash +python train.py --datafile dataset/gridworld_28x28.npz --imsize 28 --lr 0.002 --epochs 30 --k 36 --batch_size 128 +``` +**Flags**: +- `datafile`: The path to the data files. +- `imsize`: The size of input images. One of: [8, 16, 28] +- `lr`: Learning rate with RMSProp optimizer. Recommended: [0.01, 0.005, 0.002, 0.001] +- `epochs`: Number of epochs to train. Default: 30 +- `k`: Number of Value Iterations. Recommended: [10 for 8x8, 20 for 16x16, 36 for 28x28] +- `l_i`: Number of channels in input layer. Default: 2, i.e. obstacles image and goal image. +- `l_h`: Number of channels in first convolutional layer. Default: 150, described in paper. +- `l_q`: Number of channels in q layer (~actions) in VI-module. Default: 10, described in paper. +- `batch_size`: Batch size. Default: 128 + +## How to test / visualize paths (requires training first) +#### 8x8 gridworld +```bash +python test.py --weights trained/vin_8x8.pth --imsize 8 --k 10 +``` +#### 16x16 gridworld +```bash +python test.py --weights trained/vin_16x16.pth --imsize 16 --k 20 +``` +#### 28x28 gridworld +```bash +python test.py --weights trained/vin_28x28.pth --imsize 28 --k 36 +``` +#### 64x64 gridworld +```bash +python test.py --weights trained/vin_28x28.pth --imsize 28 --k 36 +``` +(64x64 still uses 28x28 trained data, we haven't trained VIN on 64x64 maps yet.) + +To visualize the optimal and predicted paths simply pass: +```bash +--plot +``` + +**Flags**: +- `weights`: Path to trained weights. +- `imsize`: The size of input images. One of: [8, 16, 28] +- `plot`: If supplied, the optimal and predicted paths will be plotted +- `k`: Number of Value Iterations. 
class VINAlgorithm(Algorithm):
    """Path planner that follows the greedy policy of a pretrained Value Iteration Network.

    For every step the current agent cell, the obstacle image and the reward
    prior are fed to the network; the highest-scoring action is applied through
    the ``gridworld`` transition model and the agent is moved to the resulting
    cell. One set of weights is loaded per map size and cached on the instance.
    """

    # Loaded networks keyed by map edge length (instances, not classes).
    cached_models: Dict[int, VIN]
    # True when CUDA is available; inputs and models are then moved to the GPU.
    use_GPU: bool

    def __init__(self, services: Services, testing: BasicTesting = None,
                 config: VINConfig = None, load_name: str = "VIN"):
        """Create the planner.

        Args:
            services: simulator service container (provides the model directory).
            testing: optional testing harness forwarded to ``Algorithm``.
            config: network hyper-parameters; a fresh ``VINConfig()`` is created
                when omitted.
            load_name: file-name prefix of the weight files; the file loaded for
                a map of edge length ``n`` is ``f"{load_name}_{n}x{n}.pth"``.
        """
        super().__init__(services, testing)
        self.cached_models = {}
        self.use_GPU = torch.cuda.is_available()
        self._load_name = load_name
        # A default instance built in the signature would be a single object
        # shared by every VINAlgorithm, and _find_path_internal writes `imsize`
        # onto it. Build a private one per instance instead.
        self.config = VINConfig() if config is None else config

    def set_display_info(self):
        """Return the parent's display entries; VIN adds none of its own."""
        return super().set_display_info() + [

        ]

    def _find_path_internal(self) -> None:
        """Roll out the network policy from the agent position towards the goal.

        The rollout is capped at twice the number of states in the reference
        trajectory returned by ``sample_trajectory``. Each predicted cell is
        applied with ``move_agent`` followed by a key frame. Nothing is moved
        when the reference trajectory has fewer than two states.
        """
        mp = self._get_grid()
        # Check the dimensionality first so a non-2D size is reported by the
        # assertion instead of failing while indexing.
        assert len(mp.size) == 2 and mp.size[0] == mp.size[1], \
            f"VIN only accepts square 2D maps, map size {mp.size}"
        imsize = mp.size[0]
        grid = np.copy(mp.grid)
        # The network reads the image size from the config.
        self.config.imsize = imsize
        model: VIN = self.load_VIN(imsize)
        start = (mp.agent.position.x, mp.agent.position.y)
        goal = (mp.goal.position.x, mp.goal.position.y)

        # Mark the start and goal cells as free space as well.
        # NOTE(review): the grid is indexed [x, y] here while
        # make_training_data.py indexes its maps as [y][x] -- confirm orientation.
        grid[start[0], start[1]] = 0
        grid[goal[0], goal[1]] = 0

        obs = obstacles([imsize, imsize], goal)
        obs.dom = grid

        im = obs.get_final()
        G = gridworld(im, goal[0], goal[1])
        value_prior = G.get_reward_prior()
        # Reference trajectory to the goal; only its length and first state are used.
        states_xy, _ = sample_trajectory(G, 1, start, False)

        reference = states_xy[0]
        if len(reference) > 1:
            # Allow the network twice as many steps as the reference path.
            max_steps = len(reference) * 2
            pred_traj = np.zeros((max_steps, 2))
            pred_traj[0, :] = reference[0, :]

            # Pure inference: no gradients are needed for the rollout.
            with torch.no_grad():
                for j in range(1, max_steps):
                    # `np.int` was removed in NumPy 1.24; the builtin `int`
                    # selects the same dtype.
                    state_data = pred_traj[j - 1, :].astype(int)
                    # Inverted domain image and reward prior, each reshaped to
                    # (1, 1, imsize, imsize) and stacked on the channel axis.
                    im_data = 1 - G.image.astype(int)
                    im_data = im_data.reshape(1, 1, imsize, imsize)
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, imsize, imsize)
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(
                        state_data[0].reshape([1, 1])).float()
                    S2_in = torch.from_numpy(
                        state_data[1].reshape([1, 1])).float()
                    if self.use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Forward pass; the action with the highest score wins.
                    _, predictions = model(X_in, S1_in, S2_in, self.config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Apply the action through the gridworld transition model.
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    self.move_agent(Point(nr, nc))
                    self.key_frame(True)
                    if nr == goal[0] and nc == goal[1]:
                        # Goal reached: pad the remaining rows with the goal cell.
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

            if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                self.move_agent(self._get_grid().goal.position)
                self.key_frame(True)
                return
        self.key_frame(True)

    def load_VIN(self, size):
        """Return the network for ``size`` x ``size`` maps, loading it on first use.

        The network is constructed from ``self.config`` and its weights are read
        from ``f"{load_name}_{size}x{size}.pth"`` inside the services' model
        directory. Weights are mapped to the CPU when CUDA is unavailable.
        """
        if size in self.cached_models:
            return self.cached_models[size]
        load_fname = f"{self._load_name}_{size}x{size}.pth"
        load_path = self._services.resources.model_dir._full_path() + load_fname
        vin = VIN(self.config)
        vin.load_state_dict(torch.load(
            load_path,
            map_location=None if self.use_GPU else torch.device("cpu")))
        if self.use_GPU:
            vin = vin.cuda()
        # The model is only used for prediction here.
        vin.eval()
        self.cached_models[size] = vin
        return vin
def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path with the length of the optimal path.

    Path length is the sum of Euclidean distances between consecutive points,
    taken in the order given. Repeated trailing points (the padding written once
    the goal is reached) add zero length, so no de-duplication is needed.
    The previous ``np.unique(..., axis=0)`` call sorted the rows and therefore
    measured a reordered path.

    Args:
        optimal_path: sequence of (x, y) points of the reference path.
        pred_path: sequence of (x, y) points of the predicted path.
        goal: unused; kept for interface compatibility.
        map_num: unused; kept for interface compatibility.

    Returns:
        Tuple ``(dev_rel, dev_non_rel, dist, astar_dist)`` where ``dist`` is the
        predicted path length, ``astar_dist`` the optimal path length,
        ``dev_non_rel`` their absolute difference and ``dev_rel`` that
        difference divided by ``astar_dist``. When the optimal path has zero
        length, ``dev_rel`` is 0.0 if the predicted path is also zero-length and
        ``inf`` otherwise.
    """
    def _path_length(path):
        # Sum of step lengths over the first two columns; fewer than two points
        # means there is no step to measure.
        pts = np.asarray(path, dtype=float)
        if pts.ndim != 2 or pts.shape[0] < 2:
            return 0.0
        steps = np.diff(pts[:, :2], axis=0)
        return float(np.sqrt((steps ** 2).sum(axis=1)).sum())

    dist = _path_length(pred_path)
    astar_dist = _path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist > 0:
        dev_rel = dev_non_rel / astar_dist  # TODO: Add avg distance of gen trajectory
    else:
        # Avoid dividing by zero for a degenerate (single point) optimal path.
        dev_rel = 0.0 if dev_non_rel == 0 else float("inf")
    return (dev_rel, dev_non_rel, dist, astar_dist)
if __name__ == '__main__':
    # None of this module's explicit imports provide argparse, so import it
    # here where it is used.
    import argparse

    # Parsing training parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/vin_8x8.pth',
        help='Path to trained weights')
    parser.add_argument(
        '--maps',
        type=str,
        default='resources/testing_maps/16x16',
        help='Path to maps')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=8, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=10, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    # The parsed namespace is only stored; nothing further runs in this block.
    config = parser.parse_args()
    # Compute Paths generated by network and plot
class GridworldData(data.Dataset):
    """Dataset view over one split of a gridworld ``.npz`` archive.

    The archive stores eight positional arrays. ``arr_0``..``arr_3`` hold the
    training images, S1, S2 and labels; ``arr_4``..``arr_7`` hold the same four
    arrays for the test split.
    """

    def __init__(self,
                 file,
                 imsize,
                 train=True,
                 transform=None,
                 target_transform=None):
        """Load the requested split.

        Args:
            file: path to the archive; must end in ``.npz``.
            imsize: image edge length (stored, not otherwise used here).
            train: read the training split when true, the test split otherwise.
            transform: optional callable applied to each image.
            target_transform: optional callable applied to each label.
        """
        assert file.endswith('.npz')  # Must be .npz format
        self.file = file
        self.imsize = imsize
        self.transform = transform
        self.target_transform = target_transform
        self.train = train  # training set or test set

        loaded = self._process(file, self.train)
        self.images, self.S1, self.S2, self.labels = loaded

    def __getitem__(self, index):
        """Return ``(image, s1, s2, label)`` for one sample; the last three are ints."""
        sample = self.images[index]
        target = self.labels[index]
        # Without a user transform the image is simply converted to a tensor.
        if self.transform is None:
            sample = torch.from_numpy(sample)
        else:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, int(self.S1[index]), int(self.S2[index]), int(target)

    def __len__(self):
        """Number of samples in the loaded split."""
        return self.images.shape[0]

    def _process(self, file, train):
        """Read one split from ``file`` and cast its arrays.

        Images become float32; S1, S2 and labels become integers. The number of
        samples is printed.
        """
        if train:
            keys = ('arr_0', 'arr_1', 'arr_2', 'arr_3')
        else:
            keys = ('arr_4', 'arr_5', 'arr_6', 'arr_7')
        with np.load(file, mmap_mode='r') as archive:
            raw_images, raw_s1, raw_s2, raw_labels = [archive[k] for k in keys]
        images = raw_images.astype(np.float32)
        S1 = raw_s1.astype(int)  # (S1, S2) location are integers
        S2 = raw_s2.astype(int)
        labels = raw_labels.astype(int)  # labels are integers
        if train:
            print("Number of Train Samples: {0}".format(images.shape[0]))
        else:
            print("Number of Test Samples: {0}".format(images.shape[0]))
        return images, S1, S2, labels
+ n_actions = 8 + action_vecs = np.asarray([[-1., 0.], [1., 0.], [0., 1.], [0., -1.], + [-1., 1.], [-1., -1.], [1., 1.], [1., -1.]]) + action_vecs[4:] = 1 / np.sqrt(2) * action_vecs[4:] + action_vecs = action_vecs.T + state_diff = np.diff(traj, axis=0) + norm_state_diff = state_diff * np.tile( + 1 / np.sqrt(np.sum(np.square(state_diff), axis=1)), (2, 1)).T + prj_state_diff = np.dot(norm_state_diff, action_vecs) + actions_one_hot = np.abs(prj_state_diff - 1) < 0.00001 + actions = np.dot(actions_one_hot, np.arange(n_actions).T) + return actions + + +def make_data(dom_size, n_domains, max_obs, max_obs_size, n_traj, + state_batch_size,testing): + + X_l = [] + S1_l = [] + S2_l = [] + Labels_l = [] + + dom = 0.0 + while dom <= n_domains: + # goal = [np.random.randint(dom_size[0]), np.random.randint(dom_size[1])] + # Generate obstacle map + if testing: + # path = './resources/training_maps/8x8/testing/' + path = '/home/hussein/Desktop/git-projects/hjaafar_vin/value-iteration-networks-pb/resources/training_maps/8x8_150000/50000_uniform_seed50/' + mp, goal, start = open_map(dom+100000,path) + else: + # path = './resources/training_maps/8x8/training/' + path = "/home/hussein/Desktop/git-projects/hjaafar_vin/value-iteration-networks-pb/resources/training_maps/8x8_150000/50000_uniform_seed50/" + mp, goal, start = open_map(dom,path) + + + mp[start[1]][start[0]] = 0 #Set the start position as freespace too + mp[goal[1]][goal[0]] = 0 #Set the goal position as freespace too + + goal = [goal[1],goal[0]] #swap them around, for the row col format (x = col not row) + start = [start[1],start[0]] + + obs = obstacles([dom_size[0], dom_size[1]], goal, max_obs_size) + obs.dom = mp + # Add obstacles to map + # n_obs = obs.add_n_rand_obs(max_obs) + # Add border to map + # border_res = obs.add_border() + # Ensure we have valid map + # if n_obs == 0 or not border_res: + # continue + # Get final map + im = obs.get_final() + # Generate gridworld from obstacle map + G = gridworld(im, goal[0], 
def open_map(dom, path):
    """Load one JSON map file and return its grid, goal and agent entries.

    The file name is built by plain string concatenation,
    ``str(path) + str(int(dom)) + '.json'``, so ``path`` must already end
    with a path separator (or be a file-name prefix).

    Args:
        dom: map number; converted with ``int`` so a float counter such as
            ``3.0`` selects ``3.json``.
        path: directory prefix the map files live under.

    Returns:
        Tuple ``(data['grid'], data['goal'], data['agent'])`` read from the
        JSON document.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if one of the three keys is missing from the document.
    """
    # Imported locally so the function does not depend on ``logging``
    # leaking into this module through a wildcard import.
    import logging

    file_name = str(path) + str(int(dom)) + '.json'
    # Log before opening so a failing file is still named in the log.
    logging.info('Opening file: %s', file_name)
    with open(file_name, encoding='utf-8') as json_file:
        data = json.load(json_file)
    return data['grid'], data['goal'], data['agent']
def extract_action(traj):
    """Map every step of a trajectory to the id of the matching action.

    A step is the difference between two consecutive rows of ``traj``. It
    is scaled to unit length and projected onto eight unit direction
    vectors; the action id is the index of the direction whose projection
    equals 1 within a 1e-5 tolerance.

    Args:
        traj: array of shape (n_points, 2) with the visited positions.

    Returns:
        1-D array holding one action id (0..7) per step.
    """
    n_actions = 8
    # Rows 0-3 are axis-aligned moves, rows 4-7 are diagonal moves.
    move_table = np.asarray([[-1., 0.], [1., 0.], [0., 1.], [0., -1.],
                             [-1., 1.], [-1., -1.], [1., 1.], [1., -1.]])
    move_table[4:] = move_table[4:] * (1 / np.sqrt(2))

    deltas = np.diff(traj, axis=0)
    inv_norms = 1 / np.sqrt(np.sum(np.square(deltas), axis=1))
    unit_deltas = deltas * inv_norms.reshape(-1, 1)

    projections = np.dot(unit_deltas, move_table.T)
    one_hot = np.abs(projections - 1) < 0.00001
    action_ids = np.arange(n_actions)
    return np.dot(one_hot, action_ids)
def main(dom_size=[28, 28],
         n_domains=5000,
         max_obs=50,
         max_obs_size=2,
         n_traj=7,
         state_batch_size=1):
    """Generate a training and a testing gridworld dataset and save them.

    ``make_data`` is called twice with the same settings: once with
    ``n_domains`` for the training split and once with ``n_domains / 6``
    for the testing split. The eight resulting arrays are written, in
    the order training X, S1, S2, labels, then testing X, S1, S2, labels,
    to a compressed ``.npz`` archive whose name encodes ``dom_size``.
    """
    # Destination of the archive; resolved first, before any data is made.
    save_path = "dataset/gridworld_{0}x{1}".format(dom_size[0], dom_size[1])
    # Settings shared by both splits.
    common_args = (max_obs, max_obs_size, n_traj, state_batch_size)

    print("Now making training data...")
    X_tr, S1_tr, S2_tr, labels_tr = make_data(dom_size, n_domains,
                                              *common_args)

    print("\nNow making testing data...")
    X_ts, S1_ts, S2_ts, labels_ts = make_data(dom_size, n_domains / 6,
                                              *common_args)

    np.savez_compressed(save_path,
                        X_tr, S1_tr, S2_tr, labels_tr,
                        X_ts, S1_ts, S2_ts, labels_ts)
def set_vals(self):
    """Build the transition, reward and graph data for this grid.

    Reads ``self.image``, ``self.n_row``, ``self.n_col``, ``self.targetx``
    and ``self.targety`` and fills in:

    * ``obstacles`` / ``freespace``: row and column indices of cells that
      are 0 / non-zero in the image.
    * ``P``: array (n_free, n_free, 8) of 0/1 transitions, one slice per
      action in the order given by ``self.neighbors``.
    * ``R``: array (n_free, 8) of step rewards.
    * ``G``: boolean adjacency between free states.
    * ``W``: float32 edge weights, sqrt(2) where a diagonal action links
      two states and 1 where only a straight action does.
    * ``state_map_row`` / ``state_map_col``: row and column of every free
      state.

    States are numbered in column-major ('F') order.
    """
    # Cells equal to 0 are obstacles, every other cell is free.
    row_obs, col_obs = np.where(self.image == 0)
    row_free, col_free = np.where(self.image != 0)
    self.obstacles = [row_obs, col_obs]
    self.freespace = [row_free, col_free]

    grid_shape = (self.n_row, self.n_col)
    self.n_states = self.n_row * self.n_col
    self.n_actions = 8

    # One 0/1 transition matrix per action, in the same order as the
    # indices returned by self.neighbors() (straight moves first, then
    # the four diagonal moves).
    moves = [np.zeros((self.n_states, self.n_states), np.int8)
             for _ in range(self.n_actions)]

    # Reward is -1 for the first four actions, -sqrt(2) for the last
    # four, and 0 for every action taken in the target state.
    R = -1 * np.ones((self.n_states, self.n_actions))
    R[:, 4:self.n_actions] = R[:, 4:self.n_actions] * np.sqrt(2)
    target = np.ravel_multi_index(
        [self.targetx, self.targety], grid_shape, order='F')
    R[target, :] = 0

    for row in range(0, self.n_row):
        for col in range(0, self.n_col):
            curpos = np.ravel_multi_index([row, col], grid_shape, order='F')
            rows, cols = self.neighbors(row, col)
            neighbor_inds = np.ravel_multi_index(
                [rows, cols], grid_shape, order='F')
            for move, neighbor in zip(moves, neighbor_inds):
                move[curpos, neighbor] += 1

    # Boolean reachability through the straight / diagonal actions.
    straight = np.logical_or(moves[0], moves[1])
    for move in moves[2:4]:
        np.logical_or(straight, move, out=straight)
    diagonal = np.logical_or(moves[4], moves[5])
    for move in moves[6:8]:
        np.logical_or(diagonal, move, out=diagonal)

    G = np.logical_or(straight, diagonal)
    gc.collect()

    # Weight 1 where only a straight action connects two states ...
    W = np.array(straight, dtype=np.float32)
    W_diag = np.array(diagonal, dtype=np.float32)
    W -= W_diag
    W = np.clip(W, 0, 1)
    # ... and sqrt(2) wherever a diagonal action connects them.
    W_diag *= np.sqrt(2)
    W += W_diag
    # Was an unconditional print(); kept available for debugging only.
    logging.debug('W = %s', W)

    # Keep only the rows/columns that belong to free cells.
    non_obstacles = np.sort(np.ravel_multi_index(
        [self.freespace[0], self.freespace[1]], grid_shape, order='F'))
    P = np.stack(
        [move[non_obstacles, :][:, non_obstacles] for move in moves],
        axis=2)
    G = G[non_obstacles, :][:, non_obstacles]
    W = W[non_obstacles, :][:, non_obstacles]
    R = R[non_obstacles, :]

    self.G = G
    self.W = W
    self.P = P
    self.R = R

    # Row / column coordinate of every free state, in state order.
    state_map_col, state_map_row = np.meshgrid(
        np.arange(0, self.n_col), np.arange(0, self.n_row))
    self.state_map_col = state_map_col.flatten('F')[non_obstacles]
    self.state_map_row = state_map_row.flatten('F')[non_obstacles]
def get_reward_prior(self):
    """Return the reward prior image for this gridworld.

    The image has shape (n_row, n_col), holds -1 in every cell and 10 at
    ``[targetx, targety]``.
    """
    reward = np.full((self.n_row, self.n_col), -1.0)
    reward[self.targetx, self.targety] = 10
    return reward
def get_coords(self, states):
    """Return the (row, col) grid coordinates of one or more states.

    A state is an index into the sorted, column-major ('F' order) list of
    free cells. The flat cell index is unravelled with the same
    ``(n_row, n_col)`` shape that was used to ravel it; using the swapped
    shape gives wrong coordinates whenever the grid is not square.

    Args:
        states: a state index or array of state indices (any numeric
            dtype; values are truncated to int).

    Returns:
        Tuple ``(r, c)`` of arrays shaped like ``states``.
    """
    grid_shape = (self.n_row, self.n_col)
    non_obstacles = np.sort(np.ravel_multi_index(
        [self.freespace[0], self.freespace[1]], grid_shape, order='F'))
    # asarray also lets plain lists (e.g. an empty path) through.
    states = np.asarray(states).astype(int)
    r, c = np.unravel_index(non_obstacles[states], grid_shape, order='F')
    return r, c
def northwest(self, row, col):
    """Return the cell reached by moving one step up and one step left.

    Both coordinates are decremented and clamped at 0. If the resulting
    cell is 0 in ``self.image`` the move is rejected and the original
    ``(row, col)`` is returned unchanged.
    """
    cand_row = np.max([row - 1, 0])
    cand_col = np.max([col - 1, 0])
    if self.image[cand_row, cand_col] == 0:
        # Destination is blocked: stay where we are.
        return row, col
    return cand_row, cand_col
def trace_path(pred, source, target):
    """Trace a path from ``source`` to ``target`` through a predecessor list.

    Starting at ``target`` the function repeatedly follows ``pred`` until
    it reaches ``source``.

    Args:
        pred: 1-D predecessor array; ``pred[k]`` is the node visited just
            before ``k``. A negative entry means "no predecessor".
        source: node the predecessor chain must end at.
        target: node the walk starts from.

    Returns:
        Float array of shape (path_len, 1) ordered from ``source`` to
        ``target``, or an empty list when no such path exists (a node is
        out of range, a predecessor is negative, or the chain loops).
    """
    n_nodes = len(pred)
    node = int(target)
    reversed_path = [node]
    while node != source:
        # Reject out-of-range nodes explicitly: a negative index would
        # otherwise wrap around and silently produce a bogus path.
        if not 0 <= node < n_nodes:
            return []
        node = int(pred[node])
        # A simple path cannot visit more nodes than the graph has, so
        # anything longer means the predecessor chain contains a cycle.
        if node < 0 or len(reversed_path) >= n_nodes:
            return []
        reversed_path.append(node)
    return np.asarray(reversed_path[::-1], dtype=float).reshape(-1, 1)
def __init__(self, image, targetx, targety):
    """Store the grid image and target, then build all derived data.

    Args:
        image: 2-D array describing the grid; its first two dimensions
            give the number of rows and columns.
        targetx: first index of the target cell (used as a row index).
        targety: second index of the target cell (used as a column index).
    """
    # Inputs.
    self.image = image
    self.n_row, self.n_col = image.shape[0], image.shape[1]
    self.targetx, self.targety = targetx, targety

    # Placeholders; set_vals() below assigns all of these except A.
    self.obstacles, self.freespace = [], []
    self.G, self.W, self.R, self.P, self.A = [], [], [], [], []
    self.n_states = 0
    self.n_actions = 0
    self.state_map_col, self.state_map_row = [], []

    self.set_vals()
neighbor_inds[ + 4]] = p_ne[curpos, neighbor_inds[4]] + 1 + p_nw[curpos, neighbor_inds[ + 5]] = p_nw[curpos, neighbor_inds[5]] + 1 + p_se[curpos, neighbor_inds[ + 6]] = p_se[curpos, neighbor_inds[6]] + 1 + p_sw[curpos, neighbor_inds[ + 7]] = p_sw[curpos, neighbor_inds[7]] + 1 + print('Line 85') + G = np.logical_or.reduce((p_n, p_s, p_e, p_w, p_ne, p_nw, p_se, p_sw)) + print('G ', G.shape) + print('Line 87') + W = np.maximum( + np.maximum( + np.maximum( + np.maximum( + np.maximum(np.maximum(np.maximum(p_n, p_s), p_e), p_w), + np.sqrt(2) * p_ne), + np.sqrt(2) * p_nw), + np.sqrt(2) * p_se), + np.sqrt(2) * p_sw) + + non_obstacles = np.ravel_multi_index( + [self.freespace[0], self.freespace[1]], (self.n_row, self.n_col), + order='F') + print('Line 101') + non_obstacles = np.sort(non_obstacles) + p_n = p_n[non_obstacles, :] + p_n = np.expand_dims(p_n[:, non_obstacles], axis=2) + p_s = p_s[non_obstacles, :] + p_s = np.expand_dims(p_s[:, non_obstacles], axis=2) + p_e = p_e[non_obstacles, :] + p_e = np.expand_dims(p_e[:, non_obstacles], axis=2) + p_w = p_w[non_obstacles, :] + p_w = np.expand_dims(p_w[:, non_obstacles], axis=2) + p_ne = p_ne[non_obstacles, :] + p_ne = np.expand_dims(p_ne[:, non_obstacles], axis=2) + p_nw = p_nw[non_obstacles, :] + p_nw = np.expand_dims(p_nw[:, non_obstacles], axis=2) + p_se = p_se[non_obstacles, :] + p_se = np.expand_dims(p_se[:, non_obstacles], axis=2) + p_sw = p_sw[non_obstacles, :] + p_sw = np.expand_dims(p_sw[:, non_obstacles], axis=2) + G = G[non_obstacles, :] + G = G[:, non_obstacles] + W = W[non_obstacles, :] + W = W[:, non_obstacles] + R = R[non_obstacles, :] + + P = np.concatenate( + (p_n, p_s, p_e, p_w, p_ne, p_nw, p_se, p_sw), axis=2) + print('Line 127') + self.G = G + self.W = W + self.P = P + self.R = R + state_map_col, state_map_row = np.meshgrid( + np.arange(0, self.n_col), np.arange(0, self.n_row)) + self.state_map_col = state_map_col.flatten('F')[non_obstacles] + + self.state_map_row = 
def get_graph(self):
    """Return the adjacency matrix and the non-zero edge weights.

    Returns:
        Tuple ``(G, W)`` where ``G`` is ``self.G`` unchanged and ``W`` is
        a 1-D array of the entries of ``self.W`` that are not 0.
    """
    weights = self.W
    nonzero_weights = weights[weights != 0]
    return self.G, nonzero_weights
def rand_choose(self, in_vec):
    """Sample an index from a discrete probability vector.

    A uniform number ``q`` in [0, 1) is drawn and the index of the bin of
    the cumulative distribution that contains ``q`` is returned. The
    cumulative sum is kept in floating point: truncating it to integers
    makes every non-deterministic vector collapse onto a single index.

    Args:
        in_vec: probabilities as a 1-D array, or a 2-D row/column vector
            (it is flattened).

    Returns:
        Integer index into the flattened ``in_vec``.

    Raises:
        IndexError: if ``in_vec`` is empty.
    """
    cumulative = np.cumsum(np.asarray(in_vec, dtype=float))
    if cumulative.size == 0:
        raise IndexError("cannot sample from an empty probability vector")
    q = np.random.rand()
    # First bin whose upper edge lies strictly above q.
    idx = np.searchsorted(cumulative, q, side='right')
    # Guard against the total falling just short of 1 through rounding.
    return np.minimum(idx, cumulative.size - 1)
def east(self, row, col):
    """Return the cell reached by moving one column to the right.

    The column is incremented and clamped at ``n_col - 1``. If the
    resulting cell is 0 in ``self.image`` the move is rejected and the
    original ``(row, col)`` is returned unchanged.
    """
    target_col = np.min([col + 1, self.n_col - 1])
    blocked = self.image[row, target_col] == 0
    return (row, col) if blocked else (row, target_col)
def sample_trajectory(M, n_states, start, gen=False):
    """Compute shortest paths from start states to the goal of domain ``M``.

    Dijkstra is run once from the goal state over the transposed weight
    matrix, then one path per start state is traced back and reversed so
    that it runs from the start to the goal.

    Args:
        M: gridworld-like object providing ``get_graph_inv``,
            ``map_ind_to_state``, ``return_state_map_row``, ``get_coords``,
            ``targetx``, ``targety``, ``n_row`` and ``n_col``.
        n_states: number of trajectories to produce.
        start: pair whose two entries are passed to ``map_ind_to_state``;
            only used when ``gen`` is false.
        gen: when true, start states are drawn at random; when false the
            single state given by ``start`` is used.

    Returns:
        Tuple ``(states_xy, states_one_hot)`` of lists with one entry per
        trajectory: the stacked coordinates of the visited states and
        their one-hot row/column encoding.

    NOTE(review): with ``gen=False`` only one start state exists, so
    ``n_states > 1`` still raises ``IndexError`` below -- confirm the
    intended behaviour with the callers before changing it.
    """
    G, W = M.get_graph_inv()
    N = G.shape[0]
    # Random start states: the first n_states entries of a permutation,
    # repeated cyclically when more states are requested than exist.
    init_states = np.resize(np.random.permutation(N), n_states)

    # Index of the goal among the free states (the Dijkstra source).
    goal_s = M.map_ind_to_state(M.targetx, M.targety)

    if not gen:
        init_states = [M.map_ind_to_state(start[0], start[1])]

    g_sparse = csr_matrix(W)
    state_map_row, state_map_col = M.return_state_map_row()

    logging.debug('init_states_index = %s', init_states[0])
    logging.info('Start position is %s %s',
                 state_map_row[init_states[0]],
                 state_map_col[init_states[0]])

    _, pred = dijkstra(g_sparse, indices=goal_s, return_predecessors=True)

    states = []
    for i in range(n_states):
        # trace_path returns goal -> start; flip it to start -> goal.
        path = trace_path(pred, goal_s, init_states[i])
        states.append(np.flip(path, 0))

    states_xy = []
    states_one_hot = []
    for state in states:
        L = len(state)
        r, c = M.get_coords(state)
        row_m = np.zeros((L, M.n_row))
        col_m = np.zeros((L, M.n_col))
        for i in range(L):
            row_m[i, r[i]] = 1
            col_m[i, c[i]] = 1
        states_one_hot.append(np.hstack((row_m, col_m)))
        states_xy.append(np.hstack((r, c)))
    return states_xy, states_one_hot
def set_vals(self):
    # Build every derived structure of the gridworld from self.image and
    # the target cell: obstacle/free-space index lists, per-action
    # transition matrices (P), rewards (R), the adjacency graph (G), its
    # edge weights (W) and the state -> (row, col) lookup tables.
    #
    # Cells where self.image == 0 are obstacles, everything else is free.
    row_obs, col_obs = np.where(self.image == 0)
    row_free, col_free = np.where(self.image != 0)
    self.obstacles = [row_obs, col_obs]
    self.freespace = [row_free, col_free]

    # NOTE: n_states counts every grid cell, obstacles included; the
    # matrices stored at the end are restricted to free cells only.
    n_states = self.n_row * self.n_col
    n_actions = 8
    self.n_states = n_states
    self.n_actions = n_actions

    # One dense (n_states x n_states) matrix per move; entry [s, s2] is
    # incremented when the move taken in cell s ends in cell s2.
    p_n = np.zeros((self.n_states, self.n_states))
    p_s = np.zeros((self.n_states, self.n_states))
    p_e = np.zeros((self.n_states, self.n_states))
    p_w = np.zeros((self.n_states, self.n_states))
    p_ne = np.zeros((self.n_states, self.n_states))
    p_nw = np.zeros((self.n_states, self.n_states))
    p_se = np.zeros((self.n_states, self.n_states))
    p_sw = np.zeros((self.n_states, self.n_states))

    # Reward of -1 in columns 0-3 and -sqrt(2) in columns 4-7 (presumably
    # the straight and diagonal moves, in the same order as P below);
    # the row of the target cell is 0 for every action.
    R = -1 * np.ones((self.n_states, self.n_actions))
    R[:, 4:self.n_actions] = R[:, 4:self.n_actions] * np.sqrt(2)
    target = np.ravel_multi_index(
        [self.targetx, self.targety], (self.n_row, self.n_col), order='F')
    R[target, :] = 0

    for row in range(0, self.n_row):
        for col in range(0, self.n_col):

            # Column-major (order='F') linear index of the current cell.
            curpos = np.ravel_multi_index(
                [row, col], (self.n_row, self.n_col), order='F')

            # neighbors() lists the resulting cells in the order
            # north, south, east, west, NE, NW, SE, SW.
            rows, cols = self.neighbors(row, col)

            neighbor_inds = np.ravel_multi_index(
                [rows, cols], (self.n_row, self.n_col), order='F')

            p_n[curpos, neighbor_inds[
                0]] = p_n[curpos, neighbor_inds[0]] + 1
            p_s[curpos, neighbor_inds[
                1]] = p_s[curpos, neighbor_inds[1]] + 1
            p_e[curpos, neighbor_inds[
                2]] = p_e[curpos, neighbor_inds[2]] + 1
            p_w[curpos, neighbor_inds[
                3]] = p_w[curpos, neighbor_inds[3]] + 1
            p_ne[curpos, neighbor_inds[
                4]] = p_ne[curpos, neighbor_inds[4]] + 1
            p_nw[curpos, neighbor_inds[
                5]] = p_nw[curpos, neighbor_inds[5]] + 1
            p_se[curpos, neighbor_inds[
                6]] = p_se[curpos, neighbor_inds[6]] + 1
            p_sw[curpos, neighbor_inds[
                7]] = p_sw[curpos, neighbor_inds[7]] + 1

    # Boolean adjacency: True wherever any of the eight moves links two cells.
    G = np.logical_or.reduce((p_n, p_s, p_e, p_w, p_ne, p_nw, p_se, p_sw))

    # Edge weights: element-wise maximum over the moves, with the four
    # diagonal matrices scaled by sqrt(2).
    W = np.maximum(
        np.maximum(
            np.maximum(
                np.maximum(
                    np.maximum(np.maximum(np.maximum(p_n, p_s), p_e), p_w),
                    np.sqrt(2) * p_ne),
                np.sqrt(2) * p_nw),
            np.sqrt(2) * p_se),
        np.sqrt(2) * p_sw)

    # Sorted column-major indices of the free cells; position k in this
    # array is what the rest of the class treats as state k.
    non_obstacles = np.ravel_multi_index(
        [self.freespace[0], self.freespace[1]], (self.n_row, self.n_col),
        order='F')

    non_obstacles = np.sort(non_obstacles)
    # Keep only free-cell rows and columns; the trailing axis added here is
    # the action axis used by the concatenation below.
    p_n = p_n[non_obstacles, :]
    p_n = np.expand_dims(p_n[:, non_obstacles], axis=2)
    p_s = p_s[non_obstacles, :]
    p_s = np.expand_dims(p_s[:, non_obstacles], axis=2)
    p_e = p_e[non_obstacles, :]
    p_e = np.expand_dims(p_e[:, non_obstacles], axis=2)
    p_w = p_w[non_obstacles, :]
    p_w = np.expand_dims(p_w[:, non_obstacles], axis=2)
    p_ne = p_ne[non_obstacles, :]
    p_ne = np.expand_dims(p_ne[:, non_obstacles], axis=2)
    p_nw = p_nw[non_obstacles, :]
    p_nw = np.expand_dims(p_nw[:, non_obstacles], axis=2)
    p_se = p_se[non_obstacles, :]
    p_se = np.expand_dims(p_se[:, non_obstacles], axis=2)
    p_sw = p_sw[non_obstacles, :]
    p_sw = np.expand_dims(p_sw[:, non_obstacles], axis=2)
    G = G[non_obstacles, :]
    G = G[:, non_obstacles]
    W = W[non_obstacles, :]
    W = W[:, non_obstacles]
    R = R[non_obstacles, :]

    # P[s, s2, a]: transition matrices stacked along the last axis in the
    # order N, S, E, W, NE, NW, SE, SW.
    P = np.concatenate(
        (p_n, p_s, p_e, p_w, p_ne, p_nw, p_se, p_sw), axis=2)

    self.G = G
    self.W = W
    self.P = P
    self.R = R
    # Lookup tables giving the grid row / column of every free-space state.
    state_map_col, state_map_row = np.meshgrid(
        np.arange(0, self.n_col), np.arange(0, self.n_row))
    self.state_map_col = state_map_col.flatten('F')[non_obstacles]
    self.state_map_row = state_map_row.flatten('F')[non_obstacles]
def get_coords(self, states):
    """Map free-space state indices back to grid ``(row, col)`` coordinates.

    Args:
        states: NumPy scalar or array of state indices, i.e. positions in
            the sorted column-major list of free cells.  Values are cast
            to ``int`` before use.

    Returns:
        ``(r, c)``: row and column indices with the same shape as
        ``states``.
    """
    free_cells = np.ravel_multi_index(
        [self.freespace[0], self.freespace[1]], (self.n_row, self.n_col),
        order='F')
    free_cells = np.sort(free_cells)
    states = states.astype(int)
    # Unravel with the same (n_row, n_col) shape that was used to ravel
    # above.  The transposed shape used previously only produced correct
    # coordinates on square grids.
    r, c = np.unravel_index(
        free_cells[states], (self.n_row, self.n_col), order='F')
    return r, c
def sample_next_state(self, s, a):
    """Return the state that follows state ``s`` when action ``a`` is taken.

    The transition vector for ``(s, a)`` comes from ``next_state_prob`` and
    the successor index is drawn from it with ``rand_choose``.
    """
    return self.rand_choose(self.next_state_prob(s, a))
def trace_path(pred, source, target):
    """Trace the shortest path from ``source`` to ``target`` using ``pred``.

    Args:
        pred: predecessor lookup; ``pred[j]`` is the node visited just
            before ``j`` on the shortest path that starts at ``source``.
            A negative entry is treated as "no predecessor".
        source: index of the node the search started from.
        target: index of the node to trace back from.

    Returns:
        A float array of shape ``(k, 1)`` listing the node indices from
        ``source`` to ``target`` (both included), or an empty list when no
        path can be traced (unreachable target, index outside ``pred``, or
        a cycle in ``pred``).
    """
    current = int(target)
    nodes = [current]  # collected target -> source, reversed on return
    visited = {current}
    while current != source:
        if current < 0:
            # Negative sentinel: the previous node had no predecessor, so
            # the target cannot be reached from the source.
            return []
        try:
            current = int(pred[current])
        except (IndexError, KeyError):
            return []
        if current in visited:
            # A malformed predecessor table must not loop forever.
            return []
        visited.add(current)
        nodes.append(current)

    # Same layout as before: a float column vector ordered source -> target,
    # but no longer limited to 1000 nodes.
    path = np.zeros((len(nodes), 1))
    path[:, 0] = nodes[::-1]
    return path
#!/usr/bin/env bash
# Download the pretrained VIN weights into ./trained.
# Run from the directory that contains the `trained` and `dataset` folders.

# Stop if the target directory is missing; otherwise wget would drop the
# weight files into whatever directory the script was started from.
cd trained || exit 1
wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/vin_8x8.pth'
wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/vin_16x16.pth'
wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/vin_28x28.pth'
cd ../dataset
# Dataset downloads are disabled; uncomment the lines below to fetch them.
#wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/gridworld_8x8.npz'
# wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/gridworld_16x16.npz'
# wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/gridworld_28x28.npz'
cd ..
def main(config,
         n_domains=3000,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8,gen = False):
    """Roll a trained VIN out on a series of 16x16 maps and log the metrics.

    For every domain the map is either generated randomly (``gen=True``) or
    loaded from ``./resources/testing_maps/16x16/<dom>.json``.  A Dijkstra
    reference trajectory is sampled, the network is stepped from the same
    start cell for at most twice the reference length, and success,
    path-length deviation, timing and remaining distance are accumulated
    and written to the log file.

    Args:
        config: parsed arguments; ``weights``, ``imsize`` and ``plot`` are
            read here and the object is also handed to the VIN model.
        n_domains: number of maps to evaluate.
        max_obs: maximum number of random obstacles (generation mode only).
        max_obs_size: maximum obstacle size passed to the generator.
        n_traj: number of reference trajectories per map.
        n_actions: unused; kept for interface compatibility.
        gen: generate random maps instead of loading stored ones.
    """
    # Correct vs total:
    logging.basicConfig(filename='./resources/logs/generalization/16_w_64_model',format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO)
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters
    vin.load_state_dict(torch.load(config.weights))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()
    counter,total_no_soln = 0,0
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0,0.0
    total_dist, total_astar_dist = 0.0,0.0
    metrics = True  # accumulate the distance left to the goal upon a failure
    dist_remain_avg = 0.0
    # Maps without a reference solution are only counted when stored maps
    # are replayed.  Defined up front so the `elif wpn` check below cannot
    # hit an unbound name in generation mode.
    wpn = not gen
    for dom in range(n_domains):
        if gen:
            print('Gen started')
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            path = './resources/testing_maps/16x16/'
            mp, goal, start = open_map(dom,path)
            mp[start[1]][start[0]] = 0  # Set the start position as freespace too
            mp[goal[1]][goal[0]] = 0  # Set the goal position as freespace too

            # Stored positions are (x, y); the gridworld expects (row, col).
            goal = [goal[1],goal[0]]
            start = [start[1],start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()

        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference (Dijkstra) trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj,start,gen)
        if gen and len(states_xy[0]) > 0:
            # Persist the generated map
            save_image(G.image,(goal[0],goal[1]),states_xy[0][0],states_xy, states_one_hot,counter)

        counter += 1
        t0 = time.time()
        for i in range(n_traj):
            if len(states_xy[i]) > 1:

                # Allow twice the reference number of steps
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data
                    state_data = pred_traj[j - 1, :]
                    # np.int was removed from NumPy; the builtin int is the
                    # type it aliased.
                    state_data = state_data.astype(int)
                    # Transform domain to Networks expected input shape
                    im_data = G.image.astype(int)
                    im_data = 1 - im_data
                    im_data = im_data.reshape(1, 1, config.imsize,
                                              config.imsize)
                    # Transform value prior to Networks expected input shape
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, config.imsize,
                                                    config.imsize)
                    # Get inputs as expected by network
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    X_in, S1_in, S2_in = Variable(X_in), Variable(
                        S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break
                # Score the rollout (and optionally plot both paths)
                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1-t0)
                    dev_rel,dev_non_rel,dist,astar_dist = deviation(states_xy[i],pred_traj,goal,total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot == True:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    d = dist_left(pred_traj,goal)
                    dist_remain_avg += d
                    if config.plot == True:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1

            elif wpn:
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        # NOTE(review): the deviation/distance sums only grow on successful
        # rollouts but are averaged over `total` - confirm this is intended.
        failures = total - correct
        logging.info('Rollout Accuracy: %s',(100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s',(100 * (correct / (total+total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel/total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel/total))
        logging.info('Total avg VIN Distance %s', (total_dist/total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist/total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist/total))-((total_dist/total)))/((total_astar_dist/total))))
        logging.info('Total elapsed time %s', (sum(t_list)/(total)))  # TODO: Possibly add total no soln
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero.
        logging.info('Avg distance left when failed: %s ', (dist_remain_avg/failures) if failures else 0.0)
        logging.info('---------------------------------Done ------------------------------------')

    else:
        logging.info('No successes either vin or dijkstra')
def _path_length(points):
    """Return the summed Euclidean length of the segments of an (N, 2) path."""
    length = 0.0
    for prev, cur in zip(points[:-1], points[1:]):
        length += ((cur[0] - prev[0]) ** 2 + (cur[1] - prev[1]) ** 2) ** 0.5
    return float(length)


def deviation(optimal_path, pred_path,goal, map_num):
    """Compare the length of a predicted path with the optimal one.

    Args:
        optimal_path: (N, 2) sequence of cells on the reference path.
        pred_path: (M, 2) sequence of cells visited by the predicted
            rollout, in visiting order.  Repeated rows (for example the
            goal padding appended after arrival) add no length.
        goal: unused; kept for interface compatibility.
        map_num: unused; kept for interface compatibility.

    Returns:
        ``(dev_rel, dev_non_rel, dist, astar_dist)``: the length
        difference relative to the optimal length, the absolute length
        difference, the predicted path length and the optimal path length.

    Raises:
        ZeroDivisionError: if the optimal path has zero length.
    """
    optimal_path = 1.0 * np.array(optimal_path)
    pred_path = 1.0 * np.array(pred_path)

    # The path must be measured in visiting order.  De-duplicating with
    # np.unique sorted the rows and therefore measured a different polyline.
    dist = _path_length(pred_path)
    astar_dist = _path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    dev_rel = dev_non_rel / astar_dist
    return (dev_rel, dev_non_rel, dist, astar_dist)
def main(config,
         n_domains=3000,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8,gen = False):
    """Roll a trained VIN out on a series of 28x28 maps and log the metrics.

    For every domain the map is either generated randomly (``gen=True``) or
    loaded from ``./resources/testing_maps/28x28/<dom>.json``.  A Dijkstra
    reference trajectory is sampled, the network is stepped from the same
    start cell for at most twice the reference length, and success,
    path-length deviation, timing and remaining distance are accumulated
    and written to the log file.

    Args:
        config: parsed arguments; ``weights``, ``imsize`` and ``plot`` are
            read here and the object is also handed to the VIN model.
        n_domains: number of maps to evaluate.
        max_obs: maximum number of random obstacles (generation mode only).
        max_obs_size: maximum obstacle size passed to the generator.
        n_traj: number of reference trajectories per map.
        n_actions: unused; kept for interface compatibility.
        gen: generate random maps instead of loading stored ones.
    """
    # Correct vs total:
    logging.basicConfig(filename='./resources/logs/generalization/28_w_64_model',format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO)
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters
    vin.load_state_dict(torch.load(config.weights))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()
    counter,total_no_soln = 0,0
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0,0.0
    total_dist, total_astar_dist = 0.0,0.0
    metrics = True  # accumulate the distance left to the goal upon a failure
    dist_remain_avg = 0.0
    # Maps without a reference solution are only counted when stored maps
    # are replayed.  Defined up front so the `elif wpn` check below cannot
    # hit an unbound name in generation mode.
    wpn = not gen
    for dom in range(n_domains):
        if gen:
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            path = './resources/testing_maps/28x28/'
            mp, goal, start = open_map(dom,path)
            mp[start[1]][start[0]] = 0  # Set the start position as freespace too
            mp[goal[1]][goal[0]] = 0  # Set the goal position as freespace too

            # Stored positions are (x, y); the gridworld expects (row, col).
            goal = [goal[1],goal[0]]
            start = [start[1],start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()

        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference (Dijkstra) trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj,start,gen)
        if gen and len(states_xy[0]) > 0:
            # Persist the generated map
            save_image(G.image,(goal[0],goal[1]),states_xy[0][0],states_xy, states_one_hot,counter)

        counter += 1
        t0 = time.time()
        for i in range(n_traj):
            if len(states_xy[i]) > 1:

                # Allow twice the reference number of steps
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data
                    state_data = pred_traj[j - 1, :]
                    # np.int was removed from NumPy; the builtin int is the
                    # type it aliased.
                    state_data = state_data.astype(int)
                    # Transform domain to Networks expected input shape
                    im_data = G.image.astype(int)
                    im_data = 1 - im_data
                    im_data = im_data.reshape(1, 1, config.imsize,
                                              config.imsize)
                    # Transform value prior to Networks expected input shape
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, config.imsize,
                                                    config.imsize)
                    # Get inputs as expected by network
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    X_in, S1_in, S2_in = Variable(X_in), Variable(
                        S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break
                # Score the rollout (and optionally plot both paths)
                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1-t0)
                    dev_rel,dev_non_rel,dist,astar_dist = deviation(states_xy[i],pred_traj,goal,total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot == True:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    d = dist_left(pred_traj,goal)
                    dist_remain_avg += d
                    if config.plot == True:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1

            elif wpn:
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        # NOTE(review): the deviation/distance sums only grow on successful
        # rollouts but are averaged over `total` - confirm this is intended.
        failures = total - correct
        logging.info('Rollout Accuracy Dijkstra: %s',(100*((total-total_no_soln)/total)))
        logging.info('Rollout Accuracy: %s',(100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s',(100 * (correct / (total+total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel/total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel/total))
        logging.info('Total avg VIN Distance %s', (total_dist/total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist/total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist/total))-((total_dist/total)))/((total_astar_dist/total))))
        logging.info('Total elapsed time %s', (sum(t_list)/(total)))  # TODO: Possibly add total no soln
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero.
        logging.info('Avg distance left when failed: %s ', (dist_remain_avg/failures) if failures else 0.0)
        logging.info('---------------------------------Done ------------------------------------')

    else:
        logging.info('No successes either vin or dijkstra')
+ ''' + s = config.imsize + + if len(states_xy[0]) == 0: + + im.tolist()[start_x][start_y] = 1 + start_xy = [0,0] + mp = { + 'grid': im.tolist(), + 'goal': [goal[0],goal[1]], + # 'start': int(start), + 'agent': start_xy} + # 'states_xy': states_xy[0].tolist(), + # 'states_one_hot': states_one_hot[0].tolist() + else: + mp = { + 'grid': im.tolist(), + 'goal': [goal[0],goal[1]], + # 'start': int(start), + 'agent': states_xy[0][0].tolist() + # 'states_xy': states_xy[0].tolist(), + # 'states_one_hot': states_one_hot[0].tolist() + } + data.append(mp) + with open('./maps/' +str(s) + '_data_300' + '.json', 'w') as outfile: + json.dump(data,outfile) + +def open_map(dom,path): + ''' + Used to open a map json given dom and path, returns grid, goal and agent + ''' + with open(str(path) + str(dom) +'.json') as json_file: + data = json.load(json_file) + logging.info('Opening file: ' + str(path) + str(dom) + '.json' ) + return data['grid'], data['goal'], data['agent'] + +def open_map_list(dom,path): + with open(str(path) + '.json') as json_file: + data = json.load(json_file) + logging.info('Opening file: ' + str(path) + str(dom) + '.json' ) + return data[dom]['grid'], data[dom]['goal'], data[dom]['agent'] + +def deviation(optimal_path, pred_path,goal, map_num): + optimal_path = np.array(optimal_path) + optimal_path = 1.0 * optimal_path + + optimal_path_x = np.array(optimal_path[:,0]) + optimal_path_y = np.array(optimal_path[:,1]) + + pred_path = np.unique(pred_path, axis=0) #removes duplicates at the end (when it reaches goal) + + #print('Shortened path' , pred_path) + pred_path_x = np.array(pred_path[:,0]) + pred_path_y = np.array(pred_path[:,1]) + dist = 0.0 + astar_dist = 0.0 + prev = pred_path[0,:] + total_diff_gen = 0 + for xy in pred_path[:,:]: + + diff = math.sqrt( ((1.0 * xy[0]- 1.0*prev[0])**2)+((1.0*xy[1] - 1.0*prev[1])**2)) + total_diff_gen += diff + dist+= ((xy[0]-prev[0])**2 + (xy[1]-prev[1])**2)**0.5 + prev = xy + + #prev = [0,0] + #print('opt', optimal_path[0,:]) + 
prev = optimal_path[0,:] + total_diff_optim = 0 + for xy in optimal_path[:,:]: + # print('xy', xy) + diff2 = math.sqrt( ((1.0 * xy[0]- 1.0*prev[0])**2)+((1.0*xy[1] - 1.0*prev[1])**2)) + total_diff_optim += diff2 + astar_dist+= ((xy[0]-prev[0])**2 + (xy[1]-prev[1])**2)**0.5 + prev = xy + + dev_non_rel = abs(total_diff_optim-total_diff_gen) + dev_rel = dev_non_rel/total_diff_optim #TODO: Add avg distance of gen trajectory + return(dev_rel,dev_non_rel,dist,astar_dist) + +def dist_left(pred_traj, goal): + ''' + Finds the distance left between the point and the goal + ''' + pred_traj = np.array(pred_traj) #euclidean distance or geometric distance ? use geometric + x1,y1 = pred_traj[-1][0], pred_traj[-1][1] + x2,y2 = goal[0],goal[1] + dist = (((x2-x1)**2 + (y2-y1)**2))**0.5 + return dist + + +if __name__ == '__main__': + # Parsing training parameters + parser = argparse.ArgumentParser() + parser.add_argument( + '--weights', + type=str, + default='trained/30k_no_block_dataset_vin_64x64.pth', + help='Path to trained weights') + parser.add_argument('--plot', action='store_true', default=False) + parser.add_argument('--gen', action='store_true', default=False) + parser.add_argument('--imsize', type=int, default=28, help='Size of image') + parser.add_argument( + '--k', type=int, default=36, help='Number of Value Iterations') + parser.add_argument( + '--l_i', type=int, default=2, help='Number of channels in input layer') + parser.add_argument( + '--l_h', + type=int, + default=150, + help='Number of channels in first hidden layer') + parser.add_argument( + '--l_q', + type=int, + default=10, + help='Number of channels in q layer (~actions) in VI-module') + config = parser.parse_args() + # Compute Paths generated by network and plot + + for i in range(1): + main(config) + # main(config) diff --git a/src/algorithms/learning/VIN/general_test8.py b/src/algorithms/learning/VIN/general_test8.py new file mode 100644 index 000000000..968191203 --- /dev/null +++ 
b/src/algorithms/learning/VIN/general_test8.py @@ -0,0 +1,339 @@ +import sys +import argparse +import json +import matplotlib.pyplot as plt +import random +import numpy as np +import torch +from torch.autograd import Variable + +from dataset.dataset import * +from utility.utils import * +from model import * + +from domains.gridworld import * +from generators.obstacle_gen import * + +import logging +import time +import math + +def main(config, + n_domains=3000, + max_obs=30, + max_obs_size=None, + n_traj=1, + n_actions=8,gen = False): + # Correct vs total: + logging.basicConfig(filename='./resources/logs/generalization/8_w_64_model',format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO) + correct, total = 0.0, 0.0 + # Automatic swith of GPU mode if available + use_GPU = torch.cuda.is_available() + # Instantiate a VIN model + vin = VIN(config) + # Load model parameters + vin.load_state_dict(torch.load(config.weights)) + # Use GPU if available + if use_GPU: + vin = vin.cuda() + counter,total_no_soln = 0,0 + global data + data = [] + t_list = [] + total_dev_non_rel, total_dev_rel = 0.0,0.0 + total_dist, total_astar_dist = 0.0,0.0 + metrics = True #this enables displaying the distance left to reach goal upon a failure + dist_remain_avg = 0.0 + for dom in range(n_domains): + if gen: + goal = [ + np.random.randint(config.imsize), + np.random.randint(config.imsize) + ] + obs = obstacles([config.imsize, config.imsize], goal, max_obs_size) + # Add obstacles to map + n_obs = obs.add_n_rand_obs(max_obs) + # Add border to map + border_res = obs.add_border() + # Ensure we have valid map + if n_obs == 0 or not border_res: + continue + start = None + else: + wpn = True + # path = './resources/maps/' + path = './resources/testing_maps/8x8/' + mp, goal, start = open_map(dom,path) + # path = './maps/8_data_300' + # mp, goal, start = open_map_list(dom,path) + mp[start[1]][start[0]] = 0 #Set the start position as freespace too + mp[goal[1]][goal[0]] = 0 #Set the goal 
position as freespace too + + goal = [goal[1],goal[0]] #swap them around, for the row col format (x = col not row) + start = [start[1],start[0]] + obs = obstacles([config.imsize, config.imsize], goal, max_obs_size) + obs.dom = mp + + # Get final map + im = obs.get_final() + + + #1 is obstacles. + #set obs.dom as the mp + logging.debug('0 is obstacle ') + logging.debug(' im: %s ', im) + # Generate gridworld from obstacle map + G = gridworld(im, goal[0], goal[1]) + # Get value prior + value_prior = G.get_reward_prior() + # Sample random trajectories to our goal + states_xy, states_one_hot = sample_trajectory(G, n_traj,start,gen) #dijkstra trajectory + # print('states_xy', states_xy[0] , len(states_xy[0])) + if gen and len(states_xy[0]) > 0: + save_image(G.image,(goal[0],goal[1]),states_xy[0][0],states_xy, states_one_hot,counter) #this saves the maps + + counter += 1 + t0 = time.time() + for i in range(n_traj): + if len(states_xy[i]) > 1: + + # Get number of steps to goal + L = len(states_xy[i]) * 2 + # Allocate space for predicted steps + pred_traj = np.zeros((L, 2)) + # Set starting position + pred_traj[0, :] = states_xy[i][0, :] + + for j in range(1, L): + # Transform current state data + state_data = pred_traj[j - 1, :] + state_data = state_data.astype(np.int) + # Transform domain to Networks expected input shape + im_data = G.image.astype(np.int) + im_data = 1 - im_data + im_data = im_data.reshape(1, 1, config.imsize, + config.imsize) + # Transfrom value prior to Networks expected input shape + value_data = value_prior.astype(np.int) + value_data = value_data.reshape(1, 1, config.imsize, + config.imsize) + # Get inputs as expected by network + X_in = torch.from_numpy( + np.append(im_data, value_data, axis=1)).float() + S1_in = torch.from_numpy(state_data[0].reshape( + [1, 1])).float() + S2_in = torch.from_numpy(state_data[1].reshape( + [1, 1])).float() + # Send Tensors to GPU if available + if use_GPU: + X_in = X_in.cuda() + S1_in = S1_in.cuda() + S2_in = 
S2_in.cuda() + # Wrap to autograd.Variable + X_in, S1_in, S2_in = Variable(X_in), Variable( + S1_in), Variable(S2_in) + # Forward pass in our neural net + _, predictions = vin(X_in, S1_in, S2_in, config) + _, indices = torch.max(predictions.cpu(), 1, keepdim=True) + a = indices.data.numpy()[0][0] + # Transform prediction to indices + s = G.map_ind_to_state(pred_traj[j - 1, 0], + pred_traj[j - 1, 1]) + ns = G.sample_next_state(s, a) + nr, nc = G.get_coords(ns) + pred_traj[j, 0] = nr + pred_traj[j, 1] = nc + if nr == goal[0] and nc == goal[1]: + # We hit goal so fill remaining steps + pred_traj[j + 1:, 0] = nr + pred_traj[j + 1:, 1] = nc + break + # Plot optimal and predicted path (also start, end) + if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]: + logging.debug('#################### - Path Found map %s!\n', dom) + correct += 1 + t1 = time.time() + t_list.append(t1-t0) + dev_rel,dev_non_rel,dist,astar_dist = deviation(states_xy[i],pred_traj,goal,total) + total_dev_rel += dev_rel + total_dev_non_rel += dev_non_rel + total_dist += dist + total_astar_dist += astar_dist + if config.plot == True: + visualize(G.image.T, states_xy[i], pred_traj) + elif metrics: + d = dist_left(pred_traj,goal) + dist_remain_avg += d + if config.plot == True: + visualize(G.image.T, states_xy[i], pred_traj) + total += 1 + + + + elif wpn: + total_no_soln += 1 + sys.stdout.write("\r" + str(int( + (float(dom) / n_domains) * 100.0)) + "%") + sys.stdout.flush() + + sys.stdout.write("\n") + if total and correct: + logging.info('Rollout Accuracy: %s',(100 * (correct / total))) + logging.info('Rollout Accuracy Adjusted: %s',(100 * (correct / (total+total_no_soln)))) + logging.info('Total maps with no soln from Dijkstra %s', total_no_soln) + logging.info('Total avg Rel Deviation %s', (total_dev_rel/total)) + logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel/total)) + logging.info('Total avg VIN Distance %s', (total_dist/total)) + logging.info('Total avg Dijkstra 
def visualize(dom, states_xy, pred_traj):
    """Draw the optimal and predicted paths over the map, then wait for input.

    Args:
        dom: 2-D image handed to ``plt.imshow``.
        states_xy: Reference path, read as columns ``[:, 0]`` and ``[:, 1]``;
            its first and last rows are marked as start and goal.
        pred_traj: Predicted path with the same two-column layout.

    The call blocks in ``plt.waitforbuttonpress`` and closes the figure
    afterwards.
    """
    fig, ax = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")
    ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
    ax.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
    ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
    ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')
    legend = ax.legend(loc='upper right', shadow=False)
    for label in legend.get_texts():
        label.set_fontsize('x-small')  # the legend text size
    for label in legend.get_lines():
        label.set_linewidth(0.5)  # the legend line width
    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(fig)


def save_image(im, goal, start, states_xy, states_one_hot, counter):
    """Append one generated map to the global ``data`` list and dump it as JSON.

    Reads two module globals: ``config`` (``config.imsize`` names the output
    file) and ``data`` (all maps saved so far). The whole list is rewritten to
    ``./maps/<imsize>_data_300.json`` on every call.

    Args:
        im: Map array; stored via ``im.tolist()``.
        goal: Indexable goal position; elements 0 and 1 are stored.
        start, states_one_hot, counter: Accepted for call compatibility, unused.
        states_xy: Sampled trajectories; the first point of the first
            trajectory is stored as the agent position.
    """
    s = config.imsize

    if len(states_xy[0]) == 0:
        # No trajectory available: store a fixed agent position. The previous
        # code also wrote through the undefined names start_x/start_y, which
        # raised NameError, and the write targeted a throw-away list anyway.
        agent = [0, 0]
    else:
        agent = states_xy[0][0].tolist()

    data.append({
        'grid': im.tolist(),
        'goal': [goal[0], goal[1]],
        'agent': agent,
    })
    with open('./maps/' + str(s) + '_data_300' + '.json', 'w') as outfile:
        json.dump(data, outfile)


def open_map(dom, path):
    """Load ``<path><dom>.json`` and return its ``(grid, goal, agent)`` entries."""
    file_name = str(path) + str(dom) + '.json'
    with open(file_name) as json_file:
        data = json.load(json_file)
    logging.info('Opening file: %s', file_name)
    return data['grid'], data['goal'], data['agent']


def open_map_list(dom, path):
    """Load ``<path>.json`` (a list of maps) and return entry ``dom``.

    Returns the ``(grid, goal, agent)`` values of ``data[dom]``.
    """
    file_name = str(path) + '.json'
    with open(file_name) as json_file:
        data = json.load(json_file)
    # Log the file that is really opened; the old message appended ``dom`` to
    # the name although no such file is read.
    logging.info('Opening file: %s (entry %s)', file_name, dom)
    entry = data[dom]
    return entry['grid'], entry['goal'], entry['agent']


def _path_length(path):
    """Return the summed Euclidean length of the consecutive steps of ``path``."""
    points = np.asarray(path, dtype=float)
    if points.ndim != 2 or len(points) < 2:
        return 0.0
    steps = np.diff(points[:, :2], axis=0)
    return float(np.sqrt((steps ** 2).sum(axis=1)).sum())


def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path with the optimal one.

    Args:
        optimal_path: Sequence of ``(a, b)`` points of the reference path.
        pred_path: Sequence of ``(a, b)`` points of the predicted path, in the
            order they were visited. Repeated trailing points (padding after
            the goal was reached) add zero length and need no removal.
        goal, map_num: Accepted for call compatibility, unused.

    Returns:
        ``(dev_rel, dev_non_rel, dist, astar_dist)`` where ``dist`` and
        ``astar_dist`` are the predicted and optimal path lengths,
        ``dev_non_rel`` their absolute difference and ``dev_rel`` that
        difference divided by the optimal length.
    """
    # The path must be measured in visiting order. The former
    # np.unique(pred_path, axis=0) call sorted the rows and therefore measured
    # a different polyline.
    dist = _path_length(pred_path)
    astar_dist = _path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist:
        dev_rel = dev_non_rel / astar_dist
    else:
        # Zero-length reference: avoid ZeroDivisionError.
        dev_rel = 0.0 if dev_non_rel == 0 else float('inf')
    return (dev_rel, dev_non_rel, dist, astar_dist)


def dist_left(pred_traj, goal):
    """Return the Euclidean distance between the last trajectory point and ``goal``."""
    last = np.asarray(pred_traj)[-1]
    return math.hypot(goal[0] - last[0], goal[1] - last[1])


if __name__ == '__main__':
    # Parsing training parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/30k_no_block_dataset_vin_64x64.pth',
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=8, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=10, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    # ``config`` stays a module global: save_image() reads it.
    config = parser.parse_args()
    # Compute Paths generated by network and plot
    main(config)
class obstacles:
    """Generate rectangular obstacles on a grid map.

    ``dom`` is the working map (cells set to 1 are obstacles) and ``mask`` is
    the ``[row, col]`` cell that must stay free; every candidate change is
    rejected when it would cover that cell.
    """

    def __init__(self,
                 domsize=None,
                 mask=None,
                 size_max=None,
                 dom=None,
                 obs_types=None,
                 num_types=None):
        """Store the map geometry and create an empty map when none is given.

        Args:
            domsize: ``[rows, cols]`` of the map; defaults to ``[]``.
            mask: ``[row, col]`` of the cell that must remain free.
            size_max: Largest obstacle edge; defaults to a quarter of the
                larger map side (0 when ``domsize`` is empty).
            dom: Existing map to start from; defaults to all zeros.
            obs_types: Obstacle type names; defaults to ``["circ", "rect"]``.
            num_types: Number of types; defaults to ``len(obs_types)``.
        """
        self.domsize = domsize or []
        # ``is None`` instead of ``or``: truth-testing a multi-element array
        # raises ValueError, so an ndarray map or mask could not be passed.
        self.mask = [] if mask is None else mask
        if dom is None or np.size(dom) == 0:
            self.dom = np.zeros(self.domsize)
        else:
            self.dom = dom
        self.obs_types = obs_types or ["circ", "rect"]
        self.num_types = num_types or len(self.obs_types)
        if not size_max:
            # np.max([]) raises, so guard the empty-size default.
            size_max = np.max(self.domsize) / 4 if len(self.domsize) else 0
        self.size_max = size_max

    def check_mask(self, dom=None):
        """Return whether the masked cell is occupied in ``dom`` (or ``self.dom``)."""
        # Ensure goal is in free space
        target = self.dom if dom is None else dom
        return np.any(target[self.mask[0], self.mask[1]])

    def insert_rect(self, x, y, height, width):
        """Return a copy of the map with rows ``x:x+height``, cols ``y:y+width`` set to 1."""
        im_try = np.copy(self.dom)
        im_try[x:x + height, y:y + width] = 1
        return im_try

    def add_rand_obs(self, obj_type):
        """Try to add one random obstacle; return True when the map was changed.

        Only ``"rect"`` is implemented. ``"circ"`` prints a notice and, like any
        other type, returns False. A rectangle covering the masked cell is
        discarded.
        """
        if obj_type == "circ":
            print("circ is not yet implemented... sorry")
            return False
        if obj_type != "rect":
            return False
        rand_height = int(np.ceil(np.random.rand() * self.size_max))
        rand_width = int(np.ceil(np.random.rand() * self.size_max))
        # x indexes rows (axis 0), so it is bounded by domsize[0]; the former
        # domsize[1] bound placed rows outside non-square maps.
        randx = int(np.ceil(np.random.rand() * (self.domsize[0] - 1)))
        randy = int(np.ceil(np.random.rand() * (self.domsize[1] - 1)))
        im_try = self.insert_rect(randx, randy, rand_height, rand_width)
        if self.check_mask(im_try):
            return False
        self.dom = im_try
        return True

    def add_n_rand_obs(self, n):
        """Attempt ``n`` random rectangles and return how many were accepted."""
        count = 0
        for _ in range(n):
            if self.add_rand_obs("rect"):
                count += 1
        return count

    def add_border(self):
        """Mark the outer frame as obstacle; return False if it would cover the mask."""
        im_try = np.copy(self.dom)
        im_try[0:self.domsize[0], 0] = 1
        im_try[0, 0:self.domsize[1]] = 1
        im_try[0:self.domsize[0], self.domsize[1] - 1] = 1
        im_try[self.domsize[0] - 1, 0:self.domsize[1]] = 1
        if self.check_mask(im_try):
            return False
        self.dom = im_try
        return True

    def get_final(self):
        """Return the inverted, normalised map (1 = free cell, 0 = obstacle)."""
        grid = np.copy(self.dom)
        top = np.max(grid)
        inverted = top - grid
        peak = np.max(inverted)
        if peak == 0:
            # Constant map: the plain division is 0/0 and yields NaN. An
            # all-zero map is entirely free, any other constant map is
            # entirely blocked.
            fill = 1.0 if top == 0 else 0.0
            return np.full(np.shape(inverted), fill)
        return inverted / peak

    def show(self):
        """Display the final obstacle map."""
        plt.imshow(self.get_final(), cmap='Greys')
        plt.show()

    def _print(self):
        """Print every attribute of the generator."""
        print("domsize: ", self.domsize)
        print("mask: ", self.mask)
        print("dom: ", self.dom)
        print("obs_types: ", self.obs_types)
        print("num_types: ", self.num_types)
        print("size_max: ", self.size_max)
+ self.config = config + self.h = nn.Conv2d( + in_channels=config.l_i, + out_channels=config.l_h, + kernel_size=(3, 3), + stride=1, + padding=1, + bias=True) + self.r = nn.Conv2d( + in_channels=config.l_h, + out_channels=1, + kernel_size=(1, 1), + stride=1, + padding=0, + bias=False) + self.q = nn.Conv2d( + in_channels=1, + out_channels=config.l_q, + kernel_size=(3, 3), + stride=1, + padding=1, + bias=False) + self.fc = nn.Linear(in_features=config.l_q, out_features=8, bias=False) + self.w = Parameter( + torch.zeros(config.l_q, 1, 3, 3), requires_grad=True) + self.sm = nn.Softmax(dim=1) + + def forward(self, X, S1, S2, config): + h = self.h(X) + r = self.r(h) + q = self.q(r) + v, _ = torch.max(q, dim=1, keepdim=True) + for i in range(0, config.k - 1): + q = F.conv2d( + torch.cat([r, v], 1), + torch.cat([self.q.weight, self.w], 1), + stride=1, + padding=1) + v, _ = torch.max(q, dim=1, keepdim=True) + + q = F.conv2d( + torch.cat([r, v], 1), + torch.cat([self.q.weight, self.w], 1), + stride=1, + padding=1) + + slice_s1 = S1.long().expand(config.imsize, 1, config.l_q, q.size(0)) + slice_s1 = slice_s1.permute(3, 2, 1, 0) + q_out = q.gather(2, slice_s1).squeeze(2) + + slice_s2 = S2.long().expand(1, config.l_q, q.size(0)) + slice_s2 = slice_s2.permute(2, 1, 0) + q_out = q_out.gather(2, slice_s2).squeeze(2) + + logits = self.fc(q_out) + return logits, self.sm(logits) diff --git a/src/algorithms/learning/VIN/requirements.txt b/src/algorithms/learning/VIN/requirements.txt new file mode 100644 index 000000000..27dabf2f9 --- /dev/null +++ b/src/algorithms/learning/VIN/requirements.txt @@ -0,0 +1,4 @@ +scipy>=0.19.0 +matplotlib>=2.0.0 +numpy>=1.12.1 +torchvision>=0.1.8 diff --git a/src/algorithms/learning/VIN/results/16x16_1.png b/src/algorithms/learning/VIN/results/16x16_1.png new file mode 100644 index 000000000..9eb172316 Binary files /dev/null and b/src/algorithms/learning/VIN/results/16x16_1.png differ diff --git a/src/algorithms/learning/VIN/results/16x16_2.png 
"""Scratch script comparing ways to merge two binary grids element-wise."""
import numpy as np

arr1 = np.array([[1, 1, 0], [0, 1, 1], [1, 0, 0]])

arr2 = np.array([[1, 0, 0], [1, 0, 1], [1, 1, 1]])

# Element-wise OR of both grids (boolean array), its int8 form, and the
# element-wise maximum, which gives the same cells for 0/1 input.
G = np.logical_or.reduce((arr1, arr2))
W = np.array(G, dtype=np.int8)
M = np.maximum(arr1, arr2)
# NumPy has no ``intersect2d``; looking it up raised AttributeError and stopped
# the script before anything was printed. ``intersect1d`` is the existing
# set-intersection routine.
Q = np.intersect1d
print(G)
print(M)
print(W)
--- /dev/null +++ b/src/algorithms/learning/VIN/test.py @@ -0,0 +1,346 @@ +import sys +import argparse +import json +import matplotlib.pyplot as plt +import random +import numpy as np +import torch +from torch.autograd import Variable + +from dataset.dataset import * +from utility.utils import * +from model import * + +from domains.gridworld import * +from generators.obstacle_gen import * + +import logging +import time +import math + +def main(config, + n_domains=3000, + max_obs=30, + max_obs_size=None, + n_traj=1, + n_actions=8,gen = False): + gen = config.gen + # Correct vs total: + logging.basicConfig(filename='./resources/logs/30k_16_no_block.log',format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO) + correct, total = 0.0, 0.0 + # Automatic swith of GPU mode if available + use_GPU = torch.cuda.is_available() + # Instantiate a VIN model + vin = VIN(config) + # Load model parameters + vin.load_state_dict(torch.load(config.weights, map_location=None if use_GPU else torch.device("cpu"))) + # Use GPU if available + if use_GPU: + vin = vin.cuda() + counter,total_no_soln = 0,0 + global data + data = [] + t_list = [] + total_dev_non_rel, total_dev_rel = 0.0,0.0 + total_dist, total_astar_dist = 0.0,0.0 + metrics = True #this enables displaying the distance left to reach goal upon a failure + dist_remain_avg = 0.0 + wpn = False + for dom in range(n_domains): + if gen: + goal = [ + np.random.randint(config.imsize), + np.random.randint(config.imsize) + ] + obs = obstacles([config.imsize, config.imsize], goal, max_obs_size) + # Add obstacles to map + n_obs = obs.add_n_rand_obs(max_obs) + # Add border to map + border_res = obs.add_border() + # Ensure we have valid map + if n_obs == 0 or not border_res: + continue + start = None + else: + wpn = True + # path = './resources/maps/' + path = config.maps + mp, goal, start = open_map(dom,path) + # path = './maps/8_data_300' + # mp, goal, start = open_map_list(dom,path) + mp[start[1]][start[0]] = 0 #Set the start 
position as freespace too + mp[goal[1]][goal[0]] = 0 #Set the goal position as freespace too + + goal = [goal[1],goal[0]] #swap them around, for the row col format (x = col not row) + start = [start[1],start[0]] + obs = obstacles([config.imsize, config.imsize], goal, max_obs_size) + obs.dom = mp + + # Get final map + im = obs.get_final() + + + #1 is obstacles. + #set obs.dom as the mp + logging.debug('0 is obstacle ') + logging.debug(' im: %s ', im) + # Generate gridworld from obstacle map + G = gridworld(im, goal[0], goal[1]) + # Get value prior + value_prior = G.get_reward_prior() + # Sample random trajectories to our goal + states_xy, states_one_hot = sample_trajectory(G, n_traj,start,gen) #dijkstra trajectory + # print('states_xy', states_xy[0] , len(states_xy[0])) + if gen and len(states_xy[0]) > 0: + save_image(G.image,(goal[0],goal[1]),states_xy[0][0],states_xy, states_one_hot,counter) #this saves the maps + + counter += 1 + t0 = time.time() + for i in range(n_traj): + if len(states_xy[i]) > 1: + + # Get number of steps to goal + L = len(states_xy[i]) * 2 + # Allocate space for predicted steps + pred_traj = np.zeros((L, 2)) + # Set starting position + pred_traj[0, :] = states_xy[i][0, :] + + for j in range(1, L): + # Transform current state data + state_data = pred_traj[j - 1, :] + state_data = state_data.astype(np.int) + # Transform domain to Networks expected input shape + im_data = G.image.astype(np.int) + im_data = 1 - im_data + im_data = im_data.reshape(1, 1, config.imsize, + config.imsize) + # Transfrom value prior to Networks expected input shape + value_data = value_prior.astype(np.int) + value_data = value_data.reshape(1, 1, config.imsize, + config.imsize) + # Get inputs as expected by network + X_in = torch.from_numpy( + np.append(im_data, value_data, axis=1)).float() + S1_in = torch.from_numpy(state_data[0].reshape( + [1, 1])).float() + S2_in = torch.from_numpy(state_data[1].reshape( + [1, 1])).float() + # Send Tensors to GPU if available + if 
use_GPU: + X_in = X_in.cuda() + S1_in = S1_in.cuda() + S2_in = S2_in.cuda() + # Wrap to autograd.Variable + X_in, S1_in, S2_in = Variable(X_in), Variable( + S1_in), Variable(S2_in) + # Forward pass in our neural net + _, predictions = vin(X_in, S1_in, S2_in, config) + _, indices = torch.max(predictions.cpu(), 1, keepdim=True) + a = indices.data.numpy()[0][0] + # Transform prediction to indices + s = G.map_ind_to_state(pred_traj[j - 1, 0], + pred_traj[j - 1, 1]) + ns = G.sample_next_state(s, a) + nr, nc = G.get_coords(ns) + pred_traj[j, 0] = nr + pred_traj[j, 1] = nc + if nr == goal[0] and nc == goal[1]: + # We hit goal so fill remaining steps + pred_traj[j + 1:, 0] = nr + pred_traj[j + 1:, 1] = nc + break + # Plot optimal and predicted path (also start, end) + if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]: + logging.debug('#################### - Path Found map %s!\n', dom) + correct += 1 + t1 = time.time() + t_list.append(t1-t0) + dev_rel,dev_non_rel,dist,astar_dist = deviation(states_xy[i],pred_traj,goal,total) + total_dev_rel += dev_rel + total_dev_non_rel += dev_non_rel + total_dist += dist + total_astar_dist += astar_dist + if config.plot == True: + visualize(G.image.T, states_xy[i], pred_traj) + elif metrics: + d = dist_left(pred_traj,goal) + dist_remain_avg += d + if config.plot == True: + visualize(G.image.T, states_xy[i], pred_traj) + total += 1 + + + + elif wpn: + total_no_soln += 1 + sys.stdout.write("\r" + str(int( + (float(dom) / n_domains) * 100.0)) + "%") + sys.stdout.flush() + + sys.stdout.write("\n") + if total and correct: + logging.info('Rollout Accuracy: %s',(100 * (correct / total))) + logging.info('Rollout Accuracy Adjusted: %s',(100 * (correct / (total+total_no_soln)))) + logging.info('Total maps with no soln from Dijkstra %s', total_no_soln) + logging.info('Total avg Rel Deviation %s', (total_dev_rel/total)) + logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel/total)) + logging.info('Total avg VIN Distance %s', 
def visualize(dom, states_xy, pred_traj):
    """Draw the optimal and predicted paths over the map, then wait for input.

    Args:
        dom: 2-D image handed to ``plt.imshow``.
        states_xy: Reference path, read as columns ``[:, 0]`` and ``[:, 1]``;
            its first and last rows are marked as start and goal.
        pred_traj: Predicted path with the same two-column layout.

    The call blocks in ``plt.waitforbuttonpress`` and closes the figure
    afterwards.
    """
    fig, ax = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")
    ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
    ax.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
    ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
    ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')
    legend = ax.legend(loc='upper right', shadow=False)
    for label in legend.get_texts():
        label.set_fontsize('x-small')  # the legend text size
    for label in legend.get_lines():
        label.set_linewidth(0.5)  # the legend line width
    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(fig)


def save_image(im, goal, start, states_xy, states_one_hot, counter):
    """Append one generated map to the global ``data`` list and dump it as JSON.

    Reads two module globals: ``config`` (``config.imsize`` names the output
    file) and ``data`` (all maps saved so far). The whole list is rewritten to
    ``./maps/<imsize>_data_300.json`` on every call.

    Args:
        im: Map array; stored via ``im.tolist()``.
        goal: Indexable goal position; elements 0 and 1 are stored.
        start, states_one_hot, counter: Accepted for call compatibility, unused.
        states_xy: Sampled trajectories; the first point of the first
            trajectory is stored as the agent position.
    """
    s = config.imsize

    if len(states_xy[0]) == 0:
        # No trajectory available: store a fixed agent position. The previous
        # code also wrote through the undefined names start_x/start_y, which
        # raised NameError, and the write targeted a throw-away list anyway.
        agent = [0, 0]
    else:
        agent = states_xy[0][0].tolist()

    data.append({
        'grid': im.tolist(),
        'goal': [goal[0], goal[1]],
        'agent': agent,
    })
    with open('./maps/' + str(s) + '_data_300' + '.json', 'w') as outfile:
        json.dump(data, outfile)


def open_map(dom, path):
    """Load ``<path><dom>.json`` and return its ``(grid, goal, agent)`` entries."""
    file_name = str(path) + str(dom) + '.json'
    with open(file_name) as json_file:
        data = json.load(json_file)
    logging.info('Opening file: %s', file_name)
    return data['grid'], data['goal'], data['agent']


def open_map_list(dom, path):
    """Load ``<path>.json`` (a list of maps) and return entry ``dom``.

    Returns the ``(grid, goal, agent)`` values of ``data[dom]``.
    """
    file_name = str(path) + '.json'
    with open(file_name) as json_file:
        data = json.load(json_file)
    # Log the file that is really opened; the old message appended ``dom`` to
    # the name although no such file is read.
    logging.info('Opening file: %s (entry %s)', file_name, dom)
    entry = data[dom]
    return entry['grid'], entry['goal'], entry['agent']


def _path_length(path):
    """Return the summed Euclidean length of the consecutive steps of ``path``."""
    points = np.asarray(path, dtype=float)
    if points.ndim != 2 or len(points) < 2:
        return 0.0
    steps = np.diff(points[:, :2], axis=0)
    return float(np.sqrt((steps ** 2).sum(axis=1)).sum())


def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path with the optimal one.

    Args:
        optimal_path: Sequence of ``(a, b)`` points of the reference path.
        pred_path: Sequence of ``(a, b)`` points of the predicted path, in the
            order they were visited. Repeated trailing points (padding after
            the goal was reached) add zero length and need no removal.
        goal, map_num: Accepted for call compatibility, unused.

    Returns:
        ``(dev_rel, dev_non_rel, dist, astar_dist)`` where ``dist`` and
        ``astar_dist`` are the predicted and optimal path lengths,
        ``dev_non_rel`` their absolute difference and ``dev_rel`` that
        difference divided by the optimal length.
    """
    # The path must be measured in visiting order. The former
    # np.unique(pred_path, axis=0) call sorted the rows and therefore measured
    # a different polyline.
    dist = _path_length(pred_path)
    astar_dist = _path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist:
        dev_rel = dev_non_rel / astar_dist
    else:
        # Zero-length reference: avoid ZeroDivisionError.
        dev_rel = 0.0 if dev_non_rel == 0 else float('inf')
    return (dev_rel, dev_non_rel, dist, astar_dist)


def dist_left(pred_traj, goal):
    """Return the Euclidean distance between the last trajectory point and ``goal``."""
    last = np.asarray(pred_traj)[-1]
    return math.hypot(goal[0] - last[0], goal[1] - last[1])


if __name__ == '__main__':
    # Parsing training parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/vin_8x8.pth',
        help='Path to trained weights')
    parser.add_argument(
        '--maps',
        type=str,
        default='resources/testing_maps/16x16',
        help='Path to maps')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=8, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=10, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    # ``config`` stays a module global: save_image() reads it.
    config = parser.parse_args()
    # Compute Paths generated by network and plot
    main(config)
def main(config,
         n_domains=10,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8,
         gen=False):
    """Roll out a trained VIN on a set of maps and log rollout statistics.

    For every domain a map is either generated (``gen=True``) or loaded from
    ``./resources/testing_maps/16x16/<dom>.json``. A reference trajectory is
    sampled with ``sample_trajectory`` and the network is then rolled out
    greedily from the same start for at most twice the reference length.

    Args:
        config: parsed arguments; ``imsize``, ``weights`` and ``plot`` are
            read here and the object is also passed to ``VIN``.
        n_domains: number of maps to evaluate.
        max_obs: passed to ``add_n_rand_obs`` when generating maps.
        max_obs_size: passed to ``obstacles``.
        n_traj: number of trajectories evaluated per map.
        n_actions: unused; kept so existing callers keep working.
        gen: generate random maps (and save them) instead of loading them.
    """
    logging.basicConfig(
        filename='./resources/logs/scrap.log',
        format='%(asctime)s-%(levelname)s:%(message)s',
        level=logging.INFO)
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters; map_location lets weights that were saved on a
    # GPU be loaded on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights, map_location=None if use_GPU else 'cpu'))
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    # save_image() appends to this module-level list.
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # record the distance left to the goal upon a failure
    dist_remain_avg = 0.0
    # Maps loaded from disk count towards the "no solution" statistic.
    # Defined up front so the check below never hits an unbound name when
    # gen is True.
    wpn = not gen
    for dom in range(n_domains):
        if gen:
            print('Gen started')
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            path = './resources/testing_maps/16x16/'
            mp, goal, start = open_map(dom, path)
            mp[start[1]][start[0]] = 0  # start position is free space
            mp[goal[1]][goal[0]] = 0  # goal position is free space
            # Swap to row/col order (x is the column, not the row).
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()
        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            # Persist the generated map
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0],
                       states_xy, states_one_hot, counter)

        counter += 1
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                t0 = time.time()
                # Allow twice the reference number of steps
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data. The builtin int is used
                    # because the np.int alias no longer exists in NumPy.
                    state_data = pred_traj[j - 1, :].astype(int)
                    # Transform domain to the network's expected input shape
                    im_data = 1 - G.image.astype(int)
                    im_data = im_data.reshape(1, 1, config.imsize,
                                              config.imsize)
                    # Transform value prior to the expected input shape
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, config.imsize,
                                                    config.imsize)
                    # Get inputs as expected by network
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(
                        state_data[0].reshape([1, 1])).float()
                    S2_in = torch.from_numpy(
                        state_data[1].reshape([1, 1])).float()
                    # Send tensors to GPU if available
                    if use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    X_in, S1_in, S2_in = (Variable(X_in), Variable(S1_in),
                                          Variable(S2_in))
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(
                        states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    dist_remain_avg += dist_left(pred_traj, goal)
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1
            elif wpn:
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        failures = total - correct
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero.
        avg_dist_left = dist_remain_avg / failures if failures else 0.0
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', (total_dist / total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist / total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist / total)) - ((total_dist / total))) / ((total_astar_dist / total))))
        logging.info('Total elapsed time %s', (sum(t_list) / (correct)))  # TODO: Possibly add total no soln
        logging.info('Avg distance left when failed: %s ', avg_dist_left)
        logging.info('---------------------------------Done ------------------------------------')
    else:
        logging.info('No successes either vin or dijkstra')
def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path against the optimal path.

    Path length is the sum of Euclidean distances between consecutive
    points, taken in the order the points appear in the path.

    Args:
        optimal_path: sequence of (a, b) points of the reference path.
        pred_path: sequence of (a, b) points of the predicted path. Repeated
            trailing points (padding written after the goal is reached) add
            zero length, so they need no special handling.
        goal: unused; kept so existing callers keep working.
        map_num: unused; kept so existing callers keep working.

    Returns:
        Tuple ``(dev_rel, dev_non_rel, dist, astar_dist)`` where ``dist`` is
        the predicted path length, ``astar_dist`` the optimal path length,
        ``dev_non_rel`` their absolute difference and ``dev_rel`` that
        difference divided by ``astar_dist``.
    """
    def _path_length(points):
        # Fewer than two points means there is no step to measure.
        if len(points) < 2:
            return 0.0
        steps = points[1:, :2] - points[:-1, :2]
        return float(np.sqrt((steps ** 2).sum(axis=1)).sum())

    # NOTE: the previous implementation de-duplicated pred_path with
    # np.unique(axis=0), which also *sorts* the rows and therefore measured
    # a path that was never travelled. The original order is kept here.
    dist = _path_length(np.asarray(pred_path, dtype=float))
    astar_dist = _path_length(np.asarray(optimal_path, dtype=float))

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist > 0:
        dev_rel = dev_non_rel / astar_dist
    else:
        # Zero-length reference path: the ratio is undefined, so report 0
        # when both paths agree and infinity otherwise instead of raising.
        dev_rel = 0.0 if dev_non_rel == 0 else float('inf')
    return (dev_rel, dev_non_rel, dist, astar_dist)
def main(config,
         n_domains=3000,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8,
         gen=False):
    """Roll out a trained VIN on a set of maps and log rollout statistics.

    For every domain a map is either generated (``gen=True``) or loaded from
    ``./resources/testing_maps/28x28/<dom>.json``. A reference trajectory is
    sampled with ``sample_trajectory`` and the network is then rolled out
    greedily from the same start for at most twice the reference length.

    Args:
        config: parsed arguments; ``imsize``, ``weights`` and ``plot`` are
            read here and the object is also passed to ``VIN``.
        n_domains: number of maps to evaluate.
        max_obs: passed to ``add_n_rand_obs`` when generating maps.
        max_obs_size: passed to ``obstacles``.
        n_traj: number of trajectories evaluated per map.
        n_actions: unused; kept so existing callers keep working.
        gen: generate random maps (and save them) instead of loading them.
    """
    logging.basicConfig(
        filename='./resources/logs/test28_60k_no_block.log',
        format='%(asctime)s-%(levelname)s:%(message)s',
        level=logging.INFO)
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters; map_location lets weights that were saved on a
    # GPU be loaded on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights, map_location=None if use_GPU else 'cpu'))
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    # save_image() appends to this module-level list.
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # record the distance left to the goal upon a failure
    dist_remain_avg = 0.0
    # Maps loaded from disk count towards the "no solution" statistic.
    # Defined up front so the check below never hits an unbound name when
    # gen is True.
    wpn = not gen
    for dom in range(n_domains):
        if gen:
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            path = './resources/testing_maps/28x28/'
            mp, goal, start = open_map(dom, path)
            mp[start[1]][start[0]] = 0  # start position is free space
            mp[goal[1]][goal[0]] = 0  # goal position is free space
            # Swap to row/col order (x is the column, not the row).
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()
        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            # Persist the generated map
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0],
                       states_xy, states_one_hot, counter)

        counter += 1
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                t0 = time.time()
                # Allow twice the reference number of steps
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data. The builtin int is used
                    # because the np.int alias no longer exists in NumPy.
                    state_data = pred_traj[j - 1, :].astype(int)
                    # Transform domain to the network's expected input shape
                    im_data = 1 - G.image.astype(int)
                    im_data = im_data.reshape(1, 1, config.imsize,
                                              config.imsize)
                    # Transform value prior to the expected input shape
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, config.imsize,
                                                    config.imsize)
                    # Get inputs as expected by network
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(
                        state_data[0].reshape([1, 1])).float()
                    S2_in = torch.from_numpy(
                        state_data[1].reshape([1, 1])).float()
                    # Send tensors to GPU if available
                    if use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    X_in, S1_in, S2_in = (Variable(X_in), Variable(S1_in),
                                          Variable(S2_in))
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(
                        states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    dist_remain_avg += dist_left(pred_traj, goal)
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1
            elif wpn:
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        failures = total - correct
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero.
        avg_dist_left = dist_remain_avg / failures if failures else 0.0
        logging.info('Rollout Accuracy Dijkstra: %s', (100 * ((total - total_no_soln) / total)))
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', (total_dist / total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist / total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist / total)) - ((total_dist / total))) / ((total_astar_dist / total))))
        logging.info('Total elapsed time %s', (sum(t_list) / (correct)))  # TODO: Possibly add total no soln
        logging.info('Avg distance left when failed: %s ', avg_dist_left)
        logging.info('---------------------------------Done ------------------------------------')
    else:
        logging.info('No successes either vin or dijkstra')
def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path against the optimal path.

    Path length is the sum of Euclidean distances between consecutive
    points, taken in the order the points appear in the path.

    Args:
        optimal_path: sequence of (a, b) points of the reference path.
        pred_path: sequence of (a, b) points of the predicted path. Repeated
            trailing points (padding written after the goal is reached) add
            zero length, so they need no special handling.
        goal: unused; kept so existing callers keep working.
        map_num: unused; kept so existing callers keep working.

    Returns:
        Tuple ``(dev_rel, dev_non_rel, dist, astar_dist)`` where ``dist`` is
        the predicted path length, ``astar_dist`` the optimal path length,
        ``dev_non_rel`` their absolute difference and ``dev_rel`` that
        difference divided by ``astar_dist``.
    """
    def _path_length(points):
        # Fewer than two points means there is no step to measure.
        if len(points) < 2:
            return 0.0
        steps = points[1:, :2] - points[:-1, :2]
        return float(np.sqrt((steps ** 2).sum(axis=1)).sum())

    # NOTE: the previous implementation de-duplicated pred_path with
    # np.unique(axis=0), which also *sorts* the rows and therefore measured
    # a path that was never travelled. The original order is kept here.
    dist = _path_length(np.asarray(pred_path, dtype=float))
    astar_dist = _path_length(np.asarray(optimal_path, dtype=float))

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist > 0:
        dev_rel = dev_non_rel / astar_dist
    else:
        # Zero-length reference path: the ratio is undefined, so report 0
        # when both paths agree and infinity otherwise instead of raising.
        dev_rel = 0.0 if dev_non_rel == 0 else float('inf')
    return (dev_rel, dev_non_rel, dist, astar_dist)
def main(config,
         n_domains=600,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8,
         gen=False):
    """Roll out a trained VIN on a set of maps and log rollout statistics.

    For every domain a map is either generated (``gen=True``) or loaded from
    ``./resources/testing_maps/64x64_300/<dom>.json``. A reference trajectory
    is sampled with ``sample_trajectory`` and the network is then rolled out
    greedily from the same start for at most twice the reference length.

    Args:
        config: parsed arguments; ``imsize``, ``weights`` and ``plot`` are
            read here and the object is also passed to ``VIN``.
        n_domains: number of maps to evaluate.
        max_obs: passed to ``add_n_rand_obs`` when generating maps.
        max_obs_size: passed to ``obstacles``.
        n_traj: number of trajectories evaluated per map.
        n_actions: unused; kept so existing callers keep working.
        gen: generate random maps (and save them) instead of loading them.
    """
    logging.basicConfig(
        filename='./resources/logs/test_64_30k_64x64_600',
        format='%(asctime)s-%(levelname)s:%(message)s',
        level=logging.INFO)
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters; map_location lets weights that were saved on a
    # GPU be loaded on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights, map_location=None if use_GPU else 'cpu'))
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    # save_image() appends to this module-level list.
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # record the distance left to the goal upon a failure
    dist_remain_avg = 0.0
    # Maps loaded from disk count towards the "no solution" statistic.
    # Defined up front so the check below never hits an unbound name when
    # gen is True.
    wpn = not gen
    for dom in range(n_domains):
        if gen:
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            path = './resources/testing_maps/64x64_300/'
            mp, goal, start = open_map(dom, path)
            mp[start[1]][start[0]] = 0  # start position is free space
            mp[goal[1]][goal[0]] = 0  # goal position is free space
            # Swap to row/col order (x is the column, not the row).
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()
        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            # Persist the generated map
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0],
                       states_xy, states_one_hot, counter)

        counter += 1
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                t0 = time.time()
                # Allow twice the reference number of steps
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data. The builtin int is used
                    # because the np.int alias no longer exists in NumPy.
                    state_data = pred_traj[j - 1, :].astype(int)
                    # Transform domain to the network's expected input shape
                    im_data = 1 - G.image.astype(int)
                    im_data = im_data.reshape(1, 1, config.imsize,
                                              config.imsize)
                    # Transform value prior to the expected input shape
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, config.imsize,
                                                    config.imsize)
                    # Get inputs as expected by network
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(
                        state_data[0].reshape([1, 1])).float()
                    S2_in = torch.from_numpy(
                        state_data[1].reshape([1, 1])).float()
                    # Send tensors to GPU if available
                    if use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    X_in, S1_in, S2_in = (Variable(X_in), Variable(S1_in),
                                          Variable(S2_in))
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(
                        states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    dist_remain_avg += dist_left(pred_traj, goal)
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1
            elif wpn:
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        failures = total - correct
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero.
        avg_dist_left = dist_remain_avg / failures if failures else 0.0
        logging.info('Rollout Accuracy Dijkstra: %s', (100 * ((total - total_no_soln) / total)))
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', (total_dist / total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist / total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist / total)) - ((total_dist / total))) / ((total_astar_dist / total))))
        logging.info('Total elapsed time %s', (sum(t_list) / (correct)))  # TODO: Possibly add total no soln
        logging.info('Avg distance left when failed: %s ', avg_dist_left)
        logging.info('---------------------------------Done ------------------------------------')
    else:
        logging.info('No successes either vin or dijkstra')
def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path against the optimal path.

    Path length is the sum of Euclidean distances between consecutive
    points, taken in the order the points appear in the path.

    Args:
        optimal_path: sequence of (a, b) points of the reference path.
        pred_path: sequence of (a, b) points of the predicted path. Repeated
            trailing points (padding written after the goal is reached) add
            zero length, so they need no special handling.
        goal: unused; kept so existing callers keep working.
        map_num: unused; kept so existing callers keep working.

    Returns:
        Tuple ``(dev_rel, dev_non_rel, dist, astar_dist)`` where ``dist`` is
        the predicted path length, ``astar_dist`` the optimal path length,
        ``dev_non_rel`` their absolute difference and ``dev_rel`` that
        difference divided by ``astar_dist``.
    """
    def _path_length(points):
        # Fewer than two points means there is no step to measure.
        if len(points) < 2:
            return 0.0
        steps = points[1:, :2] - points[:-1, :2]
        return float(np.sqrt((steps ** 2).sum(axis=1)).sum())

    # NOTE: the previous implementation de-duplicated pred_path with
    # np.unique(axis=0), which also *sorts* the rows and therefore measured
    # a path that was never travelled. The original order is kept here.
    dist = _path_length(np.asarray(pred_path, dtype=float))
    astar_dist = _path_length(np.asarray(optimal_path, dtype=float))

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist > 0:
        dev_rel = dev_non_rel / astar_dist
    else:
        # Zero-length reference path: the ratio is undefined, so report 0
        # when both paths agree and infinity otherwise instead of raising.
        dev_rel = 0.0 if dev_non_rel == 0 else float('inf')
    return (dev_rel, dev_non_rel, dist, astar_dist)
def main(config,
         n_domains=3000,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8, gen=False):
    """Roll out a trained VIN on a set of maps and log accuracy and path metrics.

    For every map a reference trajectory is sampled with ``sample_trajectory``;
    the network is then rolled out greedily from the same start for at most
    twice the reference length. A rollout counts as correct when its last
    position equals the goal.

    Args:
        config: parsed arguments; ``weights``, ``imsize`` and ``plot`` are read
            here and the whole object is passed to ``VIN``.
        n_domains: number of maps to evaluate.
        max_obs: number of random obstacles requested in generation mode.
        max_obs_size: forwarded to ``obstacles``.
        n_traj: number of trajectories sampled per map.
        n_actions: unused; kept for interface compatibility.
        gen: when true, maps are generated randomly (and saved with
            ``save_image``) instead of being loaded from disk.
    """
    logging.basicConfig(filename='./resources/logs/test8_rerun_60k_no_block.log',
                        format='%(asctime)s-%(levelname)s:%(message)s',
                        level=logging.INFO)
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model and load its parameters
    vin = VIN(config)
    vin.load_state_dict(torch.load(config.weights))
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    # `data` is the module-level list that save_image appends to.
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # enables recording the distance left to the goal upon a failure
    dist_remain_avg = 0.0
    # Maps without a reference trajectory are only counted for loaded maps.
    # Defined up front so generation mode no longer hits an unbound name.
    wpn = not gen
    for dom in range(n_domains):
        if gen:
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            path = './resources/testing_maps/8x8/'
            mp, goal, start = open_map(dom, path)
            mp[start[1]][start[0]] = 0  # Set the start position as freespace too
            mp[goal[1]][goal[0]] = 0  # Set the goal position as freespace too

            # Swap to row/col order (x is the column, not the row)
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()

        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            # Persist the generated map
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0], states_xy, states_one_hot, counter)

        counter += 1
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                t0 = time.time()
                # Allow twice the reference number of steps
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                # The map and value prior do not change during a rollout, so the
                # network input is built once instead of on every step.
                # (builtin int replaces the removed np.int alias.)
                im_data = G.image.astype(int)
                im_data = 1 - im_data
                im_data = im_data.reshape(1, 1, config.imsize, config.imsize)
                value_data = value_prior.astype(int)
                value_data = value_data.reshape(1, 1, config.imsize, config.imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                with torch.no_grad():  # inference only
                    for j in range(1, L):
                        # Current state as integer indices
                        state_data = pred_traj[j - 1, :].astype(int)
                        S1_in = torch.from_numpy(state_data[0].reshape(
                            [1, 1])).float()
                        S2_in = torch.from_numpy(state_data[1].reshape(
                            [1, 1])).float()
                        if use_GPU:
                            S1_in = S1_in.cuda()
                            S2_in = S2_in.cuda()
                        S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                        # Forward pass in our neural net
                        _, predictions = vin(X_in, S1_in, S2_in, config)
                        _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                        a = indices.data.numpy()[0][0]
                        # Apply the chosen action in the gridworld
                        s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                               pred_traj[j - 1, 1])
                        ns = G.sample_next_state(s, a)
                        nr, nc = G.get_coords(ns)
                        pred_traj[j, 0] = nr
                        pred_traj[j, 1] = nc
                        if nr == goal[0] and nc == goal[1]:
                            # We hit goal so fill remaining steps
                            pred_traj[j + 1:, 0] = nr
                            pred_traj[j + 1:, 1] = nc
                            break

                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    d = dist_left(pred_traj, goal)
                    dist_remain_avg += d
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1
            elif wpn:
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        # NOTE(review): the deviation/distance sums only include successful
        # rollouts but are averaged over `total` — confirm this is intended.
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', (total_dist / total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist / total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist / total)) - ((total_dist / total))) / ((total_astar_dist / total))))
        logging.info('Total elapsed time %s', (sum(t_list) / (correct)))
        # With no failures there is nothing to average (and nothing to divide by).
        failures = total - correct
        logging.info('Avg distance left when failed: %s ', (dist_remain_avg / failures) if failures else 0.0)
        logging.info('---------------------------------Done ------------------------------------')

    else:
        logging.info('No successes either vin or dijkstra')
def open_map_list(dom, path):
    '''
    Opens the JSON file `path + '.json'`, which holds a list of maps, and
    returns the grid, goal and agent of the map at index `dom`.
    '''
    with open(str(path) + '.json') as json_file:
        maps = json.load(json_file)
    # Log the file that was actually opened; the old message appended `dom`
    # to the file name, which is the naming scheme of open_map, not this one.
    logging.info('Opening file: %s.json (map index %s)', path, dom)
    entry = maps[dom]
    return entry['grid'], entry['goal'], entry['agent']
def dist_left(pred_traj, goal):
    '''
    Returns the straight-line (Euclidean) distance between the last point of
    pred_traj and the goal.
    '''
    last_point = np.array(pred_traj)[-1]
    delta_x = goal[0] - last_point[0]
    delta_y = goal[1] - last_point[1]
    return (delta_x ** 2 + delta_y ** 2) ** 0.5
b/src/algorithms/learning/VIN/test_house_expo.py @@ -0,0 +1,340 @@ +import sys +import argparse +import json +import matplotlib.pyplot as plt +import random +import numpy as np +import torch +from torch.autograd import Variable + +from dataset.dataset import * +from utility.utils import * +from model import * + +from domains.gridworld import * +from generators.obstacle_gen import * + +import logging +import time +import math + +def main(config, + n_domains=30, + max_obs=30, + max_obs_size=None, + n_traj=1, + n_actions=8,gen = False): + # Correct vs total: + logging.basicConfig(filename='./resources/logs/house_expo.log',format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO) + correct, total = 0.0, 0.0 + # Automatic swith of GPU mode if available + use_GPU = torch.cuda.is_available() + # Instantiate a VIN model + vin = VIN(config) + # Load model parameters + vin.load_state_dict(torch.load(config.weights)) + # Use GPU if available + if use_GPU: + vin = vin.cuda() + counter,total_no_soln = 0,0 + global data + data = [] + t_list = [] + total_dev_non_rel, total_dev_rel = 0.0,0.0 + total_dist, total_astar_dist = 0.0,0.0 + metrics = True #this enables displaying the distance left to reach goal upon a failure + dist_remain_avg = 0.0 + for dom in range(n_domains): + if gen: + goal = [ + np.random.randint(config.imsize), + np.random.randint(config.imsize) + ] + obs = obstacles([config.imsize, config.imsize], goal, max_obs_size) + # Add obstacles to map + n_obs = obs.add_n_rand_obs(max_obs) + # Add border to map + border_res = obs.add_border() + # Ensure we have valid map + if n_obs == 0 or not border_res: + continue + start = None + else: + wpn = True + # path = './resources/maps/' + path = './resources/house_expo/' + mp, goal, start = open_map(dom,path) + # path = './maps/8_data_300' + # mp, goal, start = open_map_list(dom,path) + mp[start[1]][start[0]] = 0 #Set the start position as freespace too + mp[goal[1]][goal[0]] = 0 #Set the goal position as freespace 
too + + goal = [goal[1],goal[0]] #swap them around, for the row col format (x = col not row) + start = [start[1],start[0]] + obs = obstacles([config.imsize, config.imsize], goal, max_obs_size) + obs.dom = mp + + # Get final map + im = obs.get_final() + + + #1 is obstacles. + #set obs.dom as the mp + logging.debug('0 is obstacle ') + logging.debug(' im: %s ', im) + # Generate gridworld from obstacle map + print('got here') + G = gridworld(im, goal[0], goal[1]) + # Get value prior + value_prior = G.get_reward_prior() + # Sample random trajectories to our goal + states_xy, states_one_hot = sample_trajectory(G, n_traj,start,gen) #dijkstra trajectory + # print('states_xy', states_xy[0] , len(states_xy[0])) + if gen and len(states_xy[0]) > 0: + save_image(G.image,(goal[0],goal[1]),states_xy[0][0],states_xy, states_one_hot,counter) #this saves the maps + + counter += 1 + for i in range(n_traj): + if len(states_xy[i]) > 1: + t0 = time.time() + # Get number of steps to goal + L = len(states_xy[i]) * 2 + # Allocate space for predicted steps + pred_traj = np.zeros((L, 2)) + # Set starting position + pred_traj[0, :] = states_xy[i][0, :] + + for j in range(1, L): + # Transform current state data + state_data = pred_traj[j - 1, :] + state_data = state_data.astype(np.int) + # Transform domain to Networks expected input shape + im_data = G.image.astype(np.int) + im_data = 1 - im_data + im_data = im_data.reshape(1, 1, config.imsize, + config.imsize) + # Transfrom value prior to Networks expected input shape + value_data = value_prior.astype(np.int) + value_data = value_data.reshape(1, 1, config.imsize, + config.imsize) + # Get inputs as expected by network + X_in = torch.from_numpy( + np.append(im_data, value_data, axis=1)).float() + S1_in = torch.from_numpy(state_data[0].reshape( + [1, 1])).float() + S2_in = torch.from_numpy(state_data[1].reshape( + [1, 1])).float() + # Send Tensors to GPU if available + if use_GPU: + X_in = X_in.cuda() + S1_in = S1_in.cuda() + S2_in = 
S2_in.cuda() + # Wrap to autograd.Variable + X_in, S1_in, S2_in = Variable(X_in), Variable( + S1_in), Variable(S2_in) + # Forward pass in our neural net + _, predictions = vin(X_in, S1_in, S2_in, config) + _, indices = torch.max(predictions.cpu(), 1, keepdim=True) + a = indices.data.numpy()[0][0] + # Transform prediction to indices + s = G.map_ind_to_state(pred_traj[j - 1, 0], + pred_traj[j - 1, 1]) + ns = G.sample_next_state(s, a) + nr, nc = G.get_coords(ns) + pred_traj[j, 0] = nr + pred_traj[j, 1] = nc + if nr == goal[0] and nc == goal[1]: + # We hit goal so fill remaining steps + pred_traj[j + 1:, 0] = nr + pred_traj[j + 1:, 1] = nc + break + # Plot optimal and predicted path (also start, end) + if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]: + logging.debug('#################### - Path Found map %s!\n', dom) + correct += 1 + t1 = time.time() + t_list.append(t1-t0) + dev_rel,dev_non_rel,dist,astar_dist = deviation(states_xy[i],pred_traj,goal,total) + total_dev_rel += dev_rel + total_dev_non_rel += dev_non_rel + total_dist += dist + total_astar_dist += astar_dist + if config.plot == True: + visualize(G.image.T, states_xy[i], pred_traj) + elif metrics: + d = dist_left(pred_traj,goal) + dist_remain_avg += d + if config.plot == True: + visualize(G.image.T, states_xy[i], pred_traj) + total += 1 + + + + elif wpn: + total_no_soln += 1 + sys.stdout.write("\r" + str(int( + (float(dom) / n_domains) * 100.0)) + "%") + sys.stdout.flush() + + sys.stdout.write("\n") + if total and correct: + logging.info('Rollout Accuracy Dijkstra: %s',(100*((total-total_no_soln)/total))) + logging.info('Rollout Accuracy: %s',(100 * (correct / total))) + logging.info('Rollout Accuracy Adjusted: %s',(100 * (correct / (total+total_no_soln)))) + logging.info('Total maps with no soln from Dijkstra %s', total_no_soln) + logging.info('Total avg Rel Deviation %s', (total_dev_rel/total)) + logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel/total)) + logging.info('Total 
def save_image(im, goal, start, states_xy, states_one_hot, counter):
    '''
    Saves the data made by generator as jsons.

    Appends one record (grid, goal, agent position) to the module-level
    `data` list and rewrites './maps/<size>_data_300.json' with the whole
    list. `start`, `states_one_hot` and `counter` are accepted for interface
    compatibility but not used.
    '''
    grid = im.tolist()
    # File name carries the map size. It is taken from the grid itself rather
    # than from a global `config`, which only exists when the file runs as a
    # script; main reshapes this image to (imsize, imsize), so the value is
    # the same.
    size = len(grid)

    if len(states_xy[0]) == 0:
        # No trajectory: fall back to the origin as the agent position.
        # (The old branch referenced undefined start_x/start_y and wrote
        # into a throw-away copy of the grid, so it never changed anything.)
        agent = [0, 0]
    else:
        agent = states_xy[0][0].tolist()

    # main() resets `data` at the start of a run; create it if it is missing.
    records = globals().setdefault('data', [])
    records.append({
        'grid': grid,
        'goal': [goal[0], goal[1]],
        'agent': agent,
    })
    with open('./maps/' + str(size) + '_data_300' + '.json', 'w') as outfile:
        json.dump(records, outfile)
if __name__ == '__main__':
    # Parse rollout parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/30k_no_block_dataset_vin_64x64.pth',
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=100, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=48, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    config = parser.parse_args()
    # Compute paths generated by the network (and plot them with --plot).
    # --gen was parsed but never forwarded, so the flag had no effect.
    main(config, gen=config.gen)
import numpy as np

# Small demonstration of how np.nonzero reports the positions of True entries.
a = np.arange(1, 10).reshape(3, 3)

# Element-wise comparison: which entries of `a` are greater than 3?
mask = a > 3
print(mask)
# Prints:
# [[False False False]
#  [ True  True  True]
#  [ True  True  True]]
# Row 0 has no True entries; rows 1 and 2 are True at columns 0, 1 and 2.

# np.nonzero returns one index array per axis.
print(np.nonzero(mask))
# Prints:
# (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2]))
# The first array holds the row of each True entry and the second its column,
# i.e. the positions (1,0), (1,1), (1,2), (2,0), (2,1), (2,2).
max_obs_size) + # Add obstacles to map + n_obs = obs.add_n_rand_obs(max_obs) + # Add border to map + border_res = obs.add_border() + # Ensure we have valid map + if n_obs == 0 or not border_res: + continue + # Get final map + im = obs.get_final() + + # Generate gridworld from obstacle map + G = gridworld(im, goal[0], goal[1]) + # Get value prior + value_prior = G.get_reward_prior() + # Sample random trajectories to our goal + states_xy, states_one_hot = sample_trajectory(G, n_traj) + + for i in range(n_traj): + if len(states_xy[i]) > 1: + + # Get number of steps to goal + L = len(states_xy[i]) * 2 + # Allocate space for predicted steps + pred_traj = np.zeros((L, 2)) + # Set starting position + pred_traj[0, :] = states_xy[i][0, :] + + for j in range(1, L): + # Transform current state data + state_data = pred_traj[j - 1, :] + state_data = state_data.astype(np.int) + # Transform domain to Networks expected input shape + im_data = G.image.astype(np.int) + im_data = 1 - im_data + im_data = im_data.reshape(1, 1, config.imsize, + config.imsize) + # Transfrom value prior to Networks expected input shape + value_data = value_prior.astype(np.int) + value_data = value_data.reshape(1, 1, config.imsize, + config.imsize) + # Get inputs as expected by network + X_in = torch.from_numpy( + np.append(im_data, value_data, axis=1)).float() + S1_in = torch.from_numpy(state_data[0].reshape( + [1, 1])).float() + S2_in = torch.from_numpy(state_data[1].reshape( + [1, 1])).float() + # Send Tensors to GPU if available + if use_GPU: + X_in = X_in.cuda() + S1_in = S1_in.cuda() + S2_in = S2_in.cuda() + # Wrap to autograd.Variable + X_in, S1_in, S2_in = Variable(X_in), Variable( + S1_in), Variable(S2_in) + # Forward pass in our neural net + _, predictions = vin(X_in, S1_in, S2_in, config) + _, indices = torch.max(predictions.cpu(), 1, keepdim=True) + a = indices.data.numpy()[0][0] + # Transform prediction to indices + s = G.map_ind_to_state(pred_traj[j - 1, 0], + pred_traj[j - 1, 1]) + ns = 
def visualize(dom, states_xy, pred_traj):
    """Show the map with the optimal and the predicted path drawn on top.

    The window stays open until a key or mouse button is pressed, then the
    figure is closed.

    Args:
        dom: image to display (drawn with the "Greys_r" colormap).
        states_xy: optimal path, indexed as [point, coordinate]; its first
            and last points are marked as start and goal.
        pred_traj: predicted path, indexed the same way.
    """
    figure, axes = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")

    first_point = states_xy[0]
    final_point = states_xy[-1]
    axes.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
    axes.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
    axes.plot(first_point[0], first_point[1], '-o', label='Start')
    axes.plot(final_point[0], final_point[1], '-s', label='Goal')

    key = axes.legend(loc='upper right', shadow=False)
    for entry_text in key.get_texts():
        entry_text.set_fontsize('x-small')  # the legend text size
    for entry_line in key.get_lines():
        entry_line.set_linewidth(0.5)  # the legend line width

    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(figure)
hidden layer') + parser.add_argument( + '--l_q', + type=int, + default=10, + help='Number of channels in q layer (~actions) in VI-module') + config = parser.parse_args() + # Compute Paths generated by network and plot + main(config) \ No newline at end of file diff --git a/src/algorithms/learning/VIN/train.py b/src/algorithms/learning/VIN/train.py new file mode 100644 index 000000000..0e0056e54 --- /dev/null +++ b/src/algorithms/learning/VIN/train.py @@ -0,0 +1,142 @@ +import time +import argparse +import numpy as np + +import torch +import torch.nn as nn +import torch.optim as optim + +import torchvision.transforms as transforms + +import matplotlib.pyplot as plt +from dataset.dataset import * +from utility.utils import * +from model import * + + +def train(net, trainloader, config, criterion, optimizer): + print_header() + for epoch in range(config.epochs): # Loop over dataset multiple times + avg_error, avg_loss, num_batches = 0.0, 0.0, 0.0 + start_time = time.time() + for i, data in enumerate(trainloader): # Loop over batches of data + # Get input batch + X, S1, S2, labels = data + if X.size()[0] != config.batch_size: + continue # Drop those data, if not enough for a batch + # Automaticlly select device to make the code device agnostic + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + X = X.to(device) + S1 = S1.to(device) + S2 = S2.to(device) + labels = labels.to(device) + net = net.to(device) + # Zero the parameter gradients + optimizer.zero_grad() + # Forward pass + outputs, predictions = net(X, S1, S2, config) + # Loss + loss = criterion(outputs, labels) + # Backward pass + loss.backward() + # Update params + optimizer.step() + # Calculate Loss and Error + loss_batch, error_batch = get_stats(loss, predictions, labels) + avg_loss += loss_batch + avg_error += error_batch + num_batches += 1 + time_duration = time.time() - start_time + # Print epoch logs + print_stats(epoch, avg_loss, avg_error, num_batches, time_duration) + 
print('\nFinished training. \n') + + +def test(net, testloader, config): + total, correct = 0.0, 0.0 + for i, data in enumerate(testloader): + # Get inputs + X, S1, S2, labels = data + if X.size()[0] != config.batch_size: + continue # Drop those data, if not enough for a batch + # automaticlly select device, device agnostic + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + X = X.to(device) + S1 = S1.to(device) + S2 = S2.to(device) + labels = labels.to(device) + net = net.to(device) + # Forward pass + outputs, predictions = net(X, S1, S2, config) + # Select actions with max scores(logits) + _, predicted = torch.max(outputs, dim=1, keepdim=True) + # Unwrap autograd.Variable to Tensor + predicted = predicted.data + # Compute test accuracy + correct += (torch.eq(torch.squeeze(predicted), labels)).sum() + total += labels.size()[0] + print('Test Accuracy: {:.2f}%'.format(100 * (correct / total))) + + +if __name__ == '__main__': + # Parsing training parameters + parser = argparse.ArgumentParser() + parser.add_argument( + '--datafile', + type=str, + default='dataset/30000_new_gridworld_8x8.npz', + help='Path to data file') + parser.add_argument('--imsize', type=int, default=8, help='Size of image') + parser.add_argument( + '--lr', + type=float, + default=0.005, + help='Learning rate, [0.01, 0.005, 0.002, 0.001]') + parser.add_argument( + '--epochs', type=int, default=30, help='Number of epochs to train') + parser.add_argument( + '--k', type=int, default=10, help='Number of Value Iterations') + parser.add_argument( + '--l_i', type=int, default=2, help='Number of channels in input layer') + parser.add_argument( + '--l_h', + type=int, + default=150, + help='Number of channels in first hidden layer') + parser.add_argument( + '--l_q', + type=int, + default=10, + help='Number of channels in q layer (~actions) in VI-module') + parser.add_argument( + '--batch_size', type=int, default=128, help='Batch size') + config = parser.parse_args() + # Get path to 
save trained model + save_path = "trained/60k_local_new_vin_{0}x{0}.pth".format(config.imsize) + # Instantiate a VIN model + net = VIN(config) + # Loss + criterion = nn.CrossEntropyLoss() + # Optimizer + optimizer = optim.RMSprop(net.parameters(), lr=config.lr, eps=1e-6) + # Dataset transformer: torchvision.transforms + transform = None + # Define Dataset + trainset = GridworldData( + config.datafile, imsize=config.imsize, train=True, transform=transform) + testset = GridworldData( + config.datafile, + imsize=config.imsize, + train=False, + transform=transform) + # Create Dataloader + trainloader = torch.utils.data.DataLoader( + trainset, batch_size=config.batch_size, shuffle=True, num_workers=0) + testloader = torch.utils.data.DataLoader( + testset, batch_size=config.batch_size, shuffle=False, num_workers=0) + # Train the model + train(net, trainloader, config, criterion, optimizer) + # Test accuracy + test(net, testloader, config) + # Save the trained model parameters + torch.save(net.state_dict(), save_path) diff --git a/src/algorithms/learning/VIN/train_og.py b/src/algorithms/learning/VIN/train_og.py new file mode 100644 index 000000000..4e18c0f19 --- /dev/null +++ b/src/algorithms/learning/VIN/train_og.py @@ -0,0 +1,142 @@ +import time +import argparse +import numpy as np + +import torch +import torch.nn as nn +import torch.optim as optim + +import torchvision.transforms as transforms + +import matplotlib.pyplot as plt +from dataset.dataset_og import * +from utility.utils import * +from model import * + + +def train(net, trainloader, config, criterion, optimizer): + print_header() + for epoch in range(config.epochs): # Loop over dataset multiple times + avg_error, avg_loss, num_batches = 0.0, 0.0, 0.0 + start_time = time.time() + for i, data in enumerate(trainloader): # Loop over batches of data + # Get input batch + X, S1, S2, labels = data + if X.size()[0] != config.batch_size: + continue # Drop those data, if not enough for a batch + # Automaticlly select 
device to make the code device agnostic + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + X = X.to(device) + S1 = S1.to(device) + S2 = S2.to(device) + labels = labels.to(device) + net = net.to(device) + # Zero the parameter gradients + optimizer.zero_grad() + # Forward pass + outputs, predictions = net(X, S1, S2, config) + # Loss + loss = criterion(outputs, labels) + # Backward pass + loss.backward() + # Update params + optimizer.step() + # Calculate Loss and Error + loss_batch, error_batch = get_stats(loss, predictions, labels) + avg_loss += loss_batch + avg_error += error_batch + num_batches += 1 + time_duration = time.time() - start_time + # Print epoch logs + print_stats(epoch, avg_loss, avg_error, num_batches, time_duration) + print('\nFinished training. \n') + + +def test(net, testloader, config): + total, correct = 0.0, 0.0 + for i, data in enumerate(testloader): + # Get inputs + X, S1, S2, labels = data + if X.size()[0] != config.batch_size: + continue # Drop those data, if not enough for a batch + # automaticlly select device, device agnostic + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + X = X.to(device) + S1 = S1.to(device) + S2 = S2.to(device) + labels = labels.to(device) + net = net.to(device) + # Forward pass + outputs, predictions = net(X, S1, S2, config) + # Select actions with max scores(logits) + _, predicted = torch.max(outputs, dim=1, keepdim=True) + # Unwrap autograd.Variable to Tensor + predicted = predicted.data + # Compute test accuracy + correct += (torch.eq(torch.squeeze(predicted), labels)).sum() + total += labels.size()[0] + print('Test Accuracy: {:.2f}%'.format(100 * (correct / total))) + + +if __name__ == '__main__': + # Parsing training parameters + parser = argparse.ArgumentParser() + parser.add_argument( + '--datafile', + type=str, + default='dataset/gridworld_8x8.npz', + help='Path to data file') + parser.add_argument('--imsize', type=int, default=8, help='Size of image') + 
parser.add_argument( + '--lr', + type=float, + default=0.005, + help='Learning rate, [0.01, 0.005, 0.002, 0.001]') + parser.add_argument( + '--epochs', type=int, default=30, help='Number of epochs to train') + parser.add_argument( + '--k', type=int, default=10, help='Number of Value Iterations') + parser.add_argument( + '--l_i', type=int, default=2, help='Number of channels in input layer') + parser.add_argument( + '--l_h', + type=int, + default=150, + help='Number of channels in first hidden layer') + parser.add_argument( + '--l_q', + type=int, + default=10, + help='Number of channels in q layer (~actions) in VI-module') + parser.add_argument( + '--batch_size', type=int, default=128, help='Batch size') + config = parser.parse_args() + # Get path to save trained model + save_path = "trained/vin_{0}x{0}.pth".format(config.imsize) + # Instantiate a VIN model + net = VIN(config) + # Loss + criterion = nn.CrossEntropyLoss() + # Optimizer + optimizer = optim.RMSprop(net.parameters(), lr=config.lr, eps=1e-6) + # Dataset transformer: torchvision.transforms + transform = None + # Define Dataset + trainset = GridworldData( + config.datafile, imsize=config.imsize, train=True, transform=transform) + testset = GridworldData( + config.datafile, + imsize=config.imsize, + train=False, + transform=transform) + # Create Dataloader + trainloader = torch.utils.data.DataLoader( + trainset, batch_size=config.batch_size, shuffle=True, num_workers=0) + testloader = torch.utils.data.DataLoader( + testset, batch_size=config.batch_size, shuffle=False, num_workers=0) + # Train the model + train(net, trainloader, config, criterion, optimizer) + # Test accuracy + test(net, testloader, config) + # Save the trained model parameters + torch.save(net.state_dict(), save_path) diff --git a/src/algorithms/learning/VIN/trained/README.md b/src/algorithms/learning/VIN/trained/README.md new file mode 100644 index 000000000..9d1d9b432 --- /dev/null +++ b/src/algorithms/learning/VIN/trained/README.md @@ 
def fmt_row(width, row):
    """Format every item of ``row`` to ``width`` columns, joined by " | "."""
    return " | ".join(fmt_item(x, width) for x in row)


def fmt_item(x, l):
    """Return ``x`` as text, right-aligned in a field of ``l`` characters.

    A NumPy array must be 0-dimensional and is unwrapped to its scalar.
    Floats are rendered with ``%g``; everything else with ``str``. Text that
    is already longer than ``l`` is returned unpadded (never truncated).
    """
    if isinstance(x, np.ndarray):
        assert x.ndim == 0
        x = x.item()
    rep = "%g" % x if isinstance(x, float) else str(x)
    return rep.rjust(l)


def get_stats(loss, predictions, labels):
    """Return ``(loss value, mean misclassification rate)`` for one batch.

    The predicted class of each row is the arg-max of ``predictions`` along
    axis 1; the error is the fraction of rows whose class differs from
    ``labels``.
    """
    predicted_classes = np.argmax(predictions.detach().cpu().numpy(), 1)
    error = np.mean(predicted_classes != labels.detach().cpu().numpy())
    return loss.item(), error


def print_stats(epoch, avg_loss, avg_error, num_batches, time_duration):
    """Print one table row with the epoch's mean loss, mean error and time.

    ``avg_loss`` and ``avg_error`` are running sums that are divided by
    ``num_batches`` here. When ``num_batches`` is zero (no batch was
    processed) both means are reported as ``nan`` instead of raising
    ``ZeroDivisionError``.
    """
    if num_batches:
        mean_loss = avg_loss / num_batches
        mean_error = avg_error / num_batches
    else:
        mean_loss = mean_error = float("nan")
    print(fmt_row(10, [epoch + 1, mean_loss, mean_error, time_duration]))


def print_header():
    """Print the column titles matching the rows of ``print_stats``."""
    print(fmt_row(10, ["Epoch", "Train Loss", "Train Error", "Epoch Time"]))
-8,7 +8,7 @@ from algorithms.classic.graph_based.a_star import AStar from algorithms.configuration.entities.goal import Goal from algorithms.configuration.entities.trace import Trace -from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM +from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM from simulator.services.algorithm_runner import AlgorithmRunner from simulator.services.services import Services from simulator.views.map.display.gradient_list_map_display import GradientListMapDisplay diff --git a/src/algorithms/lstm/a_star_waypoint.py b/src/algorithms/learning/a_star_waypoint.py similarity index 98% rename from src/algorithms/lstm/a_star_waypoint.py rename to src/algorithms/learning/a_star_waypoint.py index 2ea738daf..57a23460d 100644 --- a/src/algorithms/lstm/a_star_waypoint.py +++ b/src/algorithms/learning/a_star_waypoint.py @@ -7,8 +7,8 @@ from algorithms.classic.testing.combined_online_lstm_testing import CombinedOnlineLSTMTesting from algorithms.configuration.maps.map import Map from algorithms.configuration.maps.ros_map import RosMap -from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM -from algorithms.lstm.combined_online_LSTM import CombinedOnlineLSTM +from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM +from algorithms.learning.combined_online_LSTM import CombinedOnlineLSTM from simulator.services.algorithm_runner import AlgorithmRunner from simulator.services.services import Services from simulator.views.map.display.map_display import MapDisplay diff --git a/src/algorithms/lstm/combined_online_LSTM.py b/src/algorithms/learning/combined_online_LSTM.py similarity index 98% rename from src/algorithms/lstm/combined_online_LSTM.py rename to src/algorithms/learning/combined_online_LSTM.py index fa1cc1a45..512dc1f57 100644 --- a/src/algorithms/lstm/combined_online_LSTM.py +++ b/src/algorithms/learning/combined_online_LSTM.py @@ -5,7 +5,7 @@ from algorithms.algorithm import Algorithm from algorithms.basic_testing import BasicTesting 
from algorithms.configuration.maps.map import Map -from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM +from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM from simulator.services.algorithm_runner import AlgorithmRunner from simulator.services.services import Services from simulator.views.map.display.entities_map_display import EntitiesMapDisplay diff --git a/src/algorithms/lstm/map_processing.py b/src/algorithms/learning/map_processing.py similarity index 100% rename from src/algorithms/lstm/map_processing.py rename to src/algorithms/learning/map_processing.py diff --git a/src/algorithms/lstm/trainer.py b/src/algorithms/learning/trainer.py similarity index 100% rename from src/algorithms/lstm/trainer.py rename to src/algorithms/learning/trainer.py diff --git a/src/analyzer/analyzer.py b/src/analyzer/analyzer.py index 7e4ca09ac..804895b3c 100644 --- a/src/analyzer/analyzer.py +++ b/src/analyzer/analyzer.py @@ -3,8 +3,8 @@ from simulator.simulator import Simulator from simulator.services.services import Services from simulator.services.debug import DebugLevel, Debug -from algorithms.lstm.a_star_waypoint import WayPointNavigation -from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM +from algorithms.learning.a_star_waypoint import WayPointNavigation +from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM from algorithms.configuration.maps.map import Map from algorithms.configuration.maps.dense_map import DenseMap from algorithms.configuration.configuration import Configuration diff --git a/src/generator/generator.py b/src/generator/generator.py index 93ed2d726..746a21704 100644 --- a/src/generator/generator.py +++ b/src/generator/generator.py @@ -10,7 +10,7 @@ from matplotlib import pyplot as plt from natsort import natsorted -from algorithms.lstm.LSTM_CAE_tile_by_tile import CAE +from algorithms.learning.LSTM_CAE_tile_by_tile import CAE from algorithms.classic.graph_based.a_star import AStar from algorithms.classic.testing.a_star_testing 
import AStarTesting from algorithms.configuration.configuration import Configuration @@ -18,7 +18,7 @@ from algorithms.configuration.entities.entity import Entity from algorithms.configuration.maps.dense_map import DenseMap from algorithms.configuration.maps.map import Map -from algorithms.lstm.map_processing import MapProcessing +from algorithms.learning.map_processing import MapProcessing from simulator.services.debug import DebugLevel from simulator.services.progress import Progress from simulator.services.resources.atlas import Atlas diff --git a/src/main.py b/src/main.py index 34f12be6c..42469710d 100644 --- a/src/main.py +++ b/src/main.py @@ -1,7 +1,7 @@ from algorithms.configuration.configuration import Configuration from algorithms.algorithm_manager import AlgorithmManager from maps.map_manager import MapManager -from algorithms.lstm.trainer import Trainer +from algorithms.learning.trainer import Trainer from analyzer.analyzer import Analyzer from generator.generator import Generator from simulator.services.debug import DebugLevel diff --git a/src/run_trainer.py b/src/run_trainer.py index e5107abc0..e1e522e17 100644 --- a/src/run_trainer.py +++ b/src/run_trainer.py @@ -5,8 +5,8 @@ from algorithms.basic_testing import BasicTesting from algorithms.configuration.maps.map import Map from maps.map_manager import MapManager -from algorithms.lstm.LSTM_tile_by_tile import BasicLSTMModule, OnlineLSTM -from algorithms.lstm.ML_model import MLModel +from algorithms.learning.LSTM_tile_by_tile import BasicLSTMModule, OnlineLSTM +from algorithms.learning.ML_model import MLModel from simulator.services.debug import DebugLevel from analyzer.analyzer import Analyzer from generator.generator import Generator @@ -27,10 +27,10 @@ from algorithms.classic.sample_based.rrt_connect import RRT_Connect from algorithms.classic.graph_based.wavefront import Wavefront from algorithms.configuration.configuration import Configuration -from algorithms.lstm.LSTM_tile_by_tile import 
OnlineLSTM -from algorithms.lstm.a_star_waypoint import WayPointNavigation -from algorithms.lstm.combined_online_LSTM import CombinedOnlineLSTM -from algorithms.lstm.LSTM_CAE_tile_by_tile import CAE, LSTMCAEModel +from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM +from algorithms.learning.a_star_waypoint import WayPointNavigation +from algorithms.learning.combined_online_LSTM import CombinedOnlineLSTM +from algorithms.learning.LSTM_CAE_tile_by_tile import CAE, LSTMCAEModel # planner testing diff --git a/src/simulator/services/resources/directories.py b/src/simulator/services/resources/directories.py index 5da6bc806..2167873b4 100644 --- a/src/simulator/services/resources/directories.py +++ b/src/simulator/services/resources/directories.py @@ -13,7 +13,7 @@ from simulator.services.services import Services if TYPE_CHECKING: - from algorithms.lstm.ML_model import MLModel + from algorithms.learning.ML_model import MLModel class ModelSubdir(Directory): diff --git a/src/simulator/views/map/display/online_lstm_map_display.py b/src/simulator/views/map/display/online_lstm_map_display.py index 9c90be130..d1cc4de86 100644 --- a/src/simulator/views/map/display/online_lstm_map_display.py +++ b/src/simulator/views/map/display/online_lstm_map_display.py @@ -8,7 +8,7 @@ from algorithms.configuration.entities.agent import Agent from algorithms.configuration.entities.goal import Goal from algorithms.configuration.maps.map import Map -from algorithms.lstm.map_processing import MapProcessing +from algorithms.learning.map_processing import MapProcessing from simulator.services.services import Services from simulator.views.map.display.map_display import MapDisplay from structures import Point, Colour, DynamicColour, RED